From 60123c107ec3bda987b146ed8fdb591f48bce8dc Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 28 Oct 2025 16:01:04 -0700 Subject: [PATCH 001/143] add cached size to internal nodes --- b+tree.d.ts | 1 - b+tree.js | 93 ++++++++++++++++++++++++++++++++++---------------- b+tree.test.ts | 29 ++++++++++++++++ b+tree.ts | 92 ++++++++++++++++++++++++++++++++++--------------- 4 files changed, 158 insertions(+), 57 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 93693eb..8da255b 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -105,7 +105,6 @@ export declare function simpleComparator(a: (number | string)[], b: (number | st */ export default class BTree implements ISortedMapF, ISortedMap { private _root; - _size: number; _maxNodeSize: number; /** * provides a total order over keys (and a strict partial order over the type K) diff --git a/b+tree.js b/b+tree.js index 17ae8f1..9246c75 100644 --- a/b+tree.js +++ b/b+tree.js @@ -155,7 +155,6 @@ var BTree = /** @class */ (function () { */ function BTree(entries, compare, maxNodeSize) { this._root = EmptyLeaf; - this._size = 0; this._maxNodeSize = maxNodeSize >= 4 ? Math.min(maxNodeSize, 256) : 32; this._compare = compare || defaultComparator; if (entries) @@ -165,26 +164,25 @@ var BTree = /** @class */ (function () { ///////////////////////////////////////////////////////////////////////////// // ES6 Map methods ///////////////////////////////////////////////////// /** Gets the number of key-value pairs in the tree. */ - get: function () { return this._size; }, + get: function () { return this._root.size(); }, enumerable: false, configurable: true }); Object.defineProperty(BTree.prototype, "length", { /** Gets the number of key-value pairs in the tree. */ - get: function () { return this._size; }, + get: function () { return this.size; }, enumerable: false, configurable: true }); Object.defineProperty(BTree.prototype, "isEmpty", { /** Returns true iff the tree contains no key-value pairs. 
*/ - get: function () { return this._size === 0; }, + get: function () { return this._root.size() === 0; }, enumerable: false, configurable: true }); /** Releases the tree so that its size is 0. */ BTree.prototype.clear = function () { this._root = EmptyLeaf; - this._size = 0; }; /** Runs a function for each key-value pair, in order from smallest to * largest key. For compatibility with ES6 Map, the argument order to @@ -248,7 +246,8 @@ var BTree = /** @class */ (function () { if (result === true || result === false) return result; // Root node has split, so create a new root node. - this._root = new BNodeInternal([this._root, result]); + var children = [this._root, result]; + this._root = new BNodeInternal(children, sumChildSizes(children)); return true; }; /** @@ -775,7 +774,6 @@ var BTree = /** @class */ (function () { this._root.isShared = true; var result = new BTree(undefined, this._compare, this._maxNodeSize); result._root = this._root; - result._size = this._size; return result; }; /** Performs a greedy clone, immediately duplicating any nodes that are @@ -786,7 +784,6 @@ var BTree = /** @class */ (function () { BTree.prototype.greedyClone = function (force) { var result = new BTree(undefined, this._compare, this._maxNodeSize); result._root = this._root.greedyClone(force); - result._size = this._size; return result; }; /** Gets an array filled with the contents of the tree, sorted by key */ @@ -1108,6 +1105,9 @@ var BNode = /** @class */ (function () { enumerable: false, configurable: true }); + BNode.prototype.size = function () { + return this.keys.length; + }; /////////////////////////////////////////////////////////////////////////// // Shared methods ///////////////////////////////////////////////////////// BNode.prototype.maxKey = function () { @@ -1251,7 +1251,6 @@ var BNode = /** @class */ (function () { if (i < 0) { // key does not exist yet i = ~i; - tree._size++; if (this.keys.length < tree._maxNodeSize) { return this.insertInLeaf(i, key, value, 
tree); } @@ -1367,7 +1366,6 @@ var BNode = /** @class */ (function () { this.keys.splice(i, 1); if (this.values !== undefVals) this.values.splice(i, 1); - tree._size--; i--; iHigh--; } @@ -1403,7 +1401,7 @@ var BNodeInternal = /** @class */ (function (_super) { * This does not mark `children` as shared, so it is the responsibility of the caller * to ensure children are either marked shared, or aren't included in another tree. */ - function BNodeInternal(children, keys) { + function BNodeInternal(children, size, keys) { var _this = this; if (!keys) { keys = []; @@ -1412,18 +1410,22 @@ var BNodeInternal = /** @class */ (function (_super) { } _this = _super.call(this, keys) || this; _this.children = children; + _this._size = size; return _this; } BNodeInternal.prototype.clone = function () { var children = this.children.slice(0); for (var i = 0; i < children.length; i++) children[i].isShared = true; - return new BNodeInternal(children, this.keys.slice(0)); + return new BNodeInternal(children, this._size, this.keys.slice(0)); + }; + BNodeInternal.prototype.size = function () { + return this._size; }; BNodeInternal.prototype.greedyClone = function (force) { if (this.isShared && !force) return this; - var nu = new BNodeInternal(this.children.slice(0), this.keys.slice(0)); + var nu = new BNodeInternal(this.children.slice(0), this._size, this.keys.slice(0)); for (var i = 0; i < nu.children.length; i++) nu.children[i] = nu.children[i].greedyClone(force); return nu; @@ -1467,15 +1469,19 @@ var BNodeInternal = /** @class */ (function (_super) { check(kL > 1 || depth > 0, "internal node has length", kL, "at depth", depth, "baseIndex", baseIndex); var size = 0, c = this.children, k = this.keys, childSize = 0; for (var i = 0; i < cL; i++) { - size += c[i].checkValid(depth + 1, tree, baseIndex + size); - childSize += c[i].keys.length; + var child = c[i]; + var subtreeSize = child.checkValid(depth + 1, tree, baseIndex + size); + check(subtreeSize === child.size(), "cached size 
mismatch at depth", depth, "index", i, "baseIndex", baseIndex); + size += subtreeSize; + childSize += child.keys.length; check(size >= childSize, "wtf", baseIndex); // no way this will ever fail - check(i === 0 || c[i - 1].constructor === c[i].constructor, "type mismatch, baseIndex:", baseIndex); - if (c[i].maxKey() != k[i]) - check(false, "keys[", i, "] =", k[i], "is wrong, should be ", c[i].maxKey(), "at depth", depth, "baseIndex", baseIndex); + check(i === 0 || c[i - 1].constructor === child.constructor, "type mismatch, baseIndex:", baseIndex); + if (child.maxKey() != k[i]) + check(false, "keys[", i, "] =", k[i], "is wrong, should be ", child.maxKey(), "at depth", depth, "baseIndex", baseIndex); if (!(i === 0 || tree._compare(k[i - 1], k[i]) < 0)) check(false, "sort violation at depth", depth, "index", i, "keys", k[i - 1], k[i]); } + check(this._size === size, "internal node cached size mismatch at depth", depth, "baseIndex", baseIndex, "cached", this._size, "actual", size); // 2020/08: BTree doesn't always avoid grossly undersized nodes, // but AFAIK such nodes are pretty harmless, so accept them. var toofew = childSize === 0; // childSize < (tree.maxNodeSize >> 1)*cL; @@ -1509,7 +1515,9 @@ var BNodeInternal = /** @class */ (function (_super) { this.keys[i] = c[i].maxKey(); } } + var oldSize = child.size(); var result = child.set(key, value, overwrite, tree); + this._size += child.size() - oldSize; if (result === false) return false; this.keys[i] = child.maxKey(); @@ -1538,6 +1546,7 @@ var BNodeInternal = /** @class */ (function (_super) { BNodeInternal.prototype.insert = function (i, child) { this.children.splice(i, 0, child); this.keys.splice(i, 0, child.maxKey()); + this._size += child.size(); }; /** * Split this node. 
@@ -1546,21 +1555,36 @@ var BNodeInternal = /** @class */ (function (_super) { BNodeInternal.prototype.splitOffRightSide = function () { // assert !this.isShared; var half = this.children.length >> 1; - return new BNodeInternal(this.children.splice(half), this.keys.splice(half)); + var newChildren = this.children.splice(half); + var newKeys = this.keys.splice(half); + var movedSize = sumChildSizes(newChildren); + var newNode = new BNodeInternal(newChildren, movedSize, newKeys); + this._size -= movedSize; + return newNode; }; BNodeInternal.prototype.takeFromRight = function (rhs) { // Reminder: parent node must update its copy of key for this node // assert: neither node is shared // assert rhs.keys.length > (maxNodeSize/2 && this.keys.length (maxNodeSize/2 && this.keys.length }); }); +describe('cached sizes', () => +{ + function buildTestTree(entryCount: number, maxNodeSize: number) { + const tree = new BTree(undefined, undefined, maxNodeSize); + for (let i = 0; i < entryCount; i++) { + tree.set(i, i); + } + return tree; + } + + test('checkValid detects root size mismatch', () => { + const tree = buildTestTree(64, 8); + const root = (tree as any)._root; + expect(root.isLeaf).toBe(false); + (root as any).size = 0; + expect(() => tree.checkValid()).toThrow(); + }); + + test('checkValid detects mismatched child sizes', () => { + const tree = buildTestTree(512, 8); + const root = (tree as any)._root; + expect(root.isLeaf).toBe(false); + const internalChild = (root as any).children.find((child: any) => !child.isLeaf); + expect(internalChild).toBeDefined(); + (internalChild as any).size = 0; + expect(() => tree.checkValid()).toThrow(); + }); +}); + describe('Simple tests on leaf nodes', () => { test('A few insertions (fanout 8)', insert8.bind(null, 8)); diff --git a/b+tree.ts b/b+tree.ts index 42c72c4..824b32a 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -184,7 +184,6 @@ export function simpleComparator(a: any, b: any): number { export default class BTree implements 
ISortedMapF, ISortedMap { private _root: BNode = EmptyLeaf as BNode; - _size: number = 0; _maxNodeSize: number; /** @@ -212,16 +211,15 @@ export default class BTree implements ISortedMapF, ISortedMap // ES6 Map methods ///////////////////////////////////////////////////// /** Gets the number of key-value pairs in the tree. */ - get size() { return this._size; } + get size(): number { return this._root.size(); } /** Gets the number of key-value pairs in the tree. */ - get length() { return this._size; } + get length(): number { return this.size; } /** Returns true iff the tree contains no key-value pairs. */ - get isEmpty() { return this._size === 0; } + get isEmpty(): boolean { return this._root.size() === 0; } /** Releases the tree so that its size is 0. */ clear() { this._root = EmptyLeaf as BNode; - this._size = 0; } forEach(callback: (v:V, k:K, tree:BTree) => void, thisArg?: any): number; @@ -290,7 +288,8 @@ export default class BTree implements ISortedMapF, ISortedMap if (result === true || result === false) return result; // Root node has split, so create a new root node. 
- this._root = new BNodeInternal([this._root, result]); + const children = [this._root, result]; + this._root = new BNodeInternal(children, sumChildSizes(children)); return true; } @@ -870,7 +869,6 @@ export default class BTree implements ISortedMapF, ISortedMap this._root.isShared = true; var result = new BTree(undefined, this._compare, this._maxNodeSize); result._root = this._root; - result._size = this._size; return result; } @@ -882,7 +880,6 @@ export default class BTree implements ISortedMapF, ISortedMap greedyClone(force?: boolean): BTree { var result = new BTree(undefined, this._compare, this._maxNodeSize); result._root = this._root.greedyClone(force); - result._size = this._size; return result; } @@ -1228,6 +1225,10 @@ class BNode { this.isShared = undefined; } + size(): number { + return this.keys.length; + } + /////////////////////////////////////////////////////////////////////////// // Shared methods ///////////////////////////////////////////////////////// @@ -1387,8 +1388,6 @@ class BNode { if (i < 0) { // key does not exist yet i = ~i; - tree._size++; - if (this.keys.length < tree._maxNodeSize) { return this.insertInLeaf(i, key, value, tree); } else { @@ -1506,7 +1505,6 @@ class BNode { this.keys.splice(i, 1); if (this.values !== undefVals) this.values.splice(i, 1); - tree._size--; i--; iHigh--; } else if (result.hasOwnProperty('value')) { @@ -1540,12 +1538,13 @@ class BNodeInternal extends BNode { // children, but I find it easier to keep the array lengths equal: each // keys[i] caches the value of children[i].maxKey(). children: BNode[]; + _size: number; /** * This does not mark `children` as shared, so it is the responsibility of the caller * to ensure children are either marked shared, or aren't included in another tree. 
*/ - constructor(children: BNode[], keys?: K[]) { + constructor(children: BNode[], size: number, keys?: K[]) { if (!keys) { keys = []; for (var i = 0; i < children.length; i++) @@ -1553,19 +1552,24 @@ class BNodeInternal extends BNode { } super(keys); this.children = children; + this._size = size; } clone(): BNode { var children = this.children.slice(0); for (var i = 0; i < children.length; i++) children[i].isShared = true; - return new BNodeInternal(children, this.keys.slice(0)); + return new BNodeInternal(children, this._size, this.keys.slice(0)); + } + + size(): number { + return this._size; } greedyClone(force?: boolean): BNode { if (this.isShared && !force) return this; - var nu = new BNodeInternal(this.children.slice(0), this.keys.slice(0)); + var nu = new BNodeInternal(this.children.slice(0), this._size, this.keys.slice(0)); for (var i = 0; i < nu.children.length; i++) nu.children[i] = nu.children[i].greedyClone(force); return nu; @@ -1616,15 +1620,19 @@ class BNodeInternal extends BNode { check(kL > 1 || depth > 0, "internal node has length", kL, "at depth", depth, "baseIndex", baseIndex); let size = 0, c = this.children, k = this.keys, childSize = 0; for (var i = 0; i < cL; i++) { - size += c[i].checkValid(depth + 1, tree, baseIndex + size); - childSize += c[i].keys.length; + var child = c[i]; + var subtreeSize = child.checkValid(depth + 1, tree, baseIndex + size); + check(subtreeSize === child.size(), "cached size mismatch at depth", depth, "index", i, "baseIndex", baseIndex); + size += subtreeSize; + childSize += child.keys.length; check(size >= childSize, "wtf", baseIndex); // no way this will ever fail - check(i === 0 || c[i-1].constructor === c[i].constructor, "type mismatch, baseIndex:", baseIndex); - if (c[i].maxKey() != k[i]) - check(false, "keys[", i, "] =", k[i], "is wrong, should be ", c[i].maxKey(), "at depth", depth, "baseIndex", baseIndex); + check(i === 0 || c[i-1].constructor === child.constructor, "type mismatch, baseIndex:", baseIndex); + 
if (child.maxKey() != k[i]) + check(false, "keys[", i, "] =", k[i], "is wrong, should be ", child.maxKey(), "at depth", depth, "baseIndex", baseIndex); if (!(i === 0 || tree._compare(k[i-1], k[i]) < 0)) check(false, "sort violation at depth", depth, "index", i, "keys", k[i-1], k[i]); } + check(this._size === size, "internal node cached size mismatch at depth", depth, "baseIndex", baseIndex, "cached", this._size, "actual", size); // 2020/08: BTree doesn't always avoid grossly undersized nodes, // but AFAIK such nodes are pretty harmless, so accept them. let toofew = childSize === 0; // childSize < (tree.maxNodeSize >> 1)*cL; @@ -1661,7 +1669,9 @@ class BNodeInternal extends BNode { } } + var oldSize = child.size(); var result = child.set(key, value, overwrite, tree); + this._size += child.size() - oldSize; if (result === false) return false; this.keys[i] = child.maxKey(); @@ -1691,6 +1701,7 @@ class BNodeInternal extends BNode { insert(i: index, child: BNode) { this.children.splice(i, 0, child); this.keys.splice(i, 0, child.maxKey()); + this._size += child.size(); } /** @@ -1700,23 +1711,38 @@ class BNodeInternal extends BNode { splitOffRightSide() { // assert !this.isShared; var half = this.children.length >> 1; - return new BNodeInternal(this.children.splice(half), this.keys.splice(half)); + var newChildren = this.children.splice(half); + var newKeys = this.keys.splice(half); + var movedSize = sumChildSizes(newChildren); + var newNode = new BNodeInternal(newChildren, movedSize, newKeys); + this._size -= movedSize; + return newNode; } takeFromRight(rhs: BNode) { // Reminder: parent node must update its copy of key for this node // assert: neither node is shared // assert rhs.keys.length > (maxNodeSize/2 && this.keys.length; this.keys.push(rhs.keys.shift()!); - this.children.push((rhs as BNodeInternal).children.shift()!); + const child = rhsInternal.children.shift()!; + this.children.push(child); + const size = child.size(); + rhsInternal._size -= size; + this._size 
+= size; } takeFromLeft(lhs: BNode) { // Reminder: parent node must update its copy of key for this node // assert: neither node is shared // assert rhs.keys.length > (maxNodeSize/2 && this.keys.length; + const child = lhsInternal.children.pop()!; this.keys.unshift(lhs.keys.pop()!); - this.children.unshift((lhs as BNodeInternal).children.pop()!); + this.children.unshift(child); + const size = child.size(); + lhsInternal._size -= size; + this._size += size; } ///////////////////////////////////////////////////////////////////////////// @@ -1743,12 +1769,15 @@ class BNodeInternal extends BNode { } else if (i <= iHigh) { try { for (; i <= iHigh; i++) { - if (children[i].isShared) - children[i] = children[i].clone(); - var result = children[i].forRange(low, high, includeHigh, editMode, tree, count, onFound); + let child = children[i]; + if (child.isShared) + children[i] = child = child.clone(); + const beforeSize = child.size(); + const result = child.forRange(low, high, includeHigh, editMode, tree, count, onFound); // Note: if children[i] is empty then keys[i]=undefined. // This is an invalid state, but it is fixed below. - keys[i] = children[i].maxKey(); + keys[i] = child.maxKey(); + this._size += child.size() - beforeSize; if (typeof result !== 'number') return result; count = result; @@ -1764,7 +1793,8 @@ class BNodeInternal extends BNode { this.tryMerge(i, tree._maxNodeSize); } else { // child is empty! delete it! 
keys.splice(i, 1); - children.splice(i, 1); + const removed = children.splice(i, 1); + check(removed[0].size() === 0, "emptiness cleanup"); } } } @@ -1803,6 +1833,7 @@ class BNodeInternal extends BNode { this.keys.push.apply(this.keys, rhs.keys); const rhsChildren = (rhs as any as BNodeInternal).children; this.children.push.apply(this.children, rhsChildren); + this._size += rhs.size(); if (rhs.isShared && !this.isShared) { // All children of a shared node are implicitly shared, and since their new @@ -1846,6 +1877,13 @@ type DiffCursor = { height: number, internalSpine: BNode[][], levelInd // has the side effect of scanning the prototype chain. var undefVals: any[] = []; +function sumChildSizes(children: BNode[]): number { + var total = 0; + for (var i = 0; i < children.length; i++) + total += children[i].size(); + return total; +} + const Delete = {delete: true}, DeleteRange = () => Delete; const Break = {break: true}; const EmptyLeaf = (function() { From 53f8e88264794ca800d1de7e821e84265b19cfdd Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 28 Oct 2025 17:07:13 -0700 Subject: [PATCH 002/143] more tests --- b+tree.test.ts | 104 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 15 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index cd3d59a..dcc650c 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -201,22 +201,96 @@ describe('cached sizes', () => return tree; } - test('checkValid detects root size mismatch', () => { - const tree = buildTestTree(64, 8); - const root = (tree as any)._root; - expect(root.isLeaf).toBe(false); - (root as any).size = 0; - expect(() => tree.checkValid()).toThrow(); - }); + function expectSize(tree: BTree, size: number) { + expect(tree.size).toBe(size); + tree.checkValid(); + } + + [4, 6, 8, 16].forEach(nodeSize => { + describe(`fanout ${nodeSize}`, () => { + test('checkValid detects root size mismatch', () => { + const tree = buildTestTree(nodeSize * 8, nodeSize); + const root = 
(tree as any)._root; + expect(root.isLeaf).toBe(false); + (root as any).size = 0; + expect(() => tree.checkValid()).toThrow(); + }); + + test('checkValid detects mismatched child sizes', () => { + const tree = buildTestTree(nodeSize * nodeSize * 4, nodeSize); + const root = (tree as any)._root; + expect(root.isLeaf).toBe(false); + const internalChild = (root as any).children.find((child: any) => !child.isLeaf); + expect(internalChild).toBeDefined(); + (internalChild as any).size = 0; + expect(() => tree.checkValid()).toThrow(); + }); + + test('mutations preserve cached sizes', () => { + const tree = buildTestTree(nodeSize * 4, nodeSize); + const initialSize = tree.size; + const expectedKeys = new Set(); + for (let i = 0; i < initialSize; i++) + expectedKeys.add(i); + expectSize(tree, expectedKeys.size); + + // Insert sequential items + const itemsToAdd = nodeSize * 2; + for (let i = 0; i < itemsToAdd; i++) { + const key = initialSize + i; + tree.set(key, key); + expectedKeys.add(key); + } + expectSize(tree, expectedKeys.size); + + // Delete every third new item + let deleted = 0; + for (let i = 0; i < itemsToAdd; i += 3) { + const key = initialSize + i; + if (tree.delete(key)) { + deleted++; + expectedKeys.delete(key); + } + } + expectSize(tree, expectedKeys.size); + + // Bulk delete a middle range + const low = Math.floor(initialSize / 2); + const high = low + nodeSize; + const rangeDeleted = tree.deleteRange(low, high, true); + const toRemove = Array.from(expectedKeys).filter(k => k >= low && k <= high); + expect(rangeDeleted).toBe(toRemove.length); + toRemove.forEach(k => expectedKeys.delete(k)); + expectSize(tree, expectedKeys.size); + + // Mix insertions and overwrites + const extra = nodeSize * 5; + for (let i = 0; i < extra; i++) { + const insertKey = -i - 1; + tree.set(insertKey, insertKey); + expectedKeys.add(insertKey); + const overwriteKey = i % (initialSize + 1); + tree.set(overwriteKey, 42); // overwrite existing keys + expectedKeys.add(overwriteKey); 
+ } + expectSize(tree, expectedKeys.size); - test('checkValid detects mismatched child sizes', () => { - const tree = buildTestTree(512, 8); - const root = (tree as any)._root; - expect(root.isLeaf).toBe(false); - const internalChild = (root as any).children.find((child: any) => !child.isLeaf); - expect(internalChild).toBeDefined(); - (internalChild as any).size = 0; - expect(() => tree.checkValid()).toThrow(); + // Clone should preserve size and cached metadata + const toClone = tree.clone(); + expectSize(toClone, expectedKeys.size); + + // Edit range deletes some entries, patches others + tree.editRange(-extra, extra, false, (k, v, counter) => { + if (counter % 11 === 0) { + expectedKeys.delete(k); + return { delete: true }; + } + if (k % 5 === 0) + return { value: v + 1 }; + }); + expectSize(tree, expectedKeys.size); + }); + }); }); }); From 5c0f85c65f92e5a1f9b608f2de9c9eb7435018d1 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 28 Oct 2025 17:09:08 -0700 Subject: [PATCH 003/143] add merge tests --- b+tree.test.ts | 554 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 553 insertions(+), 1 deletion(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index dcc650c..1aed4fa 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1,4 +1,4 @@ -import BTree, {IMap, EmptyBTree, defaultComparator, simpleComparator} from './b+tree'; +import BTree, {IMap, defaultComparator, simpleComparator} from './b+tree'; import SortedArray from './sorted-array'; import MersenneTwister from 'mersenne-twister'; @@ -1112,3 +1112,555 @@ function testBTree(maxNodeSize: number) expect(tree.get(key)).not.toBeUndefined(); }); } + +describe('BTree merge tests with fanout 32', testMerge.bind(null, 32)); +describe('BTree merge tests with fanout 10', testMerge.bind(null, 10)); +describe('BTree merge tests with fanout 4', testMerge.bind(null, 4)); + +function testMerge(maxNodeSize: number) { + const compare = (a: number, b: number) => a - b; + const sharesNode = (root: any, 
targetNode: any): boolean => { + if (root === targetNode) + return true; + if (root.isLeaf) + return false; + const children = (root as any).children as any[]; + for (let i = 0; i < children.length; i++) { + if (sharesNode(children[i], targetNode)) + return true; + } + return false; + }; + + test('Merge two empty trees', () => { + const tree1 = new BTree([], compare, maxNodeSize); + const tree2 = new BTree([], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(0); + expect(result.toArray()).toEqual([]); + }); + + test('Merge empty tree with non-empty tree', () => { + const tree1 = new BTree([], compare, maxNodeSize); + const tree2 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(3); + expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + }); + + test('Merge non-empty tree with empty tree', () => { + const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTree([], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(3); + expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + }); + + test('Merge with no overlapping keys', () => { + const tree1 = new BTree([[1, 10], [3, 30], [5, 50]], compare, maxNodeSize); + const tree2 = new BTree([[2, 20], [4, 40], [6, 60]], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => { + throw new Error('Should not be called for non-overlapping keys'); + }; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(6); + expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60]]); + }); + + test('Merge with 
completely overlapping keys - sum values', () => { + const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTree([[1, 5], [2, 15], [3, 25]], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(3); + expect(result.toArray()).toEqual([[1, 15], [2, 35], [3, 55]]); + }); + + test('Merge with completely overlapping keys - prefer left', () => { + const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTree([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => v1; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(3); + expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + }); + + test('Merge with completely overlapping keys - prefer right', () => { + const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTree([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => v2; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(3); + expect(result.toArray()).toEqual([[1, 100], [2, 200], [3, 300]]); + }); + + test('Merge with partially overlapping keys', () => { + const tree1 = new BTree([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); + const tree2 = new BTree([[3, 300], [4, 400], [5, 500], [6, 600]], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(6); + expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 330], [4, 440], [5, 500], [6, 600]]); + }); + + test('Merge with overlapping keys - exclude some keys via undefined', () => { + const tree1 = new BTree([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); + const 
tree2 = new BTree([[2, 200], [3, 300], [4, 400], [5, 500]], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => { + // Exclude key 3 by returning undefined + if (k === 3) return undefined; + return v1 + v2; + }; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(4); // Keys 1, 2, 4, 5 (key 3 excluded) + expect(result.toArray()).toEqual([[1, 10], [2, 220], [4, 440], [5, 500]]); + }); + + test('Merge is called even when values are equal', () => { + const tree1 = new BTree([[1, 10], [2, 20]], compare, maxNodeSize); + const tree2 = new BTree([[2, 20], [3, 30]], compare, maxNodeSize); + + const mergeCallLog: Array<{k: number, v1: number, v2: number}> = []; + const mergeFunc = (k: number, v1: number, v2: number) => { + mergeCallLog.push({k, v1, v2}); + return v1; + }; + + const result = tree1.merge(tree2, mergeFunc); + + expect(mergeCallLog).toEqual([{k: 2, v1: 20, v2: 20}]); + expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + }); + + test('Merge does not mutate input trees', () => { + const entries1: [number, number][] = [[1, 10], [2, 20], [3, 30]]; + const entries2: [number, number][] = [[2, 200], [3, 300], [4, 400]]; + const tree1 = new BTree(entries1, compare, maxNodeSize); + const tree2 = new BTree(entries2, compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + + const result = tree1.merge(tree2, mergeFunc); + + // Verify original trees are unchanged + expect(tree1.toArray()).toEqual(entries1); + expect(tree2.toArray()).toEqual(entries2); + + // Verify result is correct + expect(result.toArray()).toEqual([[1, 10], [2, 220], [3, 330], [4, 400]]); + }); + + test('Merge with disjoint ranges', () => { + // Tree with keys 1-100 and 201-300 + const entries1: [number, number][] = []; + for (let i = 1; i <= 100; i++) entries1.push([i, i]); + for (let i = 201; i <= 300; i++) entries1.push([i, i]); + + // Tree with keys 101-200 + const entries2: [number, number][] = 
[]; + for (let i = 101; i <= 200; i++) entries2.push([i, i]); + + const tree1 = new BTree(entries1, compare, maxNodeSize); + const tree2 = new BTree(entries2, compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => { + throw new Error('Should not be called - no overlaps'); + }; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(300); + // Check first few, last few, and boundaries + expect(result.get(1)).toBe(1); + expect(result.get(100)).toBe(100); + expect(result.get(101)).toBe(101); + expect(result.get(200)).toBe(200); + expect(result.get(201)).toBe(201); + expect(result.get(300)).toBe(300); + result.checkValid(); + }); + + test('Merge large trees with some overlaps', () => { + // Tree 1: keys 0-999 + const entries1: [number, number][] = []; + for (let i = 0; i < 1000; i++) entries1.push([i, i]); + + // Tree 2: keys 500-1499 + const entries2: [number, number][] = []; + for (let i = 500; i < 1500; i++) entries2.push([i, i * 10]); + + const tree1 = new BTree(entries1, compare, maxNodeSize); + const tree2 = new BTree(entries2, compare, maxNodeSize); + + let mergeCount = 0; + const mergeFunc = (k: number, v1: number, v2: number) => { + mergeCount++; + return v1 + v2; // Sum the values + }; + + const result = tree1.merge(tree2, mergeFunc); + + // Verify merge was called for overlapping keys (500-999) + expect(mergeCount).toBe(500); + + // Total unique keys: 1500 + expect(result.size).toBe(1500); + + // Check various ranges + expect(result.get(0)).toBe(0); // Only in tree1 + expect(result.get(499)).toBe(499); // Only in tree1 + expect(result.get(500)).toBe(500 + 5000); // In both: 500 + (500*10) + expect(result.get(999)).toBe(999 + 9990); // In both: 999 + (999*10) + expect(result.get(1000)).toBe(10000); // Only in tree2 + expect(result.get(1499)).toBe(14990); // Only in tree2 + + result.checkValid(); + }); + + test('Merge with overlaps at boundaries', () => { + // Test edge case where overlaps occur at the 
boundaries of node ranges + const tree1 = new BTree([], compare, maxNodeSize); + const tree2 = new BTree([], compare, maxNodeSize); + + // Fill tree1 with even numbers + for (let i = 0; i < 100; i++) { + tree1.set(i * 2, i * 2); + } + + // Fill tree2 with numbers in a different pattern + for (let i = 50; i < 150; i++) { + tree2.set(i, i * 10); + } + + let mergeCallCount = 0; + const mergeFunc = (k: number, v1: number, v2: number) => { + mergeCallCount++; + expect(k % 2).toBe(0); // Only even keys should overlap + return v1 + v2; + }; + + const result = tree1.merge(tree2, mergeFunc); + + // Keys 100, 102, 104, ..., 198 overlap (50 keys) + expect(mergeCallCount).toBe(50); + + result.checkValid(); + }); + + test('Merge throws error when comparators differ', () => { + const tree1 = new BTree([[1, 10]], compare, maxNodeSize); + const tree2 = new BTree([[2, 20]], (a, b) => b - a, maxNodeSize); // Reverse comparator + const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + + expect(() => tree1.merge(tree2, mergeFunc)).toThrow(); + }); + + test('Merge throws error when max node sizes differ', () => { + const otherFanout = maxNodeSize === 32 ? 
16 : 32; + const tree1 = new BTree([[1, 10]], compare, maxNodeSize); + const tree2 = new BTree([[2, 20]], compare, otherFanout); + const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + + expect(() => tree1.merge(tree2, mergeFunc)).toThrow(); + }); + + test('Merge result can be modified without affecting inputs', () => { + const tree1 = new BTree([[1, 10], [2, 20]], compare, maxNodeSize); + const tree2 = new BTree([[3, 30], [4, 40]], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + + const result = tree1.merge(tree2, mergeFunc); + + // Modify result + result.set(1, 100); + result.set(5, 50); + result.delete(2); + + // Verify inputs unchanged + expect(tree1.get(1)).toBe(10); + expect(tree1.get(2)).toBe(20); + expect(tree1.has(5)).toBe(false); + expect(tree2.get(3)).toBe(30); + expect(tree2.get(4)).toBe(40); + }); + + test('Merge with single element trees', () => { + const tree1 = new BTree([[5, 50]], compare, maxNodeSize); + const tree2 = new BTree([[5, 500]], compare, maxNodeSize); + const mergeFunc = (k: number, v1: number, v2: number) => Math.max(v1, v2); + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(1); + expect(result.get(5)).toBe(500); + }); + + test('Merge interleaved keys', () => { + // Tree1 has keys: 1, 3, 5, 7, 9, ... + const tree1 = new BTree([], compare, maxNodeSize); + for (let i = 1; i <= 100; i += 2) { + tree1.set(i, i); + } + + // Tree2 has keys: 2, 4, 6, 8, 10, ... 
+ const tree2 = new BTree([], compare, maxNodeSize); + for (let i = 2; i <= 100; i += 2) { + tree2.set(i, i); + } + + const mergeFunc = (k: number, v1: number, v2: number) => { + throw new Error('Should not be called - no overlapping keys'); + }; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(100); + for (let i = 1; i <= 100; i++) { + expect(result.get(i)).toBe(i); + } + result.checkValid(); + }); + + test('Merge excluding all overlapping keys', () => { + const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTree([[2, 200], [3, 300], [4, 400]], compare, maxNodeSize); + // Exclude all overlapping keys + const mergeFunc = (k: number, v1: number, v2: number) => undefined; + + const result = tree1.merge(tree2, mergeFunc); + + // Only non-overlapping keys remain + expect(result.toArray()).toEqual([[1, 10], [4, 400]]); + }); + + test('Merge reuses appended subtree with minimum fanout', () => { + const tree1 = new BTree([], compare, maxNodeSize); + const tree2 = new BTree([], compare, maxNodeSize); + + for (let i = 0; i < 400; i++) { + tree1.set(i, i); + } + for (let i = 400; i < 800; i++) { + tree2.set(i, i * 2); + } + + const mergeFunc = (k: number, v1: number, v2: number) => { + throw new Error('Should not be called for disjoint ranges'); + }; + + const result = tree1.merge(tree2, mergeFunc); + + expect(result.size).toBe(tree1.size + tree2.size); + const resultRoot = result['_root'] as any; + const tree2Root = tree2['_root'] as any; + expect(sharesNode(resultRoot, tree2Root)).toBe(true); + result.checkValid(); + }); + + test('Merge with large disjoint ranges', () => { + const tree1 = new BTree([], compare, maxNodeSize); + const tree2 = new BTree([], compare, maxNodeSize); + + for (let i = 0; i <= 10000; i++) { + tree1.set(i, i); + } + for (let i = 10001; i <= 20000; i++) { + tree2.set(i, i); + } + + let mergeCalls = 0; + const mergeFunc = (k: number, v1: number, v2: number) => { + mergeCalls++; 
+ return v1 + v2; + }; + + const result = tree1.merge(tree2, mergeFunc); + + expect(mergeCalls).toBe(0); + expect(result.size).toBe(tree1.size + tree2.size); + expect(result.get(0)).toBe(0); + expect(result.get(20000)).toBe(20000); + const resultRoot = result['_root'] as any; + const tree2Root = tree2['_root'] as any; + expect(sharesNode(resultRoot, tree2Root)).toBe(true); + result.checkValid(); + }); + + test('Merge trees with ~10% overlap', () => { + const size = 200; + const offset = Math.floor(size * 0.9); + const overlap = size - offset; + + const tree1 = new BTree([], compare, maxNodeSize); + const tree2 = new BTree([], compare, maxNodeSize); + + for (let i = 0; i < size; i++) { + tree1.set(i, i); + } + for (let i = 0; i < size; i++) { + const key = offset + i; + tree2.set(key, key * 10); + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + const result = tree1.merge(tree2, preferLeft); + + expect(result.size).toBe(size + size - overlap); + result.checkValid(); + + for (let i = 0; i < offset; i++) { + expect(result.get(i)).toBe(i); + } + for (let i = offset; i < size; i++) { + expect(result.get(i)).toBe(i); + } + const upperBound = offset + size; + for (let i = size; i < upperBound; i++) { + expect(result.get(i)).toBe(i * 10); + } + + expect(tree1.size).toBe(size); + expect(tree2.size).toBe(size); + }); +} + +describe('BTree merge fuzz tests', () => { + const compare = (a: number, b: number) => a - b; + const branchingFactors = [4, 8, 16, 32]; + const seeds = [0x12345678, 0x9ABCDEF]; + const FUZZ_SETTINGS = { + scenarioBudget: 1, // Increase to explore more seed/fanout combinations. + iterationsPerScenario: 1, // Increase to deepen each scenario. + maxInsertSize: 200, // Maximum keys inserted per iteration. + keyRange: 10_000, // Range of key distribution. + valueRange: 1_000, // Range of value distribution. + sampleChecks: 3, // Number of random spot-checks per result. + timeoutMs: 10_000 // Jest timeout for the fuzz test. 
+ } as const; + + const strategies = [ + { + name: 'prefer-left', + fn: (k: number, left: number, _right: number) => left, + apply: (_k: number, left: number, _right: number) => left + }, + { + name: 'prefer-right', + fn: (_k: number, _left: number, right: number) => right, + apply: (_k: number, _left: number, right: number) => right + }, + { + name: 'sum', + fn: (_k: number, left: number, right: number) => left + right, + apply: (_k: number, left: number, right: number) => left + right + }, + { + name: 'min', + fn: (_k: number, left: number, right: number) => Math.min(left, right), + apply: (_k: number, left: number, right: number) => Math.min(left, right) + }, + { + name: 'drop-even-sum', + fn: (_k: number, left: number, right: number) => ((left + right) & 1) === 0 ? undefined : right - left, + apply: (_k: number, left: number, right: number) => ((left + right) & 1) === 0 ? undefined : right - left + } + ] as const; + + test('randomized merges across branching factors', () => { + jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + + const scenarioConfigs: Array<{ seedBase: number, maxNodeSize: number }> = []; + for (const seedBase of seeds) + for (const maxNodeSize of branchingFactors) + scenarioConfigs.push({ seedBase, maxNodeSize }); + + const scenariosToRun = Math.min(FUZZ_SETTINGS.scenarioBudget, scenarioConfigs.length); + const selectedScenarios = scenarioConfigs.slice(0, scenariosToRun); + + for (const { seedBase, maxNodeSize } of selectedScenarios) { + const baseSeed = (seedBase ^ (maxNodeSize * 0x9E3779B1)) >>> 0; + const fuzzRand = new MersenneTwister(baseSeed); + const nextInt = (limit: number) => { + if (limit <= 0) + return 0; + return Math.floor(fuzzRand.random() * limit); + }; + + let currentTree = new BTree([], compare, maxNodeSize); + let currentMap = new Map(); + + for (let iteration = 0; iteration < FUZZ_SETTINGS.iterationsPerScenario; iteration++) { + const size = nextInt(FUZZ_SETTINGS.maxInsertSize); + const otherTree = new BTree([], compare, 
maxNodeSize); + const otherMap = new Map(); + + for (let i = 0; i < size; i++) { + const key = nextInt(FUZZ_SETTINGS.keyRange); + const value = nextInt(FUZZ_SETTINGS.valueRange); + otherTree.set(key, value); + otherMap.set(key, value); + } + + const strategy = strategies[nextInt(strategies.length)]; + const mergeFunc = strategy.fn; + + const expectedMap = new Map(currentMap); + + otherMap.forEach((rightValue, key) => { + if (expectedMap.has(key)) { + const leftValue = expectedMap.get(key)!; + const mergedValue = strategy.apply(key, leftValue, rightValue); + if (mergedValue === undefined) + expectedMap.delete(key); + else + expectedMap.set(key, mergedValue); + } else { + expectedMap.set(key, rightValue); + } + }); + + const previousSnapshot = currentTree.toArray(); + const merged = currentTree.merge(otherTree, mergeFunc); + + expect(currentTree.toArray()).toEqual(previousSnapshot); + + if ((iteration & 1) === 0) { + merged.checkValid(); + } + + const expectedArray = Array.from(expectedMap.entries()).sort((a, b) => a[0] - b[0]); + expect(merged.toArray()).toEqual(expectedArray); + + // Spot-check a few sampled keys for consistency with the Map + const sampleCount = Math.min(FUZZ_SETTINGS.sampleChecks, expectedArray.length); + for (let s = 0; s < sampleCount; s++) { + const sampleIndex = nextInt(expectedArray.length); + const [sampleKey, sampleValue] = expectedArray[sampleIndex]; + expect(merged.get(sampleKey)).toBe(sampleValue); + } + + currentTree = merged; + currentMap = expectedMap; + } + } + }); +}); From f228868b0b1086313529c6b21d4316b6abecf30e Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 28 Oct 2025 17:10:11 -0700 Subject: [PATCH 004/143] add merge stub --- b+tree.ts | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/b+tree.ts b/b+tree.ts index 824b32a..dd37430 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -569,6 +569,35 @@ export default class BTree implements ISortedMapF, ISortedMap return {nodequeue, nodeindex, 
leaf:nextnode}; } + /** + * Merges this tree with `other`, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param other The other tree to merge into this one. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. + * @description Complexity: O(1) when the ranges do not overlap; otherwise + * O(k ยท log n) where k is the number of overlapping keys. + */ + merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { + // Fast paths for empty trees + const sizeThis = this._root.size(); + const sizeOther = other._root.size(); + + if (sizeThis === 0) + return other.clone(); + if (sizeOther === 0) + return this.clone(); + + // Ensure both trees share the same comparator reference + if (this._compare !== other._compare) + throw new Error("Cannot merge BTrees with different comparators."); + if (this._maxNodeSize !== other._maxNodeSize) + throw new Error("Cannot merge BTrees with different max node sizes."); + + throw new Error("Not yet implemented: BTree.merge"); + } + /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. 
From 6a009b2afddf2588dbab0f8b10691fa144ca91ff Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 28 Oct 2025 17:10:41 -0700 Subject: [PATCH 005/143] add merge benchmarks --- benchmarks.ts | 303 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 302 insertions(+), 1 deletion(-) diff --git a/benchmarks.ts b/benchmarks.ts index 6f64ad0..54c16a4 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -45,6 +45,31 @@ function measure(message: (t:T) => string, callback: () => T, minMillise return result; } +function countTreeNodeStats(tree: BTree) { + const root = (tree as any)._root; + if (!root) + return { total: 0, shared: 0 }; + + const visit = (node: any, ancestorShared: boolean): { total: number, shared: number } => { + if (!node) + return { total: 0, shared: 0 }; + const selfShared = node.sharedSizeTag < 0 || ancestorShared; + let shared = selfShared ? 1 : 0; + let total = 1; + const children: any[] | undefined = node.children; + if (children) { + for (const child of children) { + const stats = visit(child, selfShared); + total += stats.total; + shared += stats.shared; + } + } + return { total, shared }; + }; + + return visit(root, false); +} + console.log("Benchmark results (milliseconds with integer keys/values)"); console.log("---------------------------------------------------------"); @@ -353,4 +378,280 @@ console.log("### Delta between B+ trees"); }); } }) -} \ No newline at end of file +} + +console.log(); +console.log("### Merge between B+ trees"); +{ + console.log(); + const sizes = [100, 1000, 10000, 100000]; + + // Test 1: Non-overlapping ranges (best case - minimal intersections) + console.log("# Non-overlapping ranges (disjoint keys)"); + sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + + const offset = size * 3; + for (let i = 0; i < size; i++) { + tree1.set(i, i); // Keys: 0...(size-1) + tree2.set(offset + i, offset + i); // Keys well beyond tree1's range + } + + const preferLeft = (_k: number, 
v1: number, _v2: number) => v1; + const mergeResult = measure(() => `Merge ${size}+${size} non-overlapping trees using merge()`, () => { + return tree1.merge(tree2, preferLeft); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + + const baselineResult = measure(() => `Merge ${size}+${size} non-overlapping trees using clone+set loop (baseline)`, () => { + const result = tree1.clone(); + tree2.forEachPair((k, v) => { + result.set(k, v, false); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + }); + + console.log(); + console.log("# Adjacent ranges (one intersection points)"); + sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + + // Tree1: 0-size, Tree2: size-(2*size) + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i + size, i + size); + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + const mergeResult = measure(() => `Merge ${size}+${size} adjacent range trees using merge()`, () => { + return tree1.merge(tree2, preferLeft); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + + const baselineResult = measure(() => `Merge ${size}+${size} adjacent range trees using clone+set loop (baseline)`, () => { + const result = tree1.clone(); + tree2.forEachPair((k, v) => { + result.set(k, v, false); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + }); + + console.log(); + console.log("# Interleaved ranges (two intersection points)"); + sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + + // Tree1: 0-size, 2*size-3*size + // 
Tree2: size-2*size + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree1.set(i + 2 * size, i + 2 * size); + tree2.set(i + size, i + size); + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + const mergeResult = measure(() => `Merge ${size*2}+${size} interleaved range trees using merge()`, () => { + return tree1.merge(tree2, preferLeft); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + + const baselineResult = measure(() => `Merge ${size*2}+${size} interleaved range trees using clone+set loop (baseline)`, () => { + const result = tree1.clone(); + tree2.forEachPair((k, v) => { + result.set(k, v, false); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + }); + + console.log(); + console.log("# Complete overlap (worst case - all keys intersect)"); + sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i, i * 10); + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + const mergeResult = measure(() => `Merge ${size}+${size} completely overlapping trees (prefer left)`, () => { + return tree1.merge(tree2, preferLeft); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + }); + + console.log(); + console.log("# Partial overlap (10% intersection)"); + sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + + // Tree1: 0-(size) + // Tree2: (~0.9*size)-(1.9*size) + // Overlap: last 10% of tree1 and first 10% of tree2 + for (let i = 0; i < size; i++) { + tree1.set(i, i); + } + const offset = Math.floor(size * 0.9); + for (let i = 0; i < size; i++) { + const key = offset + i; + 
tree2.set(key, key * 10); + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + const mergeResult = measure(() => `Merge trees with 10% overlap (${size}+${size} keys) using merge()`, () => { + return tree1.merge(tree2, preferLeft); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + + const baselineResult = measure(() => `Merge trees with 10% overlap (${size}+${size} keys) using clone+set loop (baseline)`, () => { + const result = tree1.clone(); + tree2.forEachPair((k, v) => { + result.set(k, v, false); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + }); + + console.log(); + console.log("# Merge random overlaps"); + sizes.forEach((size) => { + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); + + const tree1 = new BTree(); + const tree2 = new BTree(); + + for (let k of keys1) { + tree1.set(k, k); + } + for (let k of keys2) { + tree2.set(k, k * 10); + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + const mergeResult = measure(() => `Merge ${tree1.size}+${tree2.size} trees with random keys using merge()`, () => { + return tree1.merge(tree2, preferLeft); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + + const baselineResult = measure(() => `Merge ${tree1.size}+${tree2.size} trees with random keys using clone+set loop (baseline)`, () => { + const result = tree1.clone(); + tree2.forEachPair((k, v) => { + result.set(k, v, false); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + }); + + console.log(); + console.log("# Merge with empty tree"); + 
sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + + for (let i = 0; i < size; i++) { + tree1.set(i, i); + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + const mergeResult = measure(() => `Merge ${size}-key tree with empty tree`, () => { + return tree1.merge(tree2, preferLeft); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + }); + + console.log(); + console.log("# Compare merge vs manual iteration for complete overlap"); + sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i, i * 10); + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + + const mergeResult = measure(() => `Merge ${size}+${size} overlapping trees using merge()`, () => { + return tree1.merge(tree2, preferLeft); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + + const baselineResult = measure(() => `Merge ${size}+${size} overlapping trees using clone+set loop (baseline)`, () => { + const result = tree1.clone(); + tree2.forEachPair((k, v) => { + result.set(k, v, false); // Don't overwrite + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const tree1 = new BTree(); + for (let i = 0; i < totalKeys; i++) { + tree1.set(i, i); + } + + const tree2 = new BTree(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + tree2.set(i, i); + } else { + 
tree2.set(totalKeys + i, totalKeys + i); + } + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + + const mergeResult = measure(() => `Merge ${tree1.size}+${tree2.size} sparse-overlap trees using merge()`, () => { + return tree1.merge(tree2, preferLeft); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + + const baselineResult = measure(() => `Merge ${tree1.size}+${tree2.size} sparse-overlap trees using clone+set loop (baseline)`, () => { + const result = tree1.clone(); + tree2.forEachPair((k, v) => { + result.set(k, v, false); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + } +} From bd5357ea853a6f4af944d2ff336ce47f73a7083d Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 29 Oct 2025 11:04:15 -0700 Subject: [PATCH 006/143] intersect + merge progress --- b+tree.d.ts | 31 ++++ b+tree.js | 373 +++++++++++++++++++++++++++++++++++++++++++ b+tree.test.ts | 18 +-- b+tree.ts | 420 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 827 insertions(+), 15 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 8da255b..d180df1 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -253,6 +253,37 @@ export default class BTree implements ISortedMapF, ISort */ entriesReversed(highestKey?: K, reusedArray?: (K | V)[], skipHighest?: boolean): IterableIterator<[K, V]>; private findPath; + /** + * Intersects this tree with `other`, calling the supplied `intersection` callback for each intersecting key/value pair. + * Neither tree is modified. + * @param other The other tree to intersect with this one. + * @param intersection Called for keys that appear in both trees. + * @description Complexity: O(N) where N is the number of intersecting keys. 
+ */ + intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void; + /** + * Merges this tree with `other`, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param other The other tree to merge into this one. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. + * @description Complexity: O(1) when the ranges do not overlap; otherwise + * O(k ยท log n) where k is the number of overlapping keys. + */ + merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree; + /** First pass of merge: decompose into disjoint reusable subtrees and merged leaves. */ + private static decompose; + /** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns true if end-of-tree was reached (cursor not structurally mutated). + */ + private static moveTo; + /** Create a cursor at the leftmost key. */ + private static createCursor; + private static getKey; + private static getLeaf; + private static areOverlapping; /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. diff --git a/b+tree.js b/b+tree.js index 9246c75..68ed270 100644 --- a/b+tree.js +++ b/b+tree.js @@ -491,6 +491,379 @@ var BTree = /** @class */ (function () { } return { nodequeue: nodequeue, nodeindex: nodeindex, leaf: nextnode }; }; + /** + * Intersects this tree with `other`, calling the supplied `intersection` callback for each intersecting key/value pair. + * Neither tree is modified. + * @param other The other tree to intersect with this one. + * @param intersection Called for keys that appear in both trees. + * @description Complexity: O(N) where N is the number of intersecting keys. 
+ */ + BTree.prototype.intersect = function (other, intersection) { + var cmp = this._compare; + // Ensure both trees share the same comparator reference + if (cmp !== other._compare) + throw new Error("Cannot merge BTrees with different comparators."); + if (this._maxNodeSize !== other._maxNodeSize) + throw new Error("Cannot merge BTrees with different max node sizes."); + if (other.size === 0 || this.size === 0) + return; + // Cursor payload factory + var mkPayload = function (_) { return undefined; }; + // Callbacks + var empty = function () { }; + // Initialize cursors at minimum keys. + var curA = BTree.createCursor(this, mkPayload, empty, empty, empty, empty, empty); + var curB = BTree.createCursor(other, mkPayload, empty, empty, empty, empty, empty); + // Walk both cursors + while (true) { + var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); + var trailing = curA, leading = curB; + if (order > 0) { + trailing = curB; + leading = curA; + } + var areEqual = order === 0; + if (areEqual) { + var key = BTree.getKey(leading); + var vA = curA.leaf.values[curA.leafIndex]; + var vB = curB.leaf.values[curB.leafIndex]; + intersection(key, vA, vB); + var outT = BTree.moveTo(trailing, leading, key, false, cmp); + var outL = BTree.moveTo(leading, trailing, key, false, cmp); + if (outT && outL) + break; + } + else { + var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, cmp); + if (out) { + // We've reached the end of one tree, so intersections are guaranteed to be done. + break; + } + } + } + }; + /** + * Merges this tree with `other`, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param other The other tree to merge into this one. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. 
+ * @description Complexity: O(1) when the ranges do not overlap; otherwise + * O(k ยท log n) where k is the number of overlapping keys. + */ + BTree.prototype.merge = function (other, merge) { + // Fast paths for empty trees + var sizeThis = this._root.size(); + var sizeOther = other._root.size(); + if (sizeThis === 0) + return other.clone(); + if (sizeOther === 0) + return this.clone(); + // Ensure both trees share the same comparator reference + if (this._compare !== other._compare) + throw new Error("Cannot merge BTrees with different comparators."); + if (this._maxNodeSize !== other._maxNodeSize) + throw new Error("Cannot merge BTrees with different max node sizes."); + var _a = BTree.decompose(this, other, merge), disjoint = _a.disjoint, tallestHeight = _a.tallestHeight, tallestIndex = _a.tallestIndex; + throw new Error("Not yet implemented: BTree.merge"); + }; + /** First pass of merge: decompose into disjoint reusable subtrees and merged leaves. */ + BTree.decompose = function (left, right, mergeValues) { + var cmp = left._compare; + check(left._compare === right._compare, "merge: trees must share comparator"); + check(left._maxNodeSize === right._maxNodeSize, "merge: trees must share max node size"); + check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); + var disjoint = []; + var pending = []; + var tallestIndex = -1, tallestHeight = -1; + var flushPendingEntries = function () { + // Flush pending overlapped entries into new leaves + if (pending.length > 0) { + var max = left._maxNodeSize; + var total = pending.length; + var remaining = total; + var leafCount = Math.ceil(total / max); + var offset = 0; + while (leafCount > 0) { + var newLeafSize = Math.ceil(remaining / leafCount); + var slice = pending.slice(offset, offset + newLeafSize); + offset += newLeafSize; + remaining -= newLeafSize; + var keys = slice.map(function (p) { return p[0]; }); + var vals = slice.map(function (p) { return p[1]; }); + var leaf = 
new BNode(keys, vals); + disjoint.push([0, leaf]); + if (0 > tallestHeight) { + tallestIndex = disjoint.length - 1; + tallestHeight = 0; + } + leafCount--; + } + pending.length = 0; + } + }; + var addSharedNodeToDisjointSet = function (node, height) { + flushPendingEntries(); + node.isShared = true; + disjoint.push([height, node]); + if (height > tallestHeight) { + tallestIndex = disjoint.length - 1; + tallestHeight = height; + } + }; + // Have to do this as cast to convince TS it's ever assigned + var highestDisjoint = undefined; + // Cursor payload factory + var mkPayload = function (_) { return ({ disqualified: false }); }; + var pushLeafRange = function (leaf, from, toExclusive) { + if (from < toExclusive) { + for (var i = from; i < toExclusive; ++i) + pending.push([leaf.keys[i], leaf.values[i]]); + } + }; + // Callbacks + var onEnterLeaf = function (leaf, payload, destIndex, other) { + var otherLeaf = BTree.getLeaf(other); + if (BTree.areOverlapping(leaf, otherLeaf, cmp)) { + payload.disqualified = true; + other.leafPayload.disqualified = true; + pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); + } + else { + check(destIndex === 0, "onEnterLeaf: destIndex must be 0 if not overlapping"); + } + }; + var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, isInclusive, _other) { + check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); + var start = isInclusive ? fromIndex : fromIndex + 1; + pushLeafRange(leaf, start, Math.min(toIndex, leaf.keys.length)); + }; + var onExitLeaf = function (leaf, startingIndex, isInclusive, payload, _other) { + highestDisjoint = undefined; + if (!payload.disqualified) { + highestDisjoint = { node: leaf, height: 0 }; + } + else { + var start = isInclusive ? 
startingIndex : startingIndex + 1; + pushLeafRange(leaf, start, leaf.keys.length); + } + }; + var onStepUp = function (parent, height, payload, fromIndex, stepDownIndex, _other) { + if (Number.isNaN(stepDownIndex)) { + if (!payload.disqualified) { + highestDisjoint = { node: parent, height: height }; + } + else { + for (var i = fromIndex + 1; i < parent.children.length; ++i) + addSharedNodeToDisjointSet(parent.children[i], height - 1); + } + } + else if (stepDownIndex === Infinity) { + if (!payload.disqualified) { + check(fromIndex === 0, "onStepUp: Infinity case requires fromIndex==0"); + highestDisjoint = { node: parent, height: height }; + } + else { + for (var i = fromIndex + 1; i < parent.children.length; ++i) + addSharedNodeToDisjointSet(parent.children[i], height - 1); + } + } + else { + for (var i = fromIndex + 1; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(parent.children[i], height - 1); + } + }; + var onStepDown = function (node, height, payload, stepDownIndex, other) { + var otherLeaf = BTree.getLeaf(other); + if (BTree.areOverlapping(node, otherLeaf, cmp)) { + payload.disqualified = true; + // leaf disqualification is handled in onEnterLeaf + } + for (var i = 0; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(node.children[i], height - 1); + }; + var curA = BTree.createCursor(left, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + var curB = BTree.createCursor(right, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + // Initialize disqualification w.r.t. opposite leaf. 
+ var initDisqualify = function (cur, otherLeaf) { + if (BTree.areOverlapping(cur.leaf, otherLeaf, cmp)) + cur.leafPayload.disqualified = true; + for (var i = 0; i < cur.spine.length; ++i) { + var entry = cur.spine[i]; + if (BTree.areOverlapping(entry.node, otherLeaf, cmp)) + entry.payload.disqualified = true; + } + }; + initDisqualify(curA, BTree.getLeaf(curB)); + initDisqualify(curB, BTree.getLeaf(curA)); + // Walk both cursors + while (true) { + var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); + var trailing = curA, leading = curB; + if (order > 0) { + trailing = curB; + leading = curA; + } + var areEqual = order === 0; + if (areEqual) { + var key = BTree.getKey(leading); + var vA = curA.leaf.values[curA.leafIndex]; + var vB = curB.leaf.values[curB.leafIndex]; + var merged = mergeValues(key, vA, vB); + if (merged !== undefined) + pending.push([key, merged]); + var outT = BTree.moveTo(trailing, leading, key, false, cmp); + var outL = BTree.moveTo(leading, trailing, key, false, cmp); + if (outT && outL) + break; + } + else { + var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, cmp); + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + if (out) { + var maxKeyLeft = left._root.maxKey(); + var maxKeyRight = right._root.maxKey(); + var maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + BTree.moveTo(leading, trailing, maxKey, false, cmp); + break; + } + } + } + flushPendingEntries(); + return { disjoint: disjoint, tallestIndex: tallestIndex, tallestHeight: tallestHeight }; + }; + /** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns true if end-of-tree was reached (cursor not structurally mutated). 
+ */ + BTree.moveTo = function (cur, other, targetKey, isInclusive, cmp) { + // We should start before the target (or at it if inclusive) + var keyPos = cmp(BTree.getKey(cur), targetKey); + check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo precondition violated"); + // Fast path: destination within current leaf + var leaf = cur.leaf; + { + var i = leaf.indexOf(targetKey, -1, cmp); + var destInLeaf = i < 0 ? ~i : (isInclusive ? i : i + 1); + if (destInLeaf < leaf.keys.length) { + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, isInclusive, other); + cur.leafIndex = destInLeaf; + return false; + } + } + // Find first ancestor with a viable right step + var spine = cur.spine; + var descentLevel = -1; + var descentIndex = -1; + for (var s = spine.length - 1; s >= 0; --s) { + var parent = spine[s].node; + var fromIndex = spine[s].childIndex; + var j = parent.indexOf(targetKey, 0, cmp); // insertion index or exact + var stepDownIndex = j + (isInclusive ? 0 : (j < parent.keys.length && cmp(parent.keys[j], targetKey) === 0 ? 1 : 0)); + // Note: when key not found, indexOf with failXor=0 already returns insertion index + if (stepDownIndex > fromIndex && stepDownIndex <= parent.keys.length - 1) { + descentLevel = s; + descentIndex = stepDownIndex; + break; + } + } + // Heights for callbacks: height = distance to leaf. Parent-of-leaf height = 1. + var heightOf = function (sIndex) { return spine.length - sIndex; }; + // Exit leaf; we did walk out of it conceptually + var startIndex = cur.leafIndex; + cur.onExitLeaf(leaf, startIndex, isInclusive, cur.leafPayload, other); + // Clear leaf payload after exit as specified + // @ts-ignore + cur.leafPayload = undefined; + if (descentLevel < 0) { + // No descent point; step up all the way; last callback gets Infinity + for (var s = spine.length - 1; s >= 0; --s) { + var entry = spine[s]; + var sd = s === 0 ? 
Infinity : NaN; + cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd, other); + } + return true; + } + // Step up through ancestors above the descentLevel + for (var s = spine.length - 1; s > descentLevel; --s) { + var entry = spine[s]; + cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, NaN, other); + } + { + var entry = spine[descentLevel]; + cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex, other); + entry.childIndex = descentIndex; + } + // Descend, invoking onStepDown and creating payloads + spine.length = descentLevel + 1; + var node = spine[descentLevel].node.children[descentIndex]; + var height = heightOf(descentLevel) - 1; + while (!node.isLeaf) { + var ni = node; + var j = ni.indexOf(targetKey, 0, cmp); + var stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 1 : 0)); + var payload = cur.mkPayload(ni); + spine.push({ node: ni, childIndex: stepDownIndex, payload: payload }); + cur.onStepDown(ni, height, payload, stepDownIndex, other); + node = ni.children[stepDownIndex]; + height -= 1; + } + // Enter destination leaf + var newLeaf = node; + var leafPayload = cur.mkPayload(newLeaf); + var idx = newLeaf.indexOf(targetKey, -1, cmp); + var destIndex = idx < 0 ? ~idx : (isInclusive ? idx : idx + 1); + check(destIndex >= 0 && destIndex < newLeaf.keys.length, "moveTo: destination out of bounds"); + cur.onEnterLeaf(newLeaf, leafPayload, destIndex, other); + cur.leaf = newLeaf; + cur.leafPayload = leafPayload; + cur.leafIndex = destIndex; + return false; + }; + /** Create a cursor at the leftmost key. 
*/ + BTree.createCursor = function (tree, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown) { + check(tree._root.size() > 0, "createCursor: cannot create a cursor for an empty tree"); + var spine = []; + var n = tree._root; + while (!n.isLeaf) { + var ni = n; + var payload = mkPayload(ni); + spine.push({ node: ni, childIndex: 0, payload: payload }); + n = ni.children[0]; + } + var leaf = n; + var leafPayload = mkPayload(leaf); + var cur = { + tree: tree, + leaf: leaf, + leafIndex: 0, + spine: spine, + leafPayload: leafPayload, + mkPayload: mkPayload, + onEnterLeaf: onEnterLeaf, + onMoveInLeaf: onMoveInLeaf, + onExitLeaf: onExitLeaf, + onStepUp: onStepUp, + onStepDown: onStepDown + }; + return cur; + }; + BTree.getKey = function (c) { + return c.leaf.keys[c.leafIndex]; + }; + BTree.getLeaf = function (c) { + return c.leaf; + }; + BTree.areOverlapping = function (a, b, cmp) { + var amin = a.minKey(), amax = a.maxKey(); + var bmin = b.minKey(), bmax = b.maxKey(); + // Overlap iff !(amax < bmin || bmax < amin) on inclusive ranges. + return !(cmp(amax, bmin) < 0 || cmp(bmax, amin) < 0); + }; /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. 
diff --git a/b+tree.test.ts b/b+tree.test.ts index 1aed4fa..17d663a 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1148,21 +1148,11 @@ function testMerge(maxNodeSize: number) { const tree2 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; - const result = tree1.merge(tree2, mergeFunc); + const result1 = tree1.merge(tree2, mergeFunc); + const result2 = tree2.merge(tree1, mergeFunc); - expect(result.size).toBe(3); - expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); - }); - - test('Merge non-empty tree with empty tree', () => { - const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTree([], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; - - const result = tree1.merge(tree2, mergeFunc); - - expect(result.size).toBe(3); - expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + expect(result1.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + expect(result2.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); }); test('Merge with no overlapping keys', () => { diff --git a/b+tree.ts b/b+tree.ts index dd37430..9fe4f69 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -569,7 +569,61 @@ export default class BTree implements ISortedMapF, ISortedMap return {nodequeue, nodeindex, leaf:nextnode}; } - /** + /** + * Intersects this tree with `other`, calling the supplied `intersection` callback for each intersecting key/value pair. + * Neither tree is modified. + * @param other The other tree to intersect with this one. + * @param intersection Called for keys that appear in both trees. + * @description Complexity: O(N) where N is the number of intersecting keys. 
+ */ + intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { + const cmp = this._compare; + // Ensure both trees share the same comparator reference + if (cmp !== other._compare) + throw new Error("Cannot merge BTrees with different comparators."); + if (this._maxNodeSize !== other._maxNodeSize) + throw new Error("Cannot merge BTrees with different max node sizes."); + + if (other.size === 0 || this.size === 0) + return; + + // Cursor payload factory + const mkPayload = (_: BNode): undefined => undefined; + + // Callbacks + const empty = () => {}; + + // Initialize cursors at minimum keys. + const curA = BTree.createCursor(this, mkPayload, empty, empty, empty, empty, empty); + const curB = BTree.createCursor(other, mkPayload, empty, empty, empty, empty, empty); + + // Walk both cursors + while (true) { + const order = cmp(BTree.getKey(curA), BTree.getKey(curB)); + let trailing = curA, leading = curB; + if (order > 0) { trailing = curB; leading = curA; } + const areEqual = order === 0; + + if (areEqual) { + const key = BTree.getKey(leading); + const vA = curA.leaf.values[curA.leafIndex]; + const vB = curB.leaf.values[curB.leafIndex]; + intersection(key, vA, vB); + const outT = BTree.moveTo(trailing, leading, key, false, cmp); + const outL = BTree.moveTo(leading, trailing, key, false, cmp); + if (outT && outL) + break; + } else { + const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, cmp); + if (out) { + // We've reached the end of one tree, so intersections are guaranteed to be done. + break; + } + } + } + } + + /** * Merges this tree with `other`, reusing subtrees wherever possible. * Neither input tree is modified. * @param other The other tree to merge into this one. 
@@ -595,9 +649,353 @@ export default class BTree implements ISortedMapF, ISortedMap if (this._maxNodeSize !== other._maxNodeSize) throw new Error("Cannot merge BTrees with different max node sizes."); + const { disjoint, tallestHeight, tallestIndex} = BTree.decompose(this, other, merge); + throw new Error("Not yet implemented: BTree.merge"); } + + /** First pass of merge: decompose into disjoint reusable subtrees and merged leaves. */ + private static decompose( + left: BTree, + right: BTree, + mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined + ): DecomposeResult { + const cmp = left._compare; + check(left._compare === right._compare, "merge: trees must share comparator"); + check(left._maxNodeSize === right._maxNodeSize, "merge: trees must share max node size"); + check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); + + const disjoint: DisjointEntry[] = []; + const pending: [K,V][] = []; + let tallestIndex = -1, tallestHeight = -1; + + const flushPendingEntries = () => { + // Flush pending overlapped entries into new leaves + if (pending.length > 0) { + const max = left._maxNodeSize; + const total = pending.length; + let remaining = total; + let leafCount = Math.ceil(total / max); + let offset = 0; + while (leafCount > 0) { + const newLeafSize = Math.ceil(remaining / leafCount); + const slice = pending.slice(offset, offset + newLeafSize); + offset += newLeafSize; + remaining -= newLeafSize; + const keys = slice.map(p => p[0]); + const vals = slice.map(p => p[1]); + const leaf = new BNode(keys, vals); + disjoint.push([0, leaf]); + if (0 > tallestHeight) { + tallestIndex = disjoint.length - 1; + tallestHeight = 0; + } + leafCount--; + } + pending.length = 0; + } + }; + + const addSharedNodeToDisjointSet = (node: BNode, height: number) => { + flushPendingEntries(); + node.isShared = true; + disjoint.push([height, node]); + if (height > tallestHeight) { + tallestIndex = disjoint.length - 1; + tallestHeight = 
height; + } + }; + + // Have to do this as cast to convince TS it's ever assigned + let highestDisjoint: { node: BNode, height: number } | undefined = undefined as { node: BNode, height: number } | undefined; + + // Cursor payload factory + const mkPayload = (_: BNode): TP => ({ disqualified: false } as TP); + + const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { + if (from < toExclusive) { + for (let i = from; i < toExclusive; ++i) + pending.push([leaf.keys[i], leaf.values[i]]); + } + }; + + // Callbacks + const onEnterLeaf = ( + leaf: BNode, payload: TP, destIndex: number, other: MergeCursor + ) => { + const otherLeaf = BTree.getLeaf(other); + if (BTree.areOverlapping(leaf, otherLeaf, cmp)) { + payload.disqualified = true; + other.leafPayload.disqualified = true; + pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); + } else { + check(destIndex === 0, "onEnterLeaf: destIndex must be 0 if not overlapping"); + } + }; + + const onMoveInLeaf = ( + leaf: BNode, payload: TP, fromIndex: number, toIndex: number, isInclusive: boolean, _other: MergeCursor + ) => { + check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); + const start = isInclusive ? fromIndex : fromIndex + 1; + pushLeafRange(leaf, start, Math.min(toIndex, leaf.keys.length)); + }; + + const onExitLeaf = ( + leaf: BNode, startingIndex: number, isInclusive: boolean, payload: TP, _other: MergeCursor + ) => { + highestDisjoint = undefined; + if (!payload.disqualified) { + highestDisjoint = { node: leaf, height: 0 }; + } else { + const start = isInclusive ? 
startingIndex : startingIndex + 1; + pushLeafRange(leaf, start, leaf.keys.length); + } + }; + + const onStepUp = ( + parent: BNodeInternal, height: number, payload: TP, fromIndex: number, stepDownIndex: number, _other: MergeCursor + ) => { + if (Number.isNaN(stepDownIndex)) { + if (!payload.disqualified) { + highestDisjoint = { node: parent, height }; + } else { + for (let i = fromIndex + 1; i < parent.children.length; ++i) + addSharedNodeToDisjointSet(parent.children[i], height - 1); + } + } else if (stepDownIndex === Infinity) { + if (!payload.disqualified) { + check(fromIndex === 0, "onStepUp: Infinity case requires fromIndex==0"); + highestDisjoint = { node: parent, height }; + } else { + for (let i = fromIndex + 1; i < parent.children.length; ++i) + addSharedNodeToDisjointSet(parent.children[i], height - 1); + } + } else { + for (let i = fromIndex + 1; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(parent.children[i], height - 1); + } + }; + + const onStepDown = ( + node: BNodeInternal, height: number, payload: TP, stepDownIndex: number, other: MergeCursor + ) => { + const otherLeaf = BTree.getLeaf(other); + if (BTree.areOverlapping(node, otherLeaf, cmp)) { + payload.disqualified = true; + // leaf disqualification is handled in onEnterLeaf + } + for (let i = 0; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(node.children[i], height - 1); + }; + + // Initialize cursors at minimum keys. + type Pay = MergeCursorPayload; + const curA = BTree.createCursor(left, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + const curB = BTree.createCursor(right, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + + // Initialize disqualification w.r.t. opposite leaf. 
+ const initDisqualify = (cur: MergeCursor, otherLeaf: BNode) => { + if (BTree.areOverlapping(cur.leaf, otherLeaf, cmp)) + cur.leafPayload.disqualified = true; + for (let i = 0; i < cur.spine.length; ++i) { + const entry = cur.spine[i]; + if (BTree.areOverlapping(entry.node, otherLeaf, cmp)) + entry.payload.disqualified = true; + } + }; + initDisqualify(curA, BTree.getLeaf(curB)); + initDisqualify(curB, BTree.getLeaf(curA)); + + // Walk both cursors + while (true) { + const order = cmp(BTree.getKey(curA), BTree.getKey(curB)); + let trailing = curA, leading = curB; + if (order > 0) { trailing = curB; leading = curA; } + const areEqual = order === 0; + + if (areEqual) { + const key = BTree.getKey(leading); + const vA = curA.leaf.values[curA.leafIndex]; + const vB = curB.leaf.values[curB.leafIndex]; + const merged = mergeValues(key, vA, vB); + if (merged !== undefined) pending.push([key, merged]); + const outT = BTree.moveTo(trailing, leading, key, false, cmp); + const outL = BTree.moveTo(leading, trailing, key, false, cmp); + if (outT && outL) + break; + } else { + const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, cmp); + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + if (out) { + const maxKeyLeft = left._root.maxKey() as K; + const maxKeyRight = right._root.maxKey() as K; + const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + BTree.moveTo(leading, trailing, maxKey, false, cmp); + break; + } + } + } + + flushPendingEntries(); + return { disjoint, tallestIndex, tallestHeight }; + } + + /** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns true if end-of-tree was reached (cursor not structurally mutated). 
+ */ + private static moveTo( + cur: MergeCursor, + other: MergeCursor, + targetKey: K, + isInclusive: boolean, + cmp: (a:K,b:K)=>number + ): boolean { + // We should start before the target (or at it if inclusive) + const keyPos = cmp(BTree.getKey(cur), targetKey); + check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo precondition violated"); + + // Fast path: destination within current leaf + const leaf = cur.leaf; + { + const i = leaf.indexOf(targetKey, -1, cmp); + const destInLeaf = i < 0 ? ~i : (isInclusive ? i : i + 1); + if (destInLeaf < leaf.keys.length) { + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, isInclusive, other); + cur.leafIndex = destInLeaf; + return false; + } + } + + // Find first ancestor with a viable right step + const spine = cur.spine; + let descentLevel = -1; + let descentIndex = -1; + + for (let s = spine.length - 1; s >= 0; --s) { + const parent = spine[s].node; + const fromIndex = spine[s].childIndex; + const j = parent.indexOf(targetKey, 0, cmp); // insertion index or exact + const stepDownIndex = (j as number) + (isInclusive ? 0 : (j < parent.keys.length && cmp(parent.keys[j], targetKey) === 0 ? 1 : 0)); + // Note: when key not found, indexOf with failXor=0 already returns insertion index + if (stepDownIndex > fromIndex && stepDownIndex <= parent.keys.length - 1) { + descentLevel = s; + descentIndex = stepDownIndex; + break; + } + } + + // Heights for callbacks: height = distance to leaf. Parent-of-leaf height = 1. 
+ const heightOf = (sIndex: number) => spine.length - sIndex; + + // Exit leaf; we did walk out of it conceptually + const startIndex = cur.leafIndex; + cur.onExitLeaf(leaf, startIndex, isInclusive, cur.leafPayload, other); + // Clear leaf payload after exit as specified + // @ts-ignore + cur.leafPayload = undefined as any; + + if (descentLevel < 0) { + // No descent point; step up all the way; last callback gets Infinity + for (let s = spine.length - 1; s >= 0; --s) { + const entry = spine[s]; + const sd = s === 0 ? Infinity : NaN; + cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd, other); + } + return true; + } + + // Step up through ancestors above the descentLevel + for (let s = spine.length - 1; s > descentLevel; --s) { + const entry = spine[s]; + cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, NaN, other); + } + { + const entry = spine[descentLevel]; + cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex, other); + entry.childIndex = descentIndex; + } + + // Descend, invoking onStepDown and creating payloads + spine.length = descentLevel + 1; + let node: BNode = spine[descentLevel].node.children[descentIndex]; + let height = heightOf(descentLevel) - 1; + + while (!(node as any).isLeaf) { + const ni = node as unknown as BNodeInternal; + const j = ni.indexOf(targetKey, 0, cmp); + const stepDownIndex = (j as number) + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 1 : 0)); + const payload = cur.mkPayload(ni); + spine.push({ node: ni, childIndex: stepDownIndex, payload }); + cur.onStepDown(ni, height, payload, stepDownIndex, other); + node = ni.children[stepDownIndex]; + height -= 1; + } + + // Enter destination leaf + const newLeaf = node as BNode; + const leafPayload = cur.mkPayload(newLeaf); + const idx = newLeaf.indexOf(targetKey, -1, cmp); + const destIndex = idx < 0 ? ~idx : (isInclusive ? 
idx : idx + 1); + check(destIndex >= 0 && destIndex < newLeaf.keys.length, "moveTo: destination out of bounds"); + + cur.onEnterLeaf(newLeaf, leafPayload, destIndex, other); + cur.leaf = newLeaf; + cur.leafPayload = leafPayload; + cur.leafIndex = destIndex; + return false; + } + + /** Create a cursor at the leftmost key. */ + private static createCursor( + tree: BTree, + mkPayload: (n: BNode) => TP, + onEnterLeaf: MergeCursor["onEnterLeaf"], + onMoveInLeaf: MergeCursor["onMoveInLeaf"], + onExitLeaf: MergeCursor["onExitLeaf"], + onStepUp: MergeCursor["onStepUp"], + onStepDown: MergeCursor["onStepDown"], + ): MergeCursor { + check(tree._root.size() > 0, "createCursor: cannot create a cursor for an empty tree"); + const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; + let n: BNode = tree._root; + while (!(n as any).isLeaf) { + const ni = n as unknown as BNodeInternal; + const payload = mkPayload(ni); + spine.push({ node: ni, childIndex: 0, payload }); + n = ni.children[0]; + } + const leaf = n as BNode; + const leafPayload = mkPayload(leaf); + const cur: MergeCursor = { + tree, leaf, leafIndex: 0, spine, leafPayload, mkPayload, + onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown + }; + return cur; + } + + private static getKey(c: MergeCursor): K { + return c.leaf.keys[c.leafIndex] as K; + } + + private static getLeaf(c: MergeCursor): BNode { + return c.leaf; + } + + private static areOverlapping( + a: BNode, b: BNode, cmp: (x:K,y:K)=>number + ): boolean { + const amin = a.minKey() as K, amax = a.maxKey() as K; + const bmin = b.minKey() as K, bmax = b.maxKey() as K; + // Overlap iff !(amax < bmin || bmax < amin) on inclusive ranges. + return !(cmp(amax, bmin) < 0 || cmp(bmax, amin) < 0); + } + /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. 
@@ -1893,6 +2291,26 @@ class BNodeInternal extends BNode { */ type DiffCursor = { height: number, internalSpine: BNode[][], levelIndices: number[], leaf: BNode | undefined, currentKey: K }; +type MergeCursorPayload = { disqualified: boolean }; + +interface MergeCursor { + tree: BTree; + leaf: BNode; + leafIndex: number; + spine: Array<{ node: BNodeInternal, childIndex: number, payload: TPayload }>; + leafPayload: TPayload; + mkPayload: (n: BNode) => TPayload; + + onEnterLeaf: (leaf: BNode, payload: TPayload, destIndex: number, other: MergeCursor) => void; + onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean, other: MergeCursor) => void; + onExitLeaf: (leaf: BNode, startingIndex: number, isInclusive: boolean, payload: TPayload, other: MergeCursor) => void; + onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, stepDownIndex: number, other: MergeCursor) => void; + onStepDown: (node: BNodeInternal, height: number, payload: TPayload, stepDownIndex: number, other: MergeCursor) => void; +} + +type DisjointEntry = [height: number, node: BNode]; +type DecomposeResult = { disjoint: DisjointEntry[], tallestIndex: number, tallestHeight: number }; + // Optimization: this array of `undefined`s is used instead of a normal // array of values in nodes where `undefined` is the only value. // Its length is extended to max node size on first use; since it can From 535309081d456b410d9de071167f717e01a8255c Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 29 Oct 2025 11:41:32 -0700 Subject: [PATCH 007/143] docs --- b+tree.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/b+tree.ts b/b+tree.ts index 9fe4f69..43ccce4 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -574,7 +574,7 @@ export default class BTree implements ISortedMapF, ISortedMap * Neither tree is modified. * @param other The other tree to intersect with this one. 
* @param intersection Called for keys that appear in both trees. - * @description Complexity: O(N) where N is the number of intersecting keys. + * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. */ intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { const cmp = this._compare; @@ -624,14 +624,13 @@ export default class BTree implements ISortedMapF, ISortedMap } /** - * Merges this tree with `other`, reusing subtrees wherever possible. + * Efficiently merges this tree with `other`, reusing subtrees wherever possible. * Neither input tree is modified. * @param other The other tree to merge into this one. * @param merge Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. * @returns A new BTree that contains the merged key/value pairs. - * @description Complexity: O(1) when the ranges do not overlap; otherwise - * O(k ยท log n) where k is the number of overlapping keys. + * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. */ merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { // Fast paths for empty trees @@ -655,7 +654,11 @@ export default class BTree implements ISortedMapF, ISortedMap } - /** First pass of merge: decompose into disjoint reusable subtrees and merged leaves. */ + + /** + * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. 
+ */ private static decompose( left: BTree, right: BTree, From 72aa5c82223cb3ae1332c603658f7a78b805bf60 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 29 Oct 2025 12:14:43 -0700 Subject: [PATCH 008/143] add intersection tests --- b+tree.d.ts | 12 +-- b+tree.js | 12 +-- b+tree.test.ts | 224 +++++++++++++++++++++++++++++++++++++++++++++++++ benchmarks.ts | 155 ++++++++++++++++++++++++++++++++++ 4 files changed, 393 insertions(+), 10 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index d180df1..a118ba2 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -258,21 +258,23 @@ export default class BTree implements ISortedMapF, ISort * Neither tree is modified. * @param other The other tree to intersect with this one. * @param intersection Called for keys that appear in both trees. - * @description Complexity: O(N) where N is the number of intersecting keys. + * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. */ intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void; /** - * Merges this tree with `other`, reusing subtrees wherever possible. + * Efficiently merges this tree with `other`, reusing subtrees wherever possible. * Neither input tree is modified. * @param other The other tree to merge into this one. * @param merge Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. * @returns A new BTree that contains the merged key/value pairs. - * @description Complexity: O(1) when the ranges do not overlap; otherwise - * O(k ยท log n) where k is the number of overlapping keys. + * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. */ merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree; - /** First pass of merge: decompose into disjoint reusable subtrees and merged leaves. 
*/ + /** + * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + */ private static decompose; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. diff --git a/b+tree.js b/b+tree.js index 68ed270..6dfba18 100644 --- a/b+tree.js +++ b/b+tree.js @@ -496,7 +496,7 @@ var BTree = /** @class */ (function () { * Neither tree is modified. * @param other The other tree to intersect with this one. * @param intersection Called for keys that appear in both trees. - * @description Complexity: O(N) where N is the number of intersecting keys. + * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. */ BTree.prototype.intersect = function (other, intersection) { var cmp = this._compare; @@ -543,14 +543,13 @@ var BTree = /** @class */ (function () { } }; /** - * Merges this tree with `other`, reusing subtrees wherever possible. + * Efficiently merges this tree with `other`, reusing subtrees wherever possible. * Neither input tree is modified. * @param other The other tree to merge into this one. * @param merge Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. * @returns A new BTree that contains the merged key/value pairs. - * @description Complexity: O(1) when the ranges do not overlap; otherwise - * O(k ยท log n) where k is the number of overlapping keys. + * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. 
*/ BTree.prototype.merge = function (other, merge) { // Fast paths for empty trees @@ -568,7 +567,10 @@ var BTree = /** @class */ (function () { var _a = BTree.decompose(this, other, merge), disjoint = _a.disjoint, tallestHeight = _a.tallestHeight, tallestIndex = _a.tallestIndex; throw new Error("Not yet implemented: BTree.merge"); }; - /** First pass of merge: decompose into disjoint reusable subtrees and merged leaves. */ + /** + * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + */ BTree.decompose = function (left, right, mergeValues) { var cmp = left._compare; check(left._compare === right._compare, "merge: trees must share comparator"); diff --git a/b+tree.test.ts b/b+tree.test.ts index 17d663a..9c3e8e2 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1113,6 +1113,230 @@ function testBTree(maxNodeSize: number) }); } +describe('BTree intersect tests with fanout 32', testIntersect.bind(null, 32)); +describe('BTree intersect tests with fanout 10', testIntersect.bind(null, 10)); +describe('BTree intersect tests with fanout 4', testIntersect.bind(null, 4)); + +function testIntersect(maxNodeSize: number) { + const compare = (a: number, b: number) => a - b; + + const buildTree = (entries: Array<[number, number]>) => + new BTree(entries, compare, maxNodeSize); + + const tuples = (...pairs: Array<[number, number]>) => pairs; + + const collectCalls = (left: BTree, right: BTree) => { + const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; + left.intersect(right, (key, leftValue, rightValue) => { + calls.push({ key, leftValue, rightValue }); + }); + return calls; + }; + + test('Intersect two empty trees', () => { + const tree1 = buildTree([]); + const tree2 = buildTree([]); + expect(collectCalls(tree1, tree2)).toEqual([]); + }); + + test('Intersect empty tree 
with non-empty tree', () => { + const tree1 = buildTree([]); + const tree2 = buildTree(tuples([1, 10], [2, 20], [3, 30])); + expect(collectCalls(tree1, tree2)).toEqual([]); + expect(collectCalls(tree2, tree1)).toEqual([]); + }); + + test('Intersect with no overlapping keys', () => { + const tree1 = buildTree(tuples([1, 10], [3, 30], [5, 50])); + const tree2 = buildTree(tuples([2, 20], [4, 40], [6, 60])); + expect(collectCalls(tree1, tree2)).toEqual([]); + }); + + test('Intersect with single overlapping key', () => { + const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); + const tree2 = buildTree(tuples([0, 100], [2, 200], [4, 400])); + expect(collectCalls(tree1, tree2)).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); + }); + + test('Intersect with multiple overlapping keys maintains tree contents', () => { + const leftEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; + const rightEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400], [6, 600]]; + const tree1 = buildTree(leftEntries); + const tree2 = buildTree(rightEntries); + const leftBefore = tree1.toArray(); + const rightBefore = tree2.toArray(); + expect(collectCalls(tree1, tree2)).toEqual([ + { key: 2, leftValue: 20, rightValue: 200 }, + { key: 4, leftValue: 40, rightValue: 400 }, + ]); + expect(tree1.toArray()).toEqual(leftBefore); + expect(tree2.toArray()).toEqual(rightBefore); + tree1.checkValid(); + tree2.checkValid(); + }); + + test('Intersect with contiguous overlap yields sorted keys', () => { + const tree1 = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); + const tree2 = buildTree(tuples([3, 30], [4, 40], [5, 50], [6, 60], [7, 70])); + const calls = collectCalls(tree1, tree2); + expect(calls.map(c => c.key)).toEqual([3, 4, 5, 6]); + expect(calls.map(c => c.leftValue)).toEqual([3, 4, 5, 6]); + expect(calls.map(c => c.rightValue)).toEqual([30, 40, 50, 60]); + }); + + test('Intersect large overlapping range counts each shared key 
once', () => { + const size = 1000; + const overlapStart = 500; + const leftEntries = Array.from({ length: size }, (_, i) => [i, i * 3] as [number, number]); + const rightEntries = Array.from({ length: size }, (_, i) => { + const key = i + overlapStart; + return [key, key * 7] as [number, number]; + }); + const tree1 = buildTree(leftEntries); + const tree2 = buildTree(rightEntries); + const calls = collectCalls(tree1, tree2); + expect(calls.length).toBe(size - overlapStart); + expect(calls[0]).toEqual({ + key: overlapStart, + leftValue: overlapStart * 3, + rightValue: overlapStart * 7 + }); + const lastCall = calls[calls.length - 1]; + expect(lastCall.key).toBe(size - 1); + expect(lastCall.leftValue).toBe((size - 1) * 3); + expect(lastCall.rightValue).toBe((size - 1) * 7); + }); + + test('Intersect tree with itself visits each key once', () => { + const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); + const tree = buildTree(entries); + const calls = collectCalls(tree, tree); + expect(calls.length).toBe(entries.length); + for (let i = 0; i < entries.length; i++) { + const [key, value] = entries[i]; + expect(calls[i]).toEqual({ key, leftValue: value, rightValue: value }); + } + }); + + test('Intersect arguments determine left/right values', () => { + const tree1 = buildTree(tuples([1, 100], [2, 200], [4, 400])); + const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); + const callsLeft = collectCalls(tree1, tree2); + const callsRight = collectCalls(tree2, tree1); + expect(callsLeft).toEqual([ + { key: 2, leftValue: 200, rightValue: 20 }, + { key: 4, leftValue: 400, rightValue: 40 }, + ]); + expect(callsRight).toEqual([ + { key: 2, leftValue: 20, rightValue: 200 }, + { key: 4, leftValue: 40, rightValue: 400 }, + ]); + }); + + test('Intersect throws for comparator mismatch', () => { + const compareA = (a: number, b: number) => a - b; + const compareB = (a: number, b: number) => a - b; + const tree1 = new BTree([[1, 1]], compareA, 
maxNodeSize); + const tree2 = new BTree([[1, 1]], compareB, maxNodeSize); + expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot merge BTrees with different comparators."); + }); + + test('Intersect throws for max node size mismatch', () => { + const tree1 = new BTree([[1, 1]], compare, maxNodeSize); + const tree2 = new BTree([[1, 1]], compare, maxNodeSize + 1); + expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot merge BTrees with different max node sizes."); + }); +} + +describe('BTree intersect fuzz tests', () => { + const compare = (a: number, b: number) => a - b; + const branchingFactors = [4, 8, 16, 32]; + const seeds = [0x1234ABCD, 0x9ABCDEFF]; + const FUZZ_SETTINGS = { + scenarioBudget: 2, + iterationsPerScenario: 3, + maxInsertSize: 200, + keyRange: 5_000, + valueRange: 1_000, + timeoutMs: 8_000 + } as const; + + test('randomized intersects across branching factors', () => { + jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + + const scenarioConfigs: Array<{ seedBase: number, maxNodeSize: number }> = []; + for (const seedBase of seeds) + for (const maxNodeSize of branchingFactors) + scenarioConfigs.push({ seedBase, maxNodeSize }); + + const scenariosToRun = Math.min(FUZZ_SETTINGS.scenarioBudget, scenarioConfigs.length); + const selectedScenarios = scenarioConfigs.slice(0, scenariosToRun); + + for (const { seedBase, maxNodeSize } of selectedScenarios) { + const baseSeed = (seedBase ^ (maxNodeSize * 0x9E3779B1)) >>> 0; + const fuzzRand = new MersenneTwister(baseSeed); + const nextInt = (limit: number) => limit <= 0 ? 
0 : Math.floor(fuzzRand.random() * limit); + + for (let iteration = 0; iteration < FUZZ_SETTINGS.iterationsPerScenario; iteration++) { + const treeA = new BTree([], compare, maxNodeSize); + const treeB = new BTree([], compare, maxNodeSize); + const mapA = new Map(); + const mapB = new Map(); + + const sizeA = nextInt(FUZZ_SETTINGS.maxInsertSize); + const sizeB = nextInt(FUZZ_SETTINGS.maxInsertSize); + + for (let i = 0; i < sizeA; i++) { + const key = nextInt(FUZZ_SETTINGS.keyRange); + const value = nextInt(FUZZ_SETTINGS.valueRange); + treeA.set(key, value); + mapA.set(key, value); + } + + for (let i = 0; i < sizeB; i++) { + const key = nextInt(FUZZ_SETTINGS.keyRange); + const value = nextInt(FUZZ_SETTINGS.valueRange); + treeB.set(key, value); + mapB.set(key, value); + } + + const expected: Array<{ key: number, leftValue: number, rightValue: number }> = []; + mapA.forEach((leftValue, key) => { + const rightValue = mapB.get(key); + if (rightValue !== undefined) { + expected.push({ key, leftValue, rightValue }); + } + }); + expected.sort((a, b) => a.key - b.key); + + const actual: Array<{ key: number, leftValue: number, rightValue: number }> = []; + treeA.intersect(treeB, (key, leftValue, rightValue) => { + actual.push({ key, leftValue, rightValue }); + }); + expect(actual).toEqual(expected); + + const swapped: Array<{ key: number, leftValue: number, rightValue: number }> = []; + treeB.intersect(treeA, (key, leftValue, rightValue) => { + swapped.push({ key, leftValue, rightValue }); + }); + const swapExpected = expected.map(({ key, leftValue, rightValue }) => ({ + key, + leftValue: rightValue, + rightValue: leftValue + })); + expect(swapped).toEqual(swapExpected); + + const sortedA = Array.from(mapA.entries()).sort((a, b) => a[0] - b[0]); + const sortedB = Array.from(mapB.entries()).sort((a, b) => a[0] - b[0]); + expect(treeA.toArray()).toEqual(sortedA); + expect(treeB.toArray()).toEqual(sortedB); + treeA.checkValid(); + treeB.checkValid(); + } + } + }); +}); + 
describe('BTree merge tests with fanout 32', testMerge.bind(null, 32)); describe('BTree merge tests with fanout 10', testMerge.bind(null, 10)); describe('BTree merge tests with fanout 4', testMerge.bind(null, 4)); diff --git a/benchmarks.ts b/benchmarks.ts index 54c16a4..496c3ef 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -70,6 +70,35 @@ function countTreeNodeStats(tree: BTree) { return visit(root, false); } +function intersectBySorting( + tree1: BTree, + tree2: BTree, + callback: (k: number, leftValue: number, rightValue: number) => void +) { + const left = tree1.toArray(); + const right = tree2.toArray(); + left.sort((a, b) => a[0] - b[0]); + right.sort((a, b) => a[0] - b[0]); + let i = 0; + let j = 0; + const leftLen = left.length; + const rightLen = right.length; + + while (i < leftLen && j < rightLen) { + const [leftKey, leftValue] = left[i]; + const [rightKey, rightValue] = right[j]; + if (leftKey === rightKey) { + callback(leftKey, leftValue, rightValue); + i++; + j++; + } else if (leftKey < rightKey) { + i++; + } else { + j++; + } + } +} + console.log("Benchmark results (milliseconds with integer keys/values)"); console.log("---------------------------------------------------------"); @@ -655,3 +684,129 @@ console.log("### Merge between B+ trees"); console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); } } + +console.log(); +console.log("### Intersect between B+ trees"); +{ + const sizes = [100, 1000, 10000, 100000]; + + const runIntersect = ( + tree1: BTree, + tree2: BTree + ) => { + let count = 0; + let checksum = 0; + tree1.intersect(tree2, (_k, leftValue, rightValue) => { + count++; + checksum += leftValue + rightValue; + }); + return { count, checksum }; + }; + + const runBaseline = ( + tree1: BTree, + tree2: BTree + ) => { + let count = 0; + let checksum = 0; + intersectBySorting(tree1, tree2, (_k, leftValue, rightValue) => { + count++; + checksum += leftValue + rightValue; + }); + return { count, checksum }; + 
}; + + console.log(); + console.log("# Non-overlapping ranges (no shared keys)"); + sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + const offset = size * 3; + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(offset + i, offset + i); + } + + measure( + result => `Intersect ${size}+${size} disjoint trees using intersect() [count=${result.count}, checksum=${result.checksum}]`, + () => runIntersect(tree1, tree2) + ); + measure( + result => `Intersect ${size}+${size} disjoint trees using sort baseline [count=${result.count}, checksum=${result.checksum}]`, + () => runBaseline(tree1, tree2) + ); + }); + + console.log(); + console.log("# 50% overlapping ranges"); + sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + const offset = Math.floor(size / 2); + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i + offset, (i + offset) * 2); + } + + measure( + result => `Intersect ${size}+${size} half-overlapping trees using intersect() [count=${result.count}, checksum=${result.checksum}]`, + () => runIntersect(tree1, tree2) + ); + measure( + result => `Intersect ${size}+${size} half-overlapping trees using sort baseline [count=${result.count}, checksum=${result.checksum}]`, + () => runBaseline(tree1, tree2) + ); + }); + + console.log(); + console.log("# Complete overlap (all keys shared)"); + sizes.forEach((size) => { + const tree1 = new BTree(); + const tree2 = new BTree(); + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i, i * 3); + } + + measure( + result => `Intersect ${size}+${size} identical-key trees using intersect() [count=${result.count}, checksum=${result.checksum}]`, + () => runIntersect(tree1, tree2) + ); + measure( + result => `Intersect ${size}+${size} identical-key trees using sort baseline [count=${result.count}, checksum=${result.checksum}]`, + () => runBaseline(tree1, tree2) + ); + }); + + console.log(); + console.log("# Random overlaps (~10% 
shared keys)"); + sizes.forEach((size) => { + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); + const overlapCount = Math.max(1, Math.floor(size * 0.1)); + for (let i = 0; i < overlapCount && i < keys1.length && i < keys2.length; i++) { + keys2[i] = keys1[i]; + } + + const tree1 = new BTree(); + const tree2 = new BTree(); + + for (let i = 0; i < keys1.length; i++) { + const key = keys1[i]; + tree1.set(key, key * 5); + } + for (let i = 0; i < keys2.length; i++) { + const key = keys2[i]; + tree2.set(key, key * 7); + } + + measure( + result => `Intersect ${tree1.size}+${tree2.size} random trees using intersect() [count=${result.count}, checksum=${result.checksum}]`, + () => runIntersect(tree1, tree2) + ); + measure( + result => `Intersect ${tree1.size}+${tree2.size} random trees using sort baseline [count=${result.count}, checksum=${result.checksum}]`, + () => runBaseline(tree1, tree2) + ); + }); +} From 8064f334c562f6956a8686886259c49f6f1eb570 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 29 Oct 2025 12:23:51 -0700 Subject: [PATCH 009/143] add more merge tests --- b+tree.test.ts | 193 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) diff --git a/b+tree.test.ts b/b+tree.test.ts index 9c3e8e2..4d17bf6 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1356,6 +1356,199 @@ function testMerge(maxNodeSize: number) { return false; }; + const buildTree = (keys: number[], valueScale = 1, valueOffset = 0) => { + const tree = new BTree([], compare, maxNodeSize); + for (const key of keys) { + tree.set(key, key * valueScale + valueOffset); + } + return tree; + }; + + const expectRootLeafState = (tree: BTree, expectedIsLeaf: boolean) => { + const root = tree['_root'] as any; + expect(root.isLeaf).toBe(expectedIsLeaf); + }; + + const range = (start: number, endExclusive: number, step = 1): number[] => { + const result: number[] = []; + for (let i = start; i < endExclusive; i += step) + result.push(i); + 
return result; + }; + + test('Merge disjoint roots', () => { + const size = maxNodeSize * 3; + const tree1 = buildTree(range(0, size), 1, 0); + const offset = size * 5; + const tree2 = buildTree(range(offset, offset + size), 2, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let mergeCalls = 0; + const result = tree1.merge(tree2, () => { + mergeCalls++; + return 0; + }); + + expect(mergeCalls).toBe(0); + expect(result.size).toBe(tree1.size + tree2.size); + const resultRoot = result['_root'] as any; + expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); + expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); + }); + + test('Merge leaf roots with intersecting keys', () => { + const tree1 = buildTree([1, 2, 4], 10, 0); + const tree2 = buildTree([2, 3, 5], 100, 0); + + expectRootLeafState(tree1, true); + expectRootLeafState(tree2, true); + + const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; + const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + calls.push({ key, leftValue, rightValue }); + return leftValue + rightValue; + }); + + expect(calls).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); + expect(result.toArray()).toEqual([[1, 10], [2, 220], [3, 300], [4, 40], [5, 500]]); + }); + + test('Merge leaf roots with disjoint keys', () => { + const tree1 = buildTree([1, 3, 5], 1, 0); + const tree2 = buildTree([2, 4, 6], 1, 1000); + + expectRootLeafState(tree1, true); + expectRootLeafState(tree2, true); + + let mergeCalls = 0; + const result = tree1.merge(tree2, () => { + mergeCalls++; + return 0; + }); + + expect(mergeCalls).toBe(0); + expect(result.toArray()).toEqual([ + [1, 1], + [2, 1002], + [3, 3], + [4, 1004], + [5, 5], + [6, 1006] + ]); + }); + + test('Merge trees disjoint except for shared maximum key', () => { + const size = maxNodeSize * 2; + const tree1 = buildTree(range(0, size), 1, 0); + const tree2 = buildTree(range(size - 1, size - 1 + size), 3, 
0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let mergeCalls = 0; + const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + mergeCalls++; + return leftValue + rightValue; + }); + + expect(mergeCalls).toBe(1); + expect(result.get(size - 1)).toBe((size - 1) + (size - 1) * 3); + expect(result.size).toBe(tree1.size + tree2.size - 1); + }); + + test('Merge where two-leaf tree intersects leaf-root tree across both leaves', () => { + const size = maxNodeSize + Math.max(3, Math.floor(maxNodeSize / 2)); + const tree1 = buildTree(range(0, size), 2, 0); + const tree2 = buildTree([1, Math.floor(size / 2), size - 1], 5, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, true); + + const seenKeys: number[] = []; + const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + seenKeys.push(key); + return rightValue; + }); + + expect(seenKeys.sort((a, b) => a - b)).toEqual([1, Math.floor(size / 2), size - 1]); + expect(result.get(1)).toBe(5); + expect(result.get(Math.floor(size / 2))).toBe(5 * Math.floor(size / 2)); + expect(result.get(size - 1)).toBe(5 * (size - 1)); + expect(result.size).toBe(tree1.size + tree2.size - seenKeys.length); + }); + + test('Merge where max key equals min key of other tree', () => { + const size = maxNodeSize * 2; + const tree1 = buildTree(range(0, size), 1, 0); + const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let mergeCalls = 0; + const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + mergeCalls++; + return rightValue; + }); + + expect(mergeCalls).toBe(1); + expect(result.get(size - 1)).toBe((size - 1) * 10); + expect(result.size).toBe(tree1.size + tree2.size - 1); + }); + + test('Merge odd and even keyed trees', () => { + const limit = maxNodeSize * 3; + const treeOdd = buildTree(range(1, limit * 2, 2), 1, 0); + const treeEven = buildTree(range(0, limit * 2, 
2), 1, 100); + + expectRootLeafState(treeOdd, false); + expectRootLeafState(treeEven, false); + + let mergeCalls = 0; + const result = treeOdd.merge(treeEven, () => { + mergeCalls++; + return 0; + }); + + expect(mergeCalls).toBe(0); + expect(result.size).toBe(treeOdd.size + treeEven.size); + }); + + test('Merge overlapping prefix equal to branching factor', () => { + const shared = maxNodeSize; + const tree1Keys = [ + ...range(0, shared), + ...range(shared, shared + maxNodeSize) + ]; + const tree2Keys = [ + ...range(0, shared), + ...range(shared + maxNodeSize, shared + maxNodeSize * 2) + ]; + + const tree1 = buildTree(tree1Keys, 1, 0); + const tree2 = buildTree(tree2Keys, 2, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + const mergedKeys: number[] = []; + const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + mergedKeys.push(key); + return leftValue + rightValue; + }); + + expect(mergedKeys.sort((a, b) => a - b)).toEqual(range(0, shared)); + const expected = [ + ...range(0, shared).map(k => [k, k + k * 2]), + ...range(shared, shared + maxNodeSize).map(k => [k, k]), + ...range(shared + maxNodeSize, shared + maxNodeSize * 2).map(k => [k, k * 2]) + ]; + expect(result.toArray()).toEqual(expected); + expect(result.size).toBe(tree1.size + tree2.size - shared); + }); + test('Merge two empty trees', () => { const tree1 = new BTree([], compare, maxNodeSize); const tree2 = new BTree([], compare, maxNodeSize); From dfdc0cb0ce66f3241c30fdd14b73805b269fa25e Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 29 Oct 2025 12:31:38 -0700 Subject: [PATCH 010/143] cleanup --- b+tree.ts | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/b+tree.ts b/b+tree.ts index 43ccce4..eb570af 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -865,14 +865,12 @@ export default class BTree implements ISortedMapF, ISortedMap // Fast path: destination within current leaf const leaf = cur.leaf; - { - const i = 
leaf.indexOf(targetKey, -1, cmp); - const destInLeaf = i < 0 ? ~i : (isInclusive ? i : i + 1); - if (destInLeaf < leaf.keys.length) { - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, isInclusive, other); - cur.leafIndex = destInLeaf; - return false; - } + const i = leaf.indexOf(targetKey, -1, cmp); + const destInLeaf = i < 0 ? ~i : (isInclusive ? i : i + 1); + if (destInLeaf < leaf.keys.length) { + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, isInclusive, other); + cur.leafIndex = destInLeaf; + return false; } // Find first ancestor with a viable right step @@ -900,7 +898,6 @@ export default class BTree implements ISortedMapF, ISortedMap const startIndex = cur.leafIndex; cur.onExitLeaf(leaf, startIndex, isInclusive, cur.leafPayload, other); // Clear leaf payload after exit as specified - // @ts-ignore cur.leafPayload = undefined as any; if (descentLevel < 0) { From ebd9c1152dbe49cb87fdfab0993e09eaa5ce44d0 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 29 Oct 2025 12:32:33 -0700 Subject: [PATCH 011/143] remove weird clearing of payload --- b+tree.js | 17 ++++++----------- b+tree.ts | 2 -- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/b+tree.js b/b+tree.js index 6dfba18..8b79379 100644 --- a/b+tree.js +++ b/b+tree.js @@ -748,14 +748,12 @@ var BTree = /** @class */ (function () { check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo precondition violated"); // Fast path: destination within current leaf var leaf = cur.leaf; - { - var i = leaf.indexOf(targetKey, -1, cmp); - var destInLeaf = i < 0 ? ~i : (isInclusive ? i : i + 1); - if (destInLeaf < leaf.keys.length) { - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, isInclusive, other); - cur.leafIndex = destInLeaf; - return false; - } + var i = leaf.indexOf(targetKey, -1, cmp); + var destInLeaf = i < 0 ? ~i : (isInclusive ? 
i : i + 1); + if (destInLeaf < leaf.keys.length) { + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, isInclusive, other); + cur.leafIndex = destInLeaf; + return false; } // Find first ancestor with a viable right step var spine = cur.spine; @@ -778,9 +776,6 @@ var BTree = /** @class */ (function () { // Exit leaf; we did walk out of it conceptually var startIndex = cur.leafIndex; cur.onExitLeaf(leaf, startIndex, isInclusive, cur.leafPayload, other); - // Clear leaf payload after exit as specified - // @ts-ignore - cur.leafPayload = undefined; if (descentLevel < 0) { // No descent point; step up all the way; last callback gets Infinity for (var s = spine.length - 1; s >= 0; --s) { diff --git a/b+tree.ts b/b+tree.ts index eb570af..5b247fe 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -897,8 +897,6 @@ export default class BTree implements ISortedMapF, ISortedMap // Exit leaf; we did walk out of it conceptually const startIndex = cur.leafIndex; cur.onExitLeaf(leaf, startIndex, isInclusive, cur.leafPayload, other); - // Clear leaf payload after exit as specified - cur.leafPayload = undefined as any; if (descentLevel < 0) { // No descent point; step up all the way; last callback gets Infinity From 4483c1d3fdb9c86560d7a085c98db516a2faa83f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 29 Oct 2025 20:59:59 -0700 Subject: [PATCH 012/143] fix double use of isInclusive --- b+tree.ts | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/b+tree.ts b/b+tree.ts index 5b247fe..889097f 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -609,12 +609,12 @@ export default class BTree implements ISortedMapF, ISortedMap const vA = curA.leaf.values[curA.leafIndex]; const vB = curB.leaf.values[curB.leafIndex]; intersection(key, vA, vB); - const outT = BTree.moveTo(trailing, leading, key, false, cmp); - const outL = BTree.moveTo(leading, trailing, key, false, cmp); + const outT = BTree.moveTo(trailing, leading, key, false, 
areEqual, cmp); + const outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); if (outT && outL) break; } else { - const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, cmp); + const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (out) { // We've reached the end of one tree, so intersections are guaranteed to be done. break; @@ -738,21 +738,21 @@ export default class BTree implements ISortedMapF, ISortedMap }; const onMoveInLeaf = ( - leaf: BNode, payload: TP, fromIndex: number, toIndex: number, isInclusive: boolean, _other: MergeCursor + leaf: BNode, payload: TP, fromIndex: number, toIndex: number, startedEqual: boolean, _other: MergeCursor ) => { check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); - const start = isInclusive ? fromIndex : fromIndex + 1; + const start = startedEqual ? fromIndex + 1 : fromIndex; pushLeafRange(leaf, start, Math.min(toIndex, leaf.keys.length)); }; const onExitLeaf = ( - leaf: BNode, startingIndex: number, isInclusive: boolean, payload: TP, _other: MergeCursor + leaf: BNode, startingIndex: number, startedEqual: boolean, payload: TP, _other: MergeCursor ) => { highestDisjoint = undefined; if (!payload.disqualified) { highestDisjoint = { node: leaf, height: 0 }; } else { - const start = isInclusive ? startingIndex : startingIndex + 1; + const start = startedEqual ? 
startingIndex + 1 : startingIndex; pushLeafRange(leaf, start, leaf.keys.length); } }; @@ -824,12 +824,12 @@ export default class BTree implements ISortedMapF, ISortedMap const vB = curB.leaf.values[curB.leafIndex]; const merged = mergeValues(key, vA, vB); if (merged !== undefined) pending.push([key, merged]); - const outT = BTree.moveTo(trailing, leading, key, false, cmp); - const outL = BTree.moveTo(leading, trailing, key, false, cmp); + const outT = BTree.moveTo(trailing, leading, key, false, areEqual, cmp); + const outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); if (outT && outL) break; } else { - const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, cmp); + const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (highestDisjoint !== undefined) { addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); highestDisjoint = undefined; @@ -838,7 +838,7 @@ export default class BTree implements ISortedMapF, ISortedMap const maxKeyLeft = left._root.maxKey() as K; const maxKeyRight = right._root.maxKey() as K; const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; - BTree.moveTo(leading, trailing, maxKey, false, cmp); + BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); break; } } @@ -857,6 +857,7 @@ export default class BTree implements ISortedMapF, ISortedMap other: MergeCursor, targetKey: K, isInclusive: boolean, + startedEqual: boolean, cmp: (a:K,b:K)=>number ): boolean { // We should start before the target (or at it if inclusive) @@ -868,7 +869,7 @@ export default class BTree implements ISortedMapF, ISortedMap const i = leaf.indexOf(targetKey, -1, cmp); const destInLeaf = i < 0 ? ~i : (isInclusive ? 
i : i + 1); if (destInLeaf < leaf.keys.length) { - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, isInclusive, other); + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual, other); cur.leafIndex = destInLeaf; return false; } @@ -896,7 +897,7 @@ export default class BTree implements ISortedMapF, ISortedMap // Exit leaf; we did walk out of it conceptually const startIndex = cur.leafIndex; - cur.onExitLeaf(leaf, startIndex, isInclusive, cur.leafPayload, other); + cur.onExitLeaf(leaf, startIndex, startedEqual, cur.leafPayload, other); if (descentLevel < 0) { // No descent point; step up all the way; last callback gets Infinity From eea4efce8d879211c70ff0e4455576f1998bd7f0 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Fri, 31 Oct 2025 17:44:50 -0700 Subject: [PATCH 013/143] wip impl --- b+tree.js | 257 ++++++++++++++++++++++++++++++++++++++++++++++----- b+tree.ts | 271 ++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 480 insertions(+), 48 deletions(-) diff --git a/b+tree.js b/b+tree.js index 8b79379..32a790f 100644 --- a/b+tree.js +++ b/b+tree.js @@ -528,13 +528,13 @@ var BTree = /** @class */ (function () { var vA = curA.leaf.values[curA.leafIndex]; var vB = curB.leaf.values[curB.leafIndex]; intersection(key, vA, vB); - var outT = BTree.moveTo(trailing, leading, key, false, cmp); - var outL = BTree.moveTo(leading, trailing, key, false, cmp); + var outT = BTree.moveTo(trailing, leading, key, false, areEqual, cmp); + var outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); if (outT && outL) break; } else { - var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, cmp); + var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (out) { // We've reached the end of one tree, so intersections are guaranteed to be done. 
break; @@ -552,20 +552,233 @@ var BTree = /** @class */ (function () { * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. */ BTree.prototype.merge = function (other, merge) { - // Fast paths for empty trees + // 1. Throw if comparators or max node sizes differ + if (this._compare !== other._compare) + throw new Error("Cannot merge BTrees with different comparators."); + if (this._maxNodeSize !== other._maxNodeSize) + throw new Error("Cannot merge BTrees with different max node sizes."); + // 2. Early outs for empty trees (cheap clone of the non-empty tree) var sizeThis = this._root.size(); var sizeOther = other._root.size(); if (sizeThis === 0) return other.clone(); if (sizeOther === 0) return this.clone(); - // Ensure both trees share the same comparator reference - if (this._compare !== other._compare) - throw new Error("Cannot merge BTrees with different comparators."); - if (this._maxNodeSize !== other._maxNodeSize) - throw new Error("Cannot merge BTrees with different max node sizes."); - var _a = BTree.decompose(this, other, merge), disjoint = _a.disjoint, tallestHeight = _a.tallestHeight, tallestIndex = _a.tallestIndex; - throw new Error("Not yet implemented: BTree.merge"); + // 3. Decompose into disjoint subtrees and merged leaves + var _a = BTree.decompose(this, other, merge), disjoint = _a.disjoint, tallestIndex = _a.tallestIndex, tallestHeight = _a.tallestHeight; + // 4. 
Start result at the tallest subtree from the disjoint set + var result = new BTree(undefined, this._compare, this._maxNodeSize); + var root = disjoint[tallestIndex][1]; + var height = tallestHeight; // number of internal levels; 0 means leaf + result._root = root; + var max = this._maxNodeSize; + // Build a spine (frontier) from the root down the chosen side (no leaf included) + var buildFrontier = function (r, rightSide) { + var spine = []; + var n = r; + while (!n.isLeaf) { + var ni = n; + spine.push(ni); + n = ni.children[rightSide ? ni.children.length - 1 : 0]; + } + return spine; + }; + // 6. Initialize left/right frontiers of current tree (root=tallest subtree) + var rightFrontier = buildFrontier(root, true); + var leftFrontier = buildFrontier(root, false); + // 7. Track highest shared frontier depth for the currently processed frontier + // Depth is indexed from root=0 down to parent-of-leaf=(height-1). + // Any changes at or below 'depthTo' require cloning from isSharedFrontierDepth..depthTo. + var processSide = function (start, end, step, rightSide) { + var spine = rightSide ? rightFrontier : leftFrontier; + var isSharedFrontierDepth = 0; + // 8. Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) + var updateSpineSizes = function (sp, depthToExclusive) { + // Update from (isSharedFrontierDepth + 1) up to (depthToExclusive - 1) + var startDepth = isSharedFrontierDepth + 1; + var endDepth = Math.max(0, depthToExclusive); + for (var d = startDepth; d < endDepth && d < sp.length; d++) { + var node = sp[d]; + node._size = sumChildSizes(node.children); + } + }; + // 9. 
Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is unique + var ensureNotShared = function (sp, depthToInclusive) { + if (sp.length === 0) + return; // nothing to clone when root is a leaf; equal-height case will handle this + // Clone root if needed first (depth 0) + if (isSharedFrontierDepth === 0) { + var atRoot = sp[0]; + if (atRoot.isShared) { + var clonedRoot = atRoot.clone(); + result._root = clonedRoot; + sp[0] = clonedRoot; + // After cloning an internal node, its children become explicitly shared + } + } + // Clone downward along the frontier to 'depthToInclusive' + for (var d = Math.max(isSharedFrontierDepth, 0); d <= depthToInclusive && d < sp.length; d++) { + if (d === 0) + continue; // already considered root above + var parent = sp[d - 1]; + var childIndex = rightSide ? parent.children.length - 1 : 0; + var child = parent.children[childIndex]; + // Clone regardless; guarantees uniqueness down this path + var clonedChild = child.clone(); + parent.children[childIndex] = clonedChild; + parent.keys[childIndex] = clonedChild.maxKey(); + sp[d] = clonedChild; + } + }; + // Find the first ancestor (starting at insertionDepth) with capacity + var findExpansionDepth = function (sp, insertionDepth) { + for (var d = insertionDepth; d >= 0; d--) { + if (sp[d].keys.length < max) + return d; + } + return -1; // no capacity, will need a new root + }; + // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. 
+ var appendAndCascade = function (sp, insertionDepth, subtree) { + var carry; + // 10.1.5 Append at insertionDepth + var node = sp[insertionDepth]; + if (rightSide) { + if (node.keys.length < max) { + node.insert(node.children.length, subtree); + } + else { + var newRight = node.splitOffRightSide(); + newRight.insert(newRight.children.length, subtree); + carry = newRight; + } + } + else { + if (node.keys.length < max) { + node.insert(0, subtree); + } + else { + var newRight = node.splitOffRightSide(); + // Insert into left half at index 0 (new child is the new leftmost) + node.insert(0, subtree); + carry = newRight; + } + } + // Bubble new right siblings upward until a node with capacity accepts them or we reach root + var d = insertionDepth - 1; + while (carry && d >= 0) { + var parent = sp[d]; + if (rightSide) { + if (parent.keys.length < max) { + parent.insert(parent.children.length, carry); + carry = undefined; + } + else { + var newRight = parent.splitOffRightSide(); + newRight.insert(newRight.children.length, carry); + carry = newRight; + } + } + else { + if (parent.keys.length < max) { + parent.insert(1, carry); // directly to the right of leftmost child + carry = undefined; + } + else { + var newRight = parent.splitOffRightSide(); + parent.insert(1, carry); + carry = newRight; + } + } + d--; + } + // If still carrying after root, create a new root + if (carry) { + var oldRoot = result._root; + var children = rightSide ? [oldRoot, carry] : [oldRoot, carry]; + var newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); + result._root = newRoot; + // Height increases by 1 + height += 1; + // Replace spine with new frontier for this side + sp = rightSide ? buildFrontier(result._root, true) : buildFrontier(result._root, false); + if (rightSide) + rightFrontier = sp; + else + leftFrontier = sp; + } + // Refresh ancestor keys along the side from insertionDepth up to root, + // since rightmost/leftmost child maxKey may have changed. 
+ for (var u = Math.min(insertionDepth, sp.length - 1); u >= 0; u--) { + var parent = sp[u]; + var idx = rightSide ? parent.children.length - 1 : 0; + parent.keys[idx] = parent.children[idx].maxKey(); + } + // Rebuild the frontier from the (possibly new) root to ensure it reflects all structural changes. + sp = rightSide ? buildFrontier(result._root, true) : buildFrontier(result._root, false); + if (rightSide) + rightFrontier = sp; + else + leftFrontier = sp; + // 10.1.6 Set isSharedFrontierDepth to insertionDepth (relative to current height) + // If height changed due to a root split, adjust to parent-of-subtree in the new tree: + var subtreeHeight = computeHeight(subtree); + isSharedFrontierDepth = Math.max(0, (height - (subtreeHeight + 1))); + }; + // Utility to compute height (number of internal levels) of a subtree + var computeHeight = function (node) { + var h = -1, n = node; + while (n) { + h++; + n = n.isLeaf ? undefined : n.children[0]; + } + return h; + }; + // Iterate the assigned half of the disjoint set + for (var i = start; step > 0 ? i <= end : i >= end; i += step) { + var subtree = disjoint[i][1]; + var subtreeHeight = disjoint[i][0]; + if (subtreeHeight < height) { + // 10.1.1 Determine insertion depth on this frontier + var insertionDepth = height - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' + // 10.1.2 Ensure path is unshared before mutation + ensureNotShared(spine, insertionDepth); + // 10.1.3 Calculate expansion depth (first ancestor with capacity) + var expansionDepth = findExpansionDepth(spine, insertionDepth); + // 10.1.4 Update sizes on spine above the shared ancestor before we expand + updateSpineSizes(spine, expansionDepth >= 0 ? 
expansionDepth : 0); + // 10.1.5 Append and cascade splits upward + appendAndCascade(spine, insertionDepth, subtree); + } + else { + // 10.2 Equal height; create a new root combining [old root, subtree] (right) or [subtree, old root] (left) + // 10.2.2 Update spine sizes before shifting frontier inward + updateSpineSizes(spine, 0); + // 10.2.3 Create a new root with children [old root, subtree] or [subtree, old root] + var oldRoot = result._root; + var newChildren = rightSide ? [oldRoot, subtree] : [subtree, oldRoot]; + var newRoot = new BNodeInternal(newChildren, oldRoot.size() + subtree.size()); + result._root = root = newRoot; + height += 1; + // 10.2.4 Update the frontier to match the new root and appended subtree side + spine = rightSide ? buildFrontier(result._root, true) : buildFrontier(result._root, false); + if (rightSide) + rightFrontier = spine; + else + leftFrontier = spine; + // 10.2.5 Set isSharedFrontierDepth to 1 + isSharedFrontierDepth = 1; + } + } + }; + // 10. Process all subtrees to the right of the tallest subtree + if (tallestIndex + 1 <= disjoint.length - 1) + processSide(tallestIndex + 1, disjoint.length - 1, +1, true); + // 11. Process all subtrees to the left of the tallest subtree (reverse order) + if (tallestIndex - 1 >= 0) + processSide(tallestIndex - 1, 0, -1, false); + // 12. Return the resulting tree + return result; }; /** * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes @@ -636,18 +849,18 @@ var BTree = /** @class */ (function () { check(destIndex === 0, "onEnterLeaf: destIndex must be 0 if not overlapping"); } }; - var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, isInclusive, _other) { + var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual, _other) { check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); - var start = isInclusive ? fromIndex : fromIndex + 1; + var start = startedEqual ? 
fromIndex + 1 : fromIndex; pushLeafRange(leaf, start, Math.min(toIndex, leaf.keys.length)); }; - var onExitLeaf = function (leaf, startingIndex, isInclusive, payload, _other) { + var onExitLeaf = function (leaf, startingIndex, startedEqual, payload, _other) { highestDisjoint = undefined; if (!payload.disqualified) { highestDisjoint = { node: leaf, height: 0 }; } else { - var start = isInclusive ? startingIndex : startingIndex + 1; + var start = startedEqual ? startingIndex + 1 : startingIndex; pushLeafRange(leaf, start, leaf.keys.length); } }; @@ -715,13 +928,13 @@ var BTree = /** @class */ (function () { var merged = mergeValues(key, vA, vB); if (merged !== undefined) pending.push([key, merged]); - var outT = BTree.moveTo(trailing, leading, key, false, cmp); - var outL = BTree.moveTo(leading, trailing, key, false, cmp); + var outT = BTree.moveTo(trailing, leading, key, false, areEqual, cmp); + var outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); if (outT && outL) break; } else { - var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, cmp); + var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (highestDisjoint !== undefined) { addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); highestDisjoint = undefined; @@ -730,7 +943,7 @@ var BTree = /** @class */ (function () { var maxKeyLeft = left._root.maxKey(); var maxKeyRight = right._root.maxKey(); var maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; - BTree.moveTo(leading, trailing, maxKey, false, cmp); + BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); break; } } @@ -742,7 +955,7 @@ var BTree = /** @class */ (function () { * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). 
*/ - BTree.moveTo = function (cur, other, targetKey, isInclusive, cmp) { + BTree.moveTo = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { // We should start before the target (or at it if inclusive) var keyPos = cmp(BTree.getKey(cur), targetKey); check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo precondition violated"); @@ -751,7 +964,7 @@ var BTree = /** @class */ (function () { var i = leaf.indexOf(targetKey, -1, cmp); var destInLeaf = i < 0 ? ~i : (isInclusive ? i : i + 1); if (destInLeaf < leaf.keys.length) { - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, isInclusive, other); + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual, other); cur.leafIndex = destInLeaf; return false; } @@ -775,7 +988,7 @@ var BTree = /** @class */ (function () { var heightOf = function (sIndex) { return spine.length - sIndex; }; // Exit leaf; we did walk out of it conceptually var startIndex = cur.leafIndex; - cur.onExitLeaf(leaf, startIndex, isInclusive, cur.leafPayload, other); + cur.onExitLeaf(leaf, startIndex, startedEqual, cur.leafPayload, other); if (descentLevel < 0) { // No descent point; step up all the way; last callback gets Infinity for (var s = spine.length - 1; s >= 0; --s) { diff --git a/b+tree.ts b/b+tree.ts index 889097f..ad9784b 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -633,27 +633,238 @@ export default class BTree implements ISortedMapF, ISortedMap * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. 
*/ merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { - // Fast paths for empty trees + if (this._compare !== other._compare) + throw new Error("Cannot merge BTrees with different comparators."); + if (this._maxNodeSize !== other._maxNodeSize) + throw new Error("Cannot merge BTrees with different max node sizes."); + + // Early outs for empty trees (cheap clone of the non-empty tree) const sizeThis = this._root.size(); const sizeOther = other._root.size(); - if (sizeThis === 0) return other.clone(); if (sizeOther === 0) return this.clone(); - // Ensure both trees share the same comparator reference - if (this._compare !== other._compare) - throw new Error("Cannot merge BTrees with different comparators."); - if (this._maxNodeSize !== other._maxNodeSize) - throw new Error("Cannot merge BTrees with different max node sizes."); + // Decompose into disjoint subtrees and merged leaves + const { disjoint, tallestIndex } = BTree.decompose(this, other, merge); + + // Start result at the tallest subtree from the disjoint set + const [initialHeight, initialRoot] = disjoint[tallestIndex]; + const branchingFactor = this._maxNodeSize; + let rightFrontier: BNode[] = [initialRoot], leftFrontier: BNode[] = [initialRoot]; + BTree.updateFrontier(rightFrontier, 0, BTree.getRightmostChild); + BTree.updateFrontier(leftFrontier, 0, BTree.getLeftmostChild); + + // Process all subtrees to the right of the tallest subtree + if (tallestIndex + 1 <= disjoint.length - 1) + BTree.processSide(branchingFactor, initialHeight,disjoint, rightFrontier, tallestIndex + 1, disjoint.length - 1, +1, true, BTree.getRightmostChild); + + // Process all subtrees to the left of the tallest subtree (reverse order) + if (tallestIndex - 1 >= 0) + BTree.processSide(branchingFactor, initialHeight, disjoint, leftFrontier, tallestIndex - 1, 0, -1, false, BTree.getLeftmostChild); - const { disjoint, tallestHeight, tallestIndex} = BTree.decompose(this, other, merge); + const 
merged = new BTree(undefined, this._compare, this._maxNodeSize); + check(rightFrontier[0] === leftFrontier[0], "Root mismatch after merge processing."); + merged._root = rightFrontier[0]; - throw new Error("Not yet implemented: BTree.merge"); + // Return the resulting tree + return merged; } + /** + * Track highest shared frontier depth for the currently processed frontier + * Depth is indexed from root=0 down to parent-of-leaf=(height-1). + * Any changes at or below 'depthTo' require cloning from isSharedFrontierDepth..depthTo. + */ + private static processSide( + branchingFactor: number, + initialHeight: number, + disjoint: DisjointEntry[], + spine: BNode[], + start: number, + end: number, + step: number, + rightSide: boolean, + frontierChildIndex: (node: BNodeInternal) => number): void { + + // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. + const appendAndCascade = (insertionDepth: number, subtree: BNode): BNodeInternal | undefined => { + check(spine.length > 1 && insertionDepth < spine.length - 1, "Invalid insertion at leaf level."); + let carry: BNode | undefined = subtree; + // Append at insertionDepth and bubble new right siblings upward until a node with capacity accepts them or we reach root + let d = insertionDepth; + // TODO REVIEW FOR SIZE UPWARDS AND SPLITTING ERRORS...split is carried tho??? 
+ while (carry && d >= 0) { + const parent = spine[d] as BNodeInternal; + if (rightSide) { + if (parent.keys.length < branchingFactor) { + parent.insert(parent.children.length, carry); + carry = undefined; + } else { + const newRight = parent.splitOffRightSide(); + newRight.insert(newRight.children.length, carry); + carry = newRight; + } + } else { + if (parent.keys.length < branchingFactor) { + parent.insert(0, carry); + carry = undefined; + } else { + const newRight = parent.splitOffRightSide(); + parent.insert(0, carry); + carry = newRight; + } + } + d--; + } + // If still carrying after root, create a new root + if (carry) { + const oldRoot = spine[0] as BNodeInternal; + const children = rightSide ? [oldRoot, carry] : [oldRoot, carry]; + const newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); + return newRoot; + } + return undefined; + }; + + let isSharedFrontierDepth = initialHeight; + const unflushedSizes: number[] = []; + + // Iterate the assigned half of the disjoint set + for (let i = start; step != end; i += step) { + const currentHeight = spine.length; // height is number of internal levels; 0 means leaf + const subtree = disjoint[i][1]; + const subtreeHeight = disjoint[i][0]; + let highestNewNodeDepth: number; + if (subtreeHeight < currentHeight) { + // Determine insertion depth on this frontier + const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' + + // Ensure path is unshared before mutation + BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, frontierChildIndex); + + // Calculate expansion depth (first ancestor with capacity) + const expansionDepth = Math.max(0, BTree.findExpansionDepth(spine, insertionDepth, branchingFactor)); + + // Update sizes on spine above the shared ancestor before we expand + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, frontierChildIndex); + + // Append and cascade splits upward + 
const newRoot = appendAndCascade(insertionDepth, subtree); + if (newRoot) { + // Set the spine root to the highest up new node; the rest of the spine is updated below + spine[0] = newRoot; + } + isSharedFrontierDepth = insertionDepth; + highestNewNodeDepth = expansionDepth; + } else { + // Equal height case, create a new root combining roots + // No need to clone path, since we are adding a new root instead of inserting/mutating + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, frontierChildIndex); + + const oldRoot = spine[0]; + const newChildren = rightSide ? [oldRoot, subtree] : [subtree, oldRoot]; + const newRoot = new BNodeInternal(newChildren, oldRoot.size() + subtree.size()); + spine[0] = newRoot; + // first shared node is just below new root + isSharedFrontierDepth = 1; + highestNewNodeDepth = 0; + } + + // Finally, update the frontier from the highest new node downward + // Note that this is often the point where the new subtree is attached, + // but in the case of cascaded splits it may be higher up. 
+ BTree.updateFrontier(spine, highestNewNodeDepth, frontierChildIndex); + } + }; + + // Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is mutable + private static ensureNotShared( + spine: BNode[], + isSharedFrontierDepth: number, + depthToInclusive: number, + frontierChildIndex: (node: BNodeInternal) => number) { + if (spine.length === 1) + return; // nothing to clone when root is a leaf; equal-height case will handle this + + check(spine[isSharedFrontierDepth].isShared === true, "Expected shared root at isSharedFrontierDepth 0"); + + // Clone root if needed first (depth 0) + if (isSharedFrontierDepth === 0) { + const root = spine[0]; + spine[0] = root.clone() as BNodeInternal; + } + + // Clone downward along the frontier to 'depthToInclusive' + for (let depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { + const parent = spine[depth - 1] as BNodeInternal; + const childIndex = frontierChildIndex(parent); + const clone = parent.children[childIndex].clone(); + parent.children[childIndex] = clone; + parent.keys[childIndex] = clone.maxKey(); + spine[depth] = clone as BNodeInternal; + } + }; + + /** + * Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) + */ + private static updateSizeAndMax( + spine: BNode[], + unflushedSizes: number[], + isSharedFrontierDepth: number, + depthUpToInclusive: number, + frontierChildIndex: (node: BNodeInternal) => number) { + // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because + // the insertion point is inside a shared node which will always have correct sizes + const maxKey = spine[isSharedFrontierDepth].maxKey(); + const startDepth = isSharedFrontierDepth - 1; + for (let depth = startDepth; depth >= depthUpToInclusive; depth--) { + const sizeAtLevel = unflushedSizes[depth]; + unflushedSizes[depth] = 0; // we are propagating it now + if (depth > 0) { + // propagate size upward, will be added lazily, either when a subtree 
is appended at or above that level or + // at the end of processing the entire side + unflushedSizes[depth - 1] += sizeAtLevel; + } + const node = spine[depth] as BNodeInternal; + node._size += sizeAtLevel; + node.keys[frontierChildIndex(node)] = maxKey; + } + }; + + /** + * Update a spine (frontier) from a specific depth down, inclusive + */ + private static updateFrontier(frontier: BNode[], depthLastValid: number, frontierChildIndex: (node: BNodeInternal) => number): void { + check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); + const startingAncestor = frontier[depthLastValid]; + if (startingAncestor.isLeaf) + return; + const an = startingAncestor as BNodeInternal; + let cur: BNode = an.children[frontierChildIndex(an)]; + let depth = depthLastValid + 1; + while (!cur.isLeaf) { + const ni = cur as BNodeInternal; + frontier[depth] = ni; + cur = ni.children[frontierChildIndex(ni)]; + depth++; + } + frontier[depth] = cur; + }; + + /** + * Find the first ancestor (starting at insertionDepth) with capacity + */ + private static findExpansionDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { + for (let depth = insertionDepth; depth >= 0; depth--) { + if (spine[depth].keys.length < branchingFactor) + return depth; + } + return -1; // no capacity, will need a new root + }; /** * Decomposes two BTrees into disjoint nodes. 
Reuses interior nodes when they do not overlap/intersect with any leaf nodes @@ -845,7 +1056,7 @@ export default class BTree implements ISortedMapF, ISortedMap } flushPendingEntries(); - return { disjoint, tallestIndex, tallestHeight }; + return { disjoint, tallestIndex }; } /** @@ -925,10 +1136,10 @@ export default class BTree implements ISortedMapF, ISortedMap let node: BNode = spine[descentLevel].node.children[descentIndex]; let height = heightOf(descentLevel) - 1; - while (!(node as any).isLeaf) { - const ni = node as unknown as BNodeInternal; + while (!node.isLeaf) { + const ni = node as BNodeInternal; const j = ni.indexOf(targetKey, 0, cmp); - const stepDownIndex = (j as number) + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 1 : 0)); + const stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 1 : 0)); const payload = cur.mkPayload(ni); spine.push({ node: ni, childIndex: stepDownIndex, payload }); cur.onStepDown(ni, height, payload, stepDownIndex, other); @@ -937,20 +1148,21 @@ export default class BTree implements ISortedMapF, ISortedMap } // Enter destination leaf - const newLeaf = node as BNode; - const leafPayload = cur.mkPayload(newLeaf); - const idx = newLeaf.indexOf(targetKey, -1, cmp); + const leafPayload = cur.mkPayload(node); + const idx = node.indexOf(targetKey, -1, cmp); const destIndex = idx < 0 ? ~idx : (isInclusive ? idx : idx + 1); - check(destIndex >= 0 && destIndex < newLeaf.keys.length, "moveTo: destination out of bounds"); + check(destIndex >= 0 && destIndex < node.keys.length, "moveTo: destination out of bounds"); - cur.onEnterLeaf(newLeaf, leafPayload, destIndex, other); - cur.leaf = newLeaf; + cur.onEnterLeaf(node, leafPayload, destIndex, other); + cur.leaf = node; cur.leafPayload = leafPayload; cur.leafIndex = destIndex; return false; } - /** Create a cursor at the leftmost key. */ + /** + * Create a cursor pointing to the leftmost key of the supplied tree. 
+ */ private static createCursor( tree: BTree, mkPayload: (n: BNode) => TP, @@ -963,16 +1175,15 @@ export default class BTree implements ISortedMapF, ISortedMap check(tree._root.size() > 0, "createCursor: cannot create a cursor for an empty tree"); const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; let n: BNode = tree._root; - while (!(n as any).isLeaf) { - const ni = n as unknown as BNodeInternal; + while (!n.isLeaf) { + const ni = n as BNodeInternal; const payload = mkPayload(ni); spine.push({ node: ni, childIndex: 0, payload }); n = ni.children[0]; } - const leaf = n as BNode; - const leafPayload = mkPayload(leaf); + const leafPayload = mkPayload(n); const cur: MergeCursor = { - tree, leaf, leafIndex: 0, spine, leafPayload, mkPayload, + tree, leaf: n, leafIndex: 0, spine, leafPayload, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown }; return cur; @@ -995,6 +1206,14 @@ export default class BTree implements ISortedMapF, ISortedMap return !(cmp(amax, bmin) < 0 || cmp(bmax, amin) < 0); } + private static getLeftmostChild(): number { + return 0; + } + + private static getRightmostChild(node: BNodeInternal): number { + return node.children.length - 1; + } + /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. @@ -2308,7 +2527,7 @@ interface MergeCursor { } type DisjointEntry = [height: number, node: BNode]; -type DecomposeResult = { disjoint: DisjointEntry[], tallestIndex: number, tallestHeight: number }; +type DecomposeResult = { disjoint: DisjointEntry[], tallestIndex: number }; // Optimization: this array of `undefined`s is used instead of a normal // array of values in nodes where `undefined` is the only value. 
From 0e6006c8609a3d7af6f360d4536ac1f9a1708ca5 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sat, 1 Nov 2025 09:14:14 -0700 Subject: [PATCH 014/143] progress --- b+tree.ts | 126 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 77 insertions(+), 49 deletions(-) diff --git a/b+tree.ts b/b+tree.ts index ad9784b..fd37c8b 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -688,49 +688,14 @@ export default class BTree implements ISortedMapF, ISortedMap rightSide: boolean, frontierChildIndex: (node: BNodeInternal) => number): void { - // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. - const appendAndCascade = (insertionDepth: number, subtree: BNode): BNodeInternal | undefined => { - check(spine.length > 1 && insertionDepth < spine.length - 1, "Invalid insertion at leaf level."); - let carry: BNode | undefined = subtree; - // Append at insertionDepth and bubble new right siblings upward until a node with capacity accepts them or we reach root - let d = insertionDepth; - // TODO REVIEW FOR SIZE UPWARDS AND SPLITTING ERRORS...split is carried tho??? - while (carry && d >= 0) { - const parent = spine[d] as BNodeInternal; - if (rightSide) { - if (parent.keys.length < branchingFactor) { - parent.insert(parent.children.length, carry); - carry = undefined; - } else { - const newRight = parent.splitOffRightSide(); - newRight.insert(newRight.children.length, carry); - carry = newRight; - } - } else { - if (parent.keys.length < branchingFactor) { - parent.insert(0, carry); - carry = undefined; - } else { - const newRight = parent.splitOffRightSide(); - parent.insert(0, carry); - carry = newRight; - } - } - d--; - } - - // If still carrying after root, create a new root - if (carry) { - const oldRoot = spine[0] as BNodeInternal; - const children = rightSide ? 
[oldRoot, carry] : [oldRoot, carry]; - const newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); - return newRoot; - } - return undefined; - }; - let isSharedFrontierDepth = initialHeight; - const unflushedSizes: number[] = []; + // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. + // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. + // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. + // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. + // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. + // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. + const unflushedSizes: number[] = new Array(initialHeight).fill(0); // pre-fill to avoid "holey" array // Iterate the assigned half of the disjoint set for (let i = start; step != end; i += step) { @@ -746,17 +711,19 @@ export default class BTree implements ISortedMapF, ISortedMap BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, frontierChildIndex); // Calculate expansion depth (first ancestor with capacity) - const expansionDepth = Math.max(0, BTree.findExpansionDepth(spine, insertionDepth, branchingFactor)); + const expansionDepth = Math.max(0, BTree.findCascadeEndDepth(spine, insertionDepth, branchingFactor)); // Update sizes on spine above the shared ancestor before we expand BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, frontierChildIndex); // Append and cascade splits upward - const newRoot = appendAndCascade(insertionDepth, subtree); + const newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, rightSide); if (newRoot) { // Set the spine root to the highest up new node; the rest of 
the spine is updated below spine[0] = newRoot; + unflushedSizes.unshift(0); } + unflushedSizes[insertionDepth] = subtree.size(); isSharedFrontierDepth = insertionDepth; highestNewNodeDepth = expansionDepth; } else { @@ -768,6 +735,7 @@ export default class BTree implements ISortedMapF, ISortedMap const newChildren = rightSide ? [oldRoot, subtree] : [subtree, oldRoot]; const newRoot = new BNodeInternal(newChildren, oldRoot.size() + subtree.size()); spine[0] = newRoot; + unflushedSizes.unshift(0); // first shared node is just below new root isSharedFrontierDepth = 1; highestNewNodeDepth = 0; @@ -780,6 +748,52 @@ export default class BTree implements ISortedMapF, ISortedMap } }; + // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. + private static appendAndCascade( + spine: BNode[], + insertionDepth: number, + branchingFactor: number, + subtree: BNode, + rightSide: boolean): BNodeInternal | undefined { + check(spine.length > 1 && insertionDepth < spine.length - 1, "Invalid insertion at leaf level."); + let carry: BNode | undefined = subtree; + // Append at insertionDepth and bubble new right siblings upward until a node with capacity accepts them or we reach root + let d = insertionDepth; + // TODO REVIEW FOR SIZE UPWARDS AND SPLITTING ERRORS...split is carried tho??? 
+ while (carry && d >= 0) { + const parent = spine[d] as BNodeInternal; + if (rightSide) { + if (parent.keys.length < branchingFactor) { + parent.insert(parent.children.length, carry); + carry = undefined; + } else { + const newRight = parent.splitOffRightSide(); + newRight.insert(newRight.children.length, carry); + carry = newRight; + } + } else { + if (parent.keys.length < branchingFactor) { + parent.insert(0, carry); + carry = undefined; + } else { + const newLeft = parent.splitOffLeftSide(); + newLeft.insert(0, carry); + carry = newLeft; + } + } + d--; + } + + // If still carrying after root, create a new root + if (carry) { + const oldRoot = spine[0] as BNodeInternal; + const children = rightSide ? [oldRoot, carry] : [oldRoot, carry]; + const newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); + return newRoot; + } + return undefined; + }; + // Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is mutable private static ensureNotShared( spine: BNode[], @@ -858,7 +872,7 @@ export default class BTree implements ISortedMapF, ISortedMap /** * Find the first ancestor (starting at insertionDepth) with capacity */ - private static findExpansionDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { + private static findCascadeEndDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { for (let depth = insertionDepth; depth >= 0; depth--) { if (spine[depth].keys.length < branchingFactor) return depth; @@ -1005,12 +1019,11 @@ export default class BTree implements ISortedMapF, ISortedMap }; // Initialize cursors at minimum keys. 
- type Pay = MergeCursorPayload; - const curA = BTree.createCursor(left, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); - const curB = BTree.createCursor(right, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + const curA = BTree.createCursor(left, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + const curB = BTree.createCursor(right, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); // Initialize disqualification w.r.t. opposite leaf. - const initDisqualify = (cur: MergeCursor, otherLeaf: BNode) => { + const initDisqualify = (cur: MergeCursor, otherLeaf: BNode) => { if (BTree.areOverlapping(cur.leaf, otherLeaf, cmp)) cur.leafPayload.disqualified = true; for (let i = 0; i < cur.spine.length; ++i) { @@ -2364,6 +2377,21 @@ class BNodeInternal extends BNode { return newNode; } + /** + * Split this node. + * Modifies this to remove the first half of the items, returning a separate node containing them. 
+ */ + splitOffLeftSide() { + // assert !this.isShared; + var half = this.children.length >> 1; + var newChildren = this.children.splice(0, half); + var newKeys = this.keys.splice(0, half); + var movedSize = sumChildSizes(newChildren); + var newNode = new BNodeInternal(newChildren, movedSize, newKeys); + this._size -= movedSize; + return newNode; + } + takeFromRight(rhs: BNode) { // Reminder: parent node must update its copy of key for this node // assert: neither node is shared From ade77352aa5526ba8d74c4042fd3e8621c9968ed Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sat, 1 Nov 2025 10:57:39 -0700 Subject: [PATCH 015/143] fixing bugs --- b+tree.d.ts | 25 ++- b+tree.js | 472 ++++++++++++++++++++++++++-------------------------- b+tree.ts | 142 ++++++++-------- 3 files changed, 332 insertions(+), 307 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index a118ba2..06db83a 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -271,6 +271,25 @@ export default class BTree implements ISortedMapF, ISort * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. */ merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree; + /** + * Processes one side (left or right) of the disjoint subtree set during a merge operation. + * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. + */ + private static processSide; + private static appendAndCascade; + private static ensureNotShared; + /** + * Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) + */ + private static updateSizeAndMax; + /** + * Update a spine (frontier) from a specific depth down, inclusive + */ + private static updateFrontier; + /** + * Find the first ancestor (starting at insertionDepth) with capacity + */ + private static findCascadeEndDepth; /** * Decomposes two BTrees into disjoint nodes. 
Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. @@ -281,11 +300,15 @@ export default class BTree implements ISortedMapF, ISort * Returns true if end-of-tree was reached (cursor not structurally mutated). */ private static moveTo; - /** Create a cursor at the leftmost key. */ + /** + * Create a cursor pointing to the leftmost key of the supplied tree. + */ private static createCursor; private static getKey; private static getLeaf; private static areOverlapping; + private static getLeftmostChild; + private static getRightmostChild; /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. diff --git a/b+tree.js b/b+tree.js index 32a790f..201b8ee 100644 --- a/b+tree.js +++ b/b+tree.js @@ -508,12 +508,12 @@ var BTree = /** @class */ (function () { if (other.size === 0 || this.size === 0) return; // Cursor payload factory - var mkPayload = function (_) { return undefined; }; + var makePayload = function (_) { return undefined; }; // Callbacks var empty = function () { }; // Initialize cursors at minimum keys. - var curA = BTree.createCursor(this, mkPayload, empty, empty, empty, empty, empty); - var curB = BTree.createCursor(other, mkPayload, empty, empty, empty, empty, empty); + var curA = BTree.createCursor(this, makePayload, empty, empty, empty, empty, empty, empty); + var curB = BTree.createCursor(other, makePayload, empty, empty, empty, empty, empty, empty); // Walk both cursors while (true) { var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); @@ -552,234 +552,208 @@ var BTree = /** @class */ (function () { * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. */ BTree.prototype.merge = function (other, merge) { - // 1. 
Throw if comparators or max node sizes differ if (this._compare !== other._compare) throw new Error("Cannot merge BTrees with different comparators."); if (this._maxNodeSize !== other._maxNodeSize) throw new Error("Cannot merge BTrees with different max node sizes."); - // 2. Early outs for empty trees (cheap clone of the non-empty tree) + // Early outs for empty trees (cheap clone of the non-empty tree) var sizeThis = this._root.size(); var sizeOther = other._root.size(); if (sizeThis === 0) return other.clone(); if (sizeOther === 0) return this.clone(); - // 3. Decompose into disjoint subtrees and merged leaves - var _a = BTree.decompose(this, other, merge), disjoint = _a.disjoint, tallestIndex = _a.tallestIndex, tallestHeight = _a.tallestHeight; - // 4. Start result at the tallest subtree from the disjoint set - var result = new BTree(undefined, this._compare, this._maxNodeSize); - var root = disjoint[tallestIndex][1]; - var height = tallestHeight; // number of internal levels; 0 means leaf - result._root = root; - var max = this._maxNodeSize; - // Build a spine (frontier) from the root down the chosen side (no leaf included) - var buildFrontier = function (r, rightSide) { - var spine = []; - var n = r; - while (!n.isLeaf) { - var ni = n; - spine.push(ni); - n = ni.children[rightSide ? 
ni.children.length - 1 : 0]; + // Decompose into disjoint subtrees and merged leaves + var _a = BTree.decompose(this, other, merge), disjoint = _a.disjoint, tallestIndex = _a.tallestIndex; + // Start result at the tallest subtree from the disjoint set + var initialRoot = disjoint[tallestIndex][1]; + var branchingFactor = this._maxNodeSize; + var rightFrontier = [initialRoot]; + BTree.updateFrontier(rightFrontier, 0, BTree.getRightmostChild); + // Process all subtrees to the right of the tallest subtree + if (tallestIndex + 1 <= disjoint.length - 1) + BTree.processSide(branchingFactor, disjoint, rightFrontier, tallestIndex + 1, disjoint.length, 1, true, BTree.getRightmostChild); + var leftFrontier = [rightFrontier[0]]; + BTree.updateFrontier(leftFrontier, 0, BTree.getLeftmostChild); + // Process all subtrees to the left of the tallest subtree (reverse order) + if (tallestIndex - 1 >= 0) + BTree.processSide(branchingFactor, disjoint, leftFrontier, tallestIndex - 1, -1, -1, false, BTree.getLeftmostChild); + var merged = new BTree(undefined, this._compare, this._maxNodeSize); + check(rightFrontier[0] === leftFrontier[0], "Root mismatch after merge processing."); + merged._root = rightFrontier[0]; + // Return the resulting tree + return merged; + }; + /** + * Processes one side (left or right) of the disjoint subtree set during a merge operation. + * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. + */ + BTree.processSide = function (branchingFactor, disjoint, spine, start, end, step, rightSide, frontierChildIndex) { + var isSharedFrontierDepth = 0; + var cur = spine[0]; + // Find the first shared node on the frontier + while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { + isSharedFrontierDepth++; + cur = cur.children[frontierChildIndex(cur)]; + } + // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. 
+ // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. + // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. + // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. + // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. + // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. + var unflushedSizes = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array + // Iterate the assigned half of the disjoint set + for (var i = start; step != end; i += step) { + var currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf + var subtree = disjoint[i][1]; + var subtreeHeight = disjoint[i][0]; + var insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' + // Ensure path is unshared before mutation + BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, frontierChildIndex); + // Calculate expansion depth (first ancestor with capacity) + var expansionDepth = Math.max(0, BTree.findCascadeEndDepth(spine, insertionDepth, branchingFactor)); + // Update sizes on spine above the shared ancestor before we expand + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, frontierChildIndex); + // Append and cascade splits upward + var newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, rightSide); + if (newRoot) { + // Set the spine root to the highest up new node; the rest of the spine is updated below + spine[0] = newRoot; + unflushedSizes.forEach(function (count) { return check(count === 0, "Unexpected unflushed size after root split."); }); + unflushedSizes.push(0); // new root level + isSharedFrontierDepth = insertionDepth; } - return 
spine; - }; - // 6. Initialize left/right frontiers of current tree (root=tallest subtree) - var rightFrontier = buildFrontier(root, true); - var leftFrontier = buildFrontier(root, false); - // 7. Track highest shared frontier depth for the currently processed frontier - // Depth is indexed from root=0 down to parent-of-leaf=(height-1). - // Any changes at or below 'depthTo' require cloning from isSharedFrontierDepth..depthTo. - var processSide = function (start, end, step, rightSide) { - var spine = rightSide ? rightFrontier : leftFrontier; - var isSharedFrontierDepth = 0; - // 8. Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) - var updateSpineSizes = function (sp, depthToExclusive) { - // Update from (isSharedFrontierDepth + 1) up to (depthToExclusive - 1) - var startDepth = isSharedFrontierDepth + 1; - var endDepth = Math.max(0, depthToExclusive); - for (var d = startDepth; d < endDepth && d < sp.length; d++) { - var node = sp[d]; - node._size = sumChildSizes(node.children); - } - }; - // 9. Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is unique - var ensureNotShared = function (sp, depthToInclusive) { - if (sp.length === 0) - return; // nothing to clone when root is a leaf; equal-height case will handle this - // Clone root if needed first (depth 0) - if (isSharedFrontierDepth === 0) { - var atRoot = sp[0]; - if (atRoot.isShared) { - var clonedRoot = atRoot.clone(); - result._root = clonedRoot; - sp[0] = clonedRoot; - // After cloning an internal node, its children become explicitly shared - } - } - // Clone downward along the frontier to 'depthToInclusive' - for (var d = Math.max(isSharedFrontierDepth, 0); d <= depthToInclusive && d < sp.length; d++) { - if (d === 0) - continue; // already considered root above - var parent = sp[d - 1]; - var childIndex = rightSide ? 
parent.children.length - 1 : 0; - var child = parent.children[childIndex]; - // Clone regardless; guarantees uniqueness down this path - var clonedChild = child.clone(); - parent.children[childIndex] = clonedChild; - parent.keys[childIndex] = clonedChild.maxKey(); - sp[d] = clonedChild; - } - }; - // Find the first ancestor (starting at insertionDepth) with capacity - var findExpansionDepth = function (sp, insertionDepth) { - for (var d = insertionDepth; d >= 0; d--) { - if (sp[d].keys.length < max) - return d; - } - return -1; // no capacity, will need a new root - }; - // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. - var appendAndCascade = function (sp, insertionDepth, subtree) { - var carry; - // 10.1.5 Append at insertionDepth - var node = sp[insertionDepth]; - if (rightSide) { - if (node.keys.length < max) { - node.insert(node.children.length, subtree); - } - else { - var newRight = node.splitOffRightSide(); - newRight.insert(newRight.children.length, subtree); - carry = newRight; - } + else { + unflushedSizes[insertionDepth] += subtree.size(); + isSharedFrontierDepth = insertionDepth; + } + // Finally, update the frontier from the highest new node downward + // Note that this is often the point where the new subtree is attached, + // but in the case of cascaded splits it may be higher up. + BTree.updateFrontier(spine, expansionDepth, frontierChildIndex); + check(unflushedSizes.length === spine.length - 1, "Unflushed sizes length mismatch after root split."); + } + // Finally, propagate any remaining unflushed sizes upward and update max keys + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, frontierChildIndex); + }; + ; + // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. 
+ BTree.appendAndCascade = function (spine, insertionDepth, branchingFactor, subtree, rightSide) { + check(spine.length > 1 && insertionDepth < spine.length - 1, "Invalid insertion at leaf level."); + var carry = subtree; + // Append at insertionDepth and bubble new right siblings upward until a node with capacity accepts them or we reach root + var d = insertionDepth; + while (carry && d >= 0) { + var parent = spine[d]; + if (rightSide) { + if (parent.keys.length < branchingFactor) { + parent.insert(parent.children.length, carry); + carry = undefined; } else { - if (node.keys.length < max) { - node.insert(0, subtree); - } - else { - var newRight = node.splitOffRightSide(); - // Insert into left half at index 0 (new child is the new leftmost) - node.insert(0, subtree); - carry = newRight; - } - } - // Bubble new right siblings upward until a node with capacity accepts them or we reach root - var d = insertionDepth - 1; - while (carry && d >= 0) { - var parent = sp[d]; - if (rightSide) { - if (parent.keys.length < max) { - parent.insert(parent.children.length, carry); - carry = undefined; - } - else { - var newRight = parent.splitOffRightSide(); - newRight.insert(newRight.children.length, carry); - carry = newRight; - } - } - else { - if (parent.keys.length < max) { - parent.insert(1, carry); // directly to the right of leftmost child - carry = undefined; - } - else { - var newRight = parent.splitOffRightSide(); - parent.insert(1, carry); - carry = newRight; - } - } - d--; - } - // If still carrying after root, create a new root - if (carry) { - var oldRoot = result._root; - var children = rightSide ? [oldRoot, carry] : [oldRoot, carry]; - var newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); - result._root = newRoot; - // Height increases by 1 - height += 1; - // Replace spine with new frontier for this side - sp = rightSide ? 
buildFrontier(result._root, true) : buildFrontier(result._root, false); - if (rightSide) - rightFrontier = sp; - else - leftFrontier = sp; - } - // Refresh ancestor keys along the side from insertionDepth up to root, - // since rightmost/leftmost child maxKey may have changed. - for (var u = Math.min(insertionDepth, sp.length - 1); u >= 0; u--) { - var parent = sp[u]; - var idx = rightSide ? parent.children.length - 1 : 0; - parent.keys[idx] = parent.children[idx].maxKey(); + var newRight = parent.splitOffRightSide(); + newRight.insert(newRight.children.length, carry); + carry = newRight; } - // Rebuild the frontier from the (possibly new) root to ensure it reflects all structural changes. - sp = rightSide ? buildFrontier(result._root, true) : buildFrontier(result._root, false); - if (rightSide) - rightFrontier = sp; - else - leftFrontier = sp; - // 10.1.6 Set isSharedFrontierDepth to insertionDepth (relative to current height) - // If height changed due to a root split, adjust to parent-of-subtree in the new tree: - var subtreeHeight = computeHeight(subtree); - isSharedFrontierDepth = Math.max(0, (height - (subtreeHeight + 1))); - }; - // Utility to compute height (number of internal levels) of a subtree - var computeHeight = function (node) { - var h = -1, n = node; - while (n) { - h++; - n = n.isLeaf ? undefined : n.children[0]; - } - return h; - }; - // Iterate the assigned half of the disjoint set - for (var i = start; step > 0 ? 
i <= end : i >= end; i += step) { - var subtree = disjoint[i][1]; - var subtreeHeight = disjoint[i][0]; - if (subtreeHeight < height) { - // 10.1.1 Determine insertion depth on this frontier - var insertionDepth = height - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' - // 10.1.2 Ensure path is unshared before mutation - ensureNotShared(spine, insertionDepth); - // 10.1.3 Calculate expansion depth (first ancestor with capacity) - var expansionDepth = findExpansionDepth(spine, insertionDepth); - // 10.1.4 Update sizes on spine above the shared ancestor before we expand - updateSpineSizes(spine, expansionDepth >= 0 ? expansionDepth : 0); - // 10.1.5 Append and cascade splits upward - appendAndCascade(spine, insertionDepth, subtree); + } + else { + if (parent.keys.length < branchingFactor) { + parent.insert(0, carry); + carry = undefined; } else { - // 10.2 Equal height; create a new root combining [old root, subtree] (right) or [subtree, old root] (left) - // 10.2.2 Update spine sizes before shifting frontier inward - updateSpineSizes(spine, 0); - // 10.2.3 Create a new root with children [old root, subtree] or [subtree, old root] - var oldRoot = result._root; - var newChildren = rightSide ? [oldRoot, subtree] : [subtree, oldRoot]; - var newRoot = new BNodeInternal(newChildren, oldRoot.size() + subtree.size()); - result._root = root = newRoot; - height += 1; - // 10.2.4 Update the frontier to match the new root and appended subtree side - spine = rightSide ? buildFrontier(result._root, true) : buildFrontier(result._root, false); - if (rightSide) - rightFrontier = spine; - else - leftFrontier = spine; - // 10.2.5 Set isSharedFrontierDepth to 1 - isSharedFrontierDepth = 1; + var newLeft = parent.splitOffLeftSide(); + newLeft.insert(0, carry); + carry = newLeft; } } - }; - // 10. 
Process all subtrees to the right of the tallest subtree - if (tallestIndex + 1 <= disjoint.length - 1) - processSide(tallestIndex + 1, disjoint.length - 1, +1, true); - // 11. Process all subtrees to the left of the tallest subtree (reverse order) - if (tallestIndex - 1 >= 0) - processSide(tallestIndex - 1, 0, -1, false); - // 12. Return the resulting tree - return result; + d--; + } + // If still carrying after root, create a new root + if (carry) { + var oldRoot = spine[0]; + var children = rightSide ? [oldRoot, carry] : [oldRoot, carry]; + var newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); + return newRoot; + } + return undefined; + }; + ; + // Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is mutable + BTree.ensureNotShared = function (spine, isSharedFrontierDepth, depthToInclusive, frontierChildIndex) { + if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) + return; // nothing to clone when root is a leaf; equal-height case will handle this + check(spine[isSharedFrontierDepth].isShared === true, "Expected shared root at isSharedFrontierDepth 0"); + // Clone root if needed first (depth 0) + if (isSharedFrontierDepth === 0) { + var root = spine[0]; + spine[0] = root.clone(); + } + // Clone downward along the frontier to 'depthToInclusive' + for (var depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { + var parent = spine[depth - 1]; + var childIndex = frontierChildIndex(parent); + var clone = parent.children[childIndex].clone(); + parent.children[childIndex] = clone; + parent.keys[childIndex] = clone.maxKey(); + spine[depth] = clone; + } + }; + ; + /** + * Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) + */ + BTree.updateSizeAndMax = function (spine, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive, frontierChildIndex) { + // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because + 
// the insertion point is inside a shared node which will always have correct sizes + var maxKey = spine[isSharedFrontierDepth].maxKey(); + var startDepth = isSharedFrontierDepth - 1; + for (var depth = startDepth; depth >= depthUpToInclusive; depth--) { + var sizeAtLevel = unflushedSizes[depth]; + unflushedSizes[depth] = 0; // we are propagating it now + if (depth > 0) { + // propagate size upward, will be added lazily, either when a subtree is appended at or above that level or + // at the end of processing the entire side + unflushedSizes[depth - 1] += sizeAtLevel; + } + var node = spine[depth]; + node._size += sizeAtLevel; + node.keys[frontierChildIndex(node)] = maxKey; + } + }; + ; + /** + * Update a spine (frontier) from a specific depth down, inclusive + */ + BTree.updateFrontier = function (frontier, depthLastValid, frontierChildIndex) { + check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); + var startingAncestor = frontier[depthLastValid]; + if (startingAncestor.isLeaf) + return; + var an = startingAncestor; + var cur = an.children[frontierChildIndex(an)]; + var depth = depthLastValid + 1; + while (!cur.isLeaf) { + var ni = cur; + frontier[depth] = ni; + cur = ni.children[frontierChildIndex(ni)]; + depth++; + } + frontier[depth] = cur; + }; + ; + /** + * Find the first ancestor (starting at insertionDepth) with capacity + */ + BTree.findCascadeEndDepth = function (spine, insertionDepth, branchingFactor) { + for (var depth = insertionDepth; depth >= 0; depth--) { + if (spine[depth].keys.length < branchingFactor) + return depth; + } + return -1; // no capacity, will need a new root }; + ; /** * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. 
@@ -830,7 +804,7 @@ var BTree = /** @class */ (function () { // Have to do this as cast to convince TS it's ever assigned var highestDisjoint = undefined; // Cursor payload factory - var mkPayload = function (_) { return ({ disqualified: false }); }; + var makePayload = function (_) { return ({ disqualified: false }); }; var pushLeafRange = function (leaf, from, toExclusive) { if (from < toExclusive) { for (var i = from; i < toExclusive; ++i) @@ -898,8 +872,15 @@ var BTree = /** @class */ (function () { for (var i = 0; i < stepDownIndex; ++i) addSharedNodeToDisjointSet(node.children[i], height - 1); }; - var curA = BTree.createCursor(left, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); - var curB = BTree.createCursor(right, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + var onEndMove = function () { + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + }; + // Initialize cursors at minimum keys. + var curA = BTree.createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove); + var curB = BTree.createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove); // Initialize disqualification w.r.t. opposite leaf. 
var initDisqualify = function (cur, otherLeaf) { if (BTree.areOverlapping(cur.leaf, otherLeaf, cmp)) @@ -935,10 +916,6 @@ var BTree = /** @class */ (function () { } else { var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); - if (highestDisjoint !== undefined) { - addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); - highestDisjoint = undefined; - } if (out) { var maxKeyLeft = left._root.maxKey(); var maxKeyRight = right._root.maxKey(); @@ -949,7 +926,7 @@ var BTree = /** @class */ (function () { } } flushPendingEntries(); - return { disjoint: disjoint, tallestIndex: tallestIndex, tallestHeight: tallestHeight }; + return { disjoint: disjoint, tallestIndex: tallestIndex }; }; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. @@ -966,6 +943,7 @@ var BTree = /** @class */ (function () { if (destInLeaf < leaf.keys.length) { cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual, other); cur.leafIndex = destInLeaf; + cur.onEndMove(); return false; } // Find first ancestor with a viable right step @@ -996,6 +974,7 @@ var BTree = /** @class */ (function () { var sd = s === 0 ? Infinity : NaN; cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd, other); } + cur.onEndMove(); return true; } // Step up through ancestors above the descentLevel @@ -1016,49 +995,50 @@ var BTree = /** @class */ (function () { var ni = node; var j = ni.indexOf(targetKey, 0, cmp); var stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 
1 : 0)); - var payload = cur.mkPayload(ni); + var payload = cur.makePayload(ni); spine.push({ node: ni, childIndex: stepDownIndex, payload: payload }); cur.onStepDown(ni, height, payload, stepDownIndex, other); node = ni.children[stepDownIndex]; height -= 1; } // Enter destination leaf - var newLeaf = node; - var leafPayload = cur.mkPayload(newLeaf); - var idx = newLeaf.indexOf(targetKey, -1, cmp); + var leafPayload = cur.makePayload(node); + var idx = node.indexOf(targetKey, -1, cmp); var destIndex = idx < 0 ? ~idx : (isInclusive ? idx : idx + 1); - check(destIndex >= 0 && destIndex < newLeaf.keys.length, "moveTo: destination out of bounds"); - cur.onEnterLeaf(newLeaf, leafPayload, destIndex, other); - cur.leaf = newLeaf; + check(destIndex >= 0 && destIndex < node.keys.length, "moveTo: destination out of bounds"); + cur.onEnterLeaf(node, leafPayload, destIndex, other); + cur.leaf = node; cur.leafPayload = leafPayload; cur.leafIndex = destIndex; + cur.onEndMove(); return false; }; - /** Create a cursor at the leftmost key. */ - BTree.createCursor = function (tree, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown) { + /** + * Create a cursor pointing to the leftmost key of the supplied tree. 
+ */ + BTree.createCursor = function (tree, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove) { check(tree._root.size() > 0, "createCursor: cannot create a cursor for an empty tree"); var spine = []; var n = tree._root; while (!n.isLeaf) { var ni = n; - var payload = mkPayload(ni); + var payload = makePayload(ni); spine.push({ node: ni, childIndex: 0, payload: payload }); n = ni.children[0]; } - var leaf = n; - var leafPayload = mkPayload(leaf); + var leafPayload = makePayload(n); var cur = { tree: tree, - leaf: leaf, - leafIndex: 0, + leaf: n, leafIndex: 0, spine: spine, leafPayload: leafPayload, - mkPayload: mkPayload, + makePayload: makePayload, onEnterLeaf: onEnterLeaf, onMoveInLeaf: onMoveInLeaf, onExitLeaf: onExitLeaf, onStepUp: onStepUp, - onStepDown: onStepDown + onStepDown: onStepDown, + onEndMove: onEndMove }; return cur; }; @@ -1074,6 +1054,12 @@ var BTree = /** @class */ (function () { // Overlap iff !(amax < bmin || bmax < amin) on inclusive ranges. return !(cmp(amax, bmin) < 0 || cmp(bmax, amin) < 0); }; + BTree.getLeftmostChild = function () { + return 0; + }; + BTree.getRightmostChild = function (node) { + return node.children.length - 1; + }; /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. @@ -2145,6 +2131,20 @@ var BNodeInternal = /** @class */ (function (_super) { this._size -= movedSize; return newNode; }; + /** + * Split this node. + * Modifies this to remove the first half of the items, returning a separate node containing them. 
+ */ + BNodeInternal.prototype.splitOffLeftSide = function () { + // assert !this.isShared; + var half = this.children.length >> 1; + var newChildren = this.children.splice(0, half); + var newKeys = this.keys.splice(0, half); + var movedSize = sumChildSizes(newChildren); + var newNode = new BNodeInternal(newChildren, movedSize, newKeys); + this._size -= movedSize; + return newNode; + }; BNodeInternal.prototype.takeFromRight = function (rhs) { // Reminder: parent node must update its copy of key for this node // assert: neither node is shared diff --git a/b+tree.ts b/b+tree.ts index fd37c8b..600f5cb 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -588,14 +588,14 @@ export default class BTree implements ISortedMapF, ISortedMap return; // Cursor payload factory - const mkPayload = (_: BNode): undefined => undefined; + const makePayload = (_: BNode): undefined => undefined; // Callbacks const empty = () => {}; // Initialize cursors at minimum keys. - const curA = BTree.createCursor(this, mkPayload, empty, empty, empty, empty, empty); - const curB = BTree.createCursor(other, mkPayload, empty, empty, empty, empty, empty); + const curA = BTree.createCursor(this, makePayload, empty, empty, empty, empty, empty, empty); + const curB = BTree.createCursor(other, makePayload, empty, empty, empty, empty, empty, empty); // Walk both cursors while (true) { @@ -650,19 +650,21 @@ export default class BTree implements ISortedMapF, ISortedMap const { disjoint, tallestIndex } = BTree.decompose(this, other, merge); // Start result at the tallest subtree from the disjoint set - const [initialHeight, initialRoot] = disjoint[tallestIndex]; + const initialRoot = disjoint[tallestIndex][1]; const branchingFactor = this._maxNodeSize; - let rightFrontier: BNode[] = [initialRoot], leftFrontier: BNode[] = [initialRoot]; + const rightFrontier: BNode[] = [initialRoot]; BTree.updateFrontier(rightFrontier, 0, BTree.getRightmostChild); - BTree.updateFrontier(leftFrontier, 0, BTree.getLeftmostChild); // 
Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjoint.length - 1) - BTree.processSide(branchingFactor, initialHeight,disjoint, rightFrontier, tallestIndex + 1, disjoint.length - 1, +1, true, BTree.getRightmostChild); + BTree.processSide(branchingFactor,disjoint, rightFrontier, tallestIndex + 1, disjoint.length, 1, true, BTree.getRightmostChild); + + const leftFrontier: BNode[] = [rightFrontier[0]]; + BTree.updateFrontier(leftFrontier, 0, BTree.getLeftmostChild); // Process all subtrees to the left of the tallest subtree (reverse order) if (tallestIndex - 1 >= 0) - BTree.processSide(branchingFactor, initialHeight, disjoint, leftFrontier, tallestIndex - 1, 0, -1, false, BTree.getLeftmostChild); + BTree.processSide(branchingFactor, disjoint, leftFrontier, tallestIndex - 1, -1, -1, false, BTree.getLeftmostChild); const merged = new BTree(undefined, this._compare, this._maxNodeSize); check(rightFrontier[0] === leftFrontier[0], "Root mismatch after merge processing."); @@ -673,13 +675,11 @@ export default class BTree implements ISortedMapF, ISortedMap } /** - * Track highest shared frontier depth for the currently processed frontier - * Depth is indexed from root=0 down to parent-of-leaf=(height-1). - * Any changes at or below 'depthTo' require cloning from isSharedFrontierDepth..depthTo. + * Processes one side (left or right) of the disjoint subtree set during a merge operation. + * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. 
*/ private static processSide( branchingFactor: number, - initialHeight: number, disjoint: DisjointEntry[], spine: BNode[], start: number, @@ -687,65 +687,60 @@ export default class BTree implements ISortedMapF, ISortedMap step: number, rightSide: boolean, frontierChildIndex: (node: BNodeInternal) => number): void { + let isSharedFrontierDepth = 0; + let cur = spine[0]; + // Find the first shared node on the frontier + while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { + isSharedFrontierDepth++; + cur = (cur as BNodeInternal).children[frontierChildIndex(cur as BNodeInternal)]; + } - let isSharedFrontierDepth = initialHeight; // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. 
- const unflushedSizes: number[] = new Array(initialHeight).fill(0); // pre-fill to avoid "holey" array + const unflushedSizes: number[] = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array // Iterate the assigned half of the disjoint set for (let i = start; step != end; i += step) { - const currentHeight = spine.length; // height is number of internal levels; 0 means leaf + const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf const subtree = disjoint[i][1]; const subtreeHeight = disjoint[i][0]; - let highestNewNodeDepth: number; - if (subtreeHeight < currentHeight) { - // Determine insertion depth on this frontier - const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' - - // Ensure path is unshared before mutation - BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, frontierChildIndex); - - // Calculate expansion depth (first ancestor with capacity) - const expansionDepth = Math.max(0, BTree.findCascadeEndDepth(spine, insertionDepth, branchingFactor)); - - // Update sizes on spine above the shared ancestor before we expand - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, frontierChildIndex); - - // Append and cascade splits upward - const newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, rightSide); - if (newRoot) { - // Set the spine root to the highest up new node; the rest of the spine is updated below - spine[0] = newRoot; - unflushedSizes.unshift(0); - } - unflushedSizes[insertionDepth] = subtree.size(); - isSharedFrontierDepth = insertionDepth; - highestNewNodeDepth = expansionDepth; - } else { - // Equal height case, create a new root combining roots - // No need to clone path, since we are adding a new root instead of inserting/mutating - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, frontierChildIndex); + const 
insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' + + // Ensure path is unshared before mutation + BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, frontierChildIndex); + + // Calculate expansion depth (first ancestor with capacity) + const expansionDepth = Math.max(0, BTree.findCascadeEndDepth(spine, insertionDepth, branchingFactor)); + + // Update sizes on spine above the shared ancestor before we expand + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, frontierChildIndex); - const oldRoot = spine[0]; - const newChildren = rightSide ? [oldRoot, subtree] : [subtree, oldRoot]; - const newRoot = new BNodeInternal(newChildren, oldRoot.size() + subtree.size()); + // Append and cascade splits upward + const newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, rightSide); + if (newRoot) { + // Set the spine root to the highest up new node; the rest of the spine is updated below spine[0] = newRoot; - unflushedSizes.unshift(0); - // first shared node is just below new root - isSharedFrontierDepth = 1; - highestNewNodeDepth = 0; + unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); + unflushedSizes.push(0); // new root level + isSharedFrontierDepth = insertionDepth; + } else { + unflushedSizes[insertionDepth] += subtree.size(); + isSharedFrontierDepth = insertionDepth; } // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. 
- BTree.updateFrontier(spine, highestNewNodeDepth, frontierChildIndex); + BTree.updateFrontier(spine, expansionDepth, frontierChildIndex); + check(unflushedSizes.length === spine.length - 1, "Unflushed sizes length mismatch after root split."); } + + // Finally, propagate any remaining unflushed sizes upward and update max keys + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, frontierChildIndex); }; // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. @@ -759,7 +754,6 @@ export default class BTree implements ISortedMapF, ISortedMap let carry: BNode | undefined = subtree; // Append at insertionDepth and bubble new right siblings upward until a node with capacity accepts them or we reach root let d = insertionDepth; - // TODO REVIEW FOR SIZE UPWARDS AND SPLITTING ERRORS...split is carried tho??? while (carry && d >= 0) { const parent = spine[d] as BNodeInternal; if (rightSide) { @@ -800,7 +794,7 @@ export default class BTree implements ISortedMapF, ISortedMap isSharedFrontierDepth: number, depthToInclusive: number, frontierChildIndex: (node: BNodeInternal) => number) { - if (spine.length === 1) + if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this check(spine[isSharedFrontierDepth].isShared === true, "Expected shared root at isSharedFrontierDepth 0"); @@ -876,7 +870,7 @@ export default class BTree implements ISortedMapF, ISortedMap for (let depth = insertionDepth; depth >= 0; depth--) { if (spine[depth].keys.length < branchingFactor) return depth; - } + } return -1; // no capacity, will need a new root }; @@ -939,7 +933,7 @@ export default class BTree implements ISortedMapF, ISortedMap let highestDisjoint: { node: BNode, height: number } | undefined = undefined as { node: BNode, height: number } | undefined; // Cursor payload factory - const mkPayload = (_: BNode): TP => ({ disqualified: 
false } as TP); + const makePayload = (_: BNode): TP => ({ disqualified: false } as TP); const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { if (from < toExclusive) { @@ -1018,9 +1012,16 @@ export default class BTree implements ISortedMapF, ISortedMap addSharedNodeToDisjointSet(node.children[i], height - 1); }; + const onEndMove = () => { + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + }; + // Initialize cursors at minimum keys. - const curA = BTree.createCursor(left, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); - const curB = BTree.createCursor(right, mkPayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + const curA = BTree.createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove); + const curB = BTree.createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove); // Initialize disqualification w.r.t. opposite leaf. 
const initDisqualify = (cur: MergeCursor, otherLeaf: BNode) => { @@ -1054,10 +1055,6 @@ export default class BTree implements ISortedMapF, ISortedMap break; } else { const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); - if (highestDisjoint !== undefined) { - addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); - highestDisjoint = undefined; - } if (out) { const maxKeyLeft = left._root.maxKey() as K; const maxKeyRight = right._root.maxKey() as K; @@ -1095,6 +1092,7 @@ export default class BTree implements ISortedMapF, ISortedMap if (destInLeaf < leaf.keys.length) { cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual, other); cur.leafIndex = destInLeaf; + cur.onEndMove(); return false; } @@ -1107,7 +1105,7 @@ export default class BTree implements ISortedMapF, ISortedMap const parent = spine[s].node; const fromIndex = spine[s].childIndex; const j = parent.indexOf(targetKey, 0, cmp); // insertion index or exact - const stepDownIndex = (j as number) + (isInclusive ? 0 : (j < parent.keys.length && cmp(parent.keys[j], targetKey) === 0 ? 1 : 0)); + const stepDownIndex = j + (isInclusive ? 0 : (j < parent.keys.length && cmp(parent.keys[j], targetKey) === 0 ? 1 : 0)); // Note: when key not found, indexOf with failXor=0 already returns insertion index if (stepDownIndex > fromIndex && stepDownIndex <= parent.keys.length - 1) { descentLevel = s; @@ -1130,6 +1128,7 @@ export default class BTree implements ISortedMapF, ISortedMap const sd = s === 0 ? Infinity : NaN; cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd, other); } + cur.onEndMove(); return true; } @@ -1153,7 +1152,7 @@ export default class BTree implements ISortedMapF, ISortedMap const ni = node as BNodeInternal; const j = ni.indexOf(targetKey, 0, cmp); const stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 
1 : 0)); - const payload = cur.mkPayload(ni); + const payload = cur.makePayload(ni); spine.push({ node: ni, childIndex: stepDownIndex, payload }); cur.onStepDown(ni, height, payload, stepDownIndex, other); node = ni.children[stepDownIndex]; @@ -1161,7 +1160,7 @@ export default class BTree implements ISortedMapF, ISortedMap } // Enter destination leaf - const leafPayload = cur.mkPayload(node); + const leafPayload = cur.makePayload(node); const idx = node.indexOf(targetKey, -1, cmp); const destIndex = idx < 0 ? ~idx : (isInclusive ? idx : idx + 1); check(destIndex >= 0 && destIndex < node.keys.length, "moveTo: destination out of bounds"); @@ -1170,6 +1169,7 @@ export default class BTree implements ISortedMapF, ISortedMap cur.leaf = node; cur.leafPayload = leafPayload; cur.leafIndex = destIndex; + cur.onEndMove(); return false; } @@ -1178,26 +1178,27 @@ export default class BTree implements ISortedMapF, ISortedMap */ private static createCursor( tree: BTree, - mkPayload: (n: BNode) => TP, + makePayload: (n: BNode) => TP, onEnterLeaf: MergeCursor["onEnterLeaf"], onMoveInLeaf: MergeCursor["onMoveInLeaf"], onExitLeaf: MergeCursor["onExitLeaf"], onStepUp: MergeCursor["onStepUp"], onStepDown: MergeCursor["onStepDown"], + onEndMove: MergeCursor["onEndMove"], ): MergeCursor { check(tree._root.size() > 0, "createCursor: cannot create a cursor for an empty tree"); const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; let n: BNode = tree._root; while (!n.isLeaf) { const ni = n as BNodeInternal; - const payload = mkPayload(ni); + const payload = makePayload(ni); spine.push({ node: ni, childIndex: 0, payload }); n = ni.children[0]; } - const leafPayload = mkPayload(n); + const leafPayload = makePayload(n); const cur: MergeCursor = { - tree, leaf: n, leafIndex: 0, spine, leafPayload, mkPayload, - onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown + tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, + onEnterLeaf, 
onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove }; return cur; } @@ -2545,13 +2546,14 @@ interface MergeCursor { leafIndex: number; spine: Array<{ node: BNodeInternal, childIndex: number, payload: TPayload }>; leafPayload: TPayload; - mkPayload: (n: BNode) => TPayload; + makePayload: (n: BNode) => TPayload; onEnterLeaf: (leaf: BNode, payload: TPayload, destIndex: number, other: MergeCursor) => void; onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean, other: MergeCursor) => void; onExitLeaf: (leaf: BNode, startingIndex: number, isInclusive: boolean, payload: TPayload, other: MergeCursor) => void; onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, stepDownIndex: number, other: MergeCursor) => void; onStepDown: (node: BNodeInternal, height: number, payload: TPayload, stepDownIndex: number, other: MergeCursor) => void; + onEndMove: () => void; } type DisjointEntry = [height: number, node: BNode]; From 8369f88c9131f4abccfc51dba3f610f8b20e452f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sat, 1 Nov 2025 11:34:07 -0700 Subject: [PATCH 016/143] bug fixes --- b+tree.js | 6 +++--- b+tree.ts | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/b+tree.js b/b+tree.js index 201b8ee..04e454a 100644 --- a/b+tree.js +++ b/b+tree.js @@ -622,17 +622,17 @@ var BTree = /** @class */ (function () { spine[0] = newRoot; unflushedSizes.forEach(function (count) { return check(count === 0, "Unexpected unflushed size after root split."); }); unflushedSizes.push(0); // new root level - isSharedFrontierDepth = insertionDepth; } else { unflushedSizes[insertionDepth] += subtree.size(); - isSharedFrontierDepth = insertionDepth; } + // if insertionDepth was -1, a new root was made and the shared node was inserted just below it + isSharedFrontierDepth = Math.max(1, insertionDepth + 1); // Finally, update the frontier from the highest new node downward // Note that this 
is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. BTree.updateFrontier(spine, expansionDepth, frontierChildIndex); - check(unflushedSizes.length === spine.length - 1, "Unflushed sizes length mismatch after root split."); + check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); } // Finally, propagate any remaining unflushed sizes upward and update max keys BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, frontierChildIndex); diff --git a/b+tree.ts b/b+tree.ts index 600f5cb..d35fcea 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -726,17 +726,19 @@ export default class BTree implements ISortedMapF, ISortedMap spine[0] = newRoot; unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); unflushedSizes.push(0); // new root level - isSharedFrontierDepth = insertionDepth; } else { unflushedSizes[insertionDepth] += subtree.size(); - isSharedFrontierDepth = insertionDepth; } + // if insertionDepth was -1, a new root was made and the shared node was inserted just below it + isSharedFrontierDepth = Math.max(1, insertionDepth + 1); + // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. 
BTree.updateFrontier(spine, expansionDepth, frontierChildIndex); - check(unflushedSizes.length === spine.length - 1, "Unflushed sizes length mismatch after root split."); + check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); + check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); } // Finally, propagate any remaining unflushed sizes upward and update max keys @@ -797,8 +799,6 @@ export default class BTree implements ISortedMapF, ISortedMap if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this - check(spine[isSharedFrontierDepth].isShared === true, "Expected shared root at isSharedFrontierDepth 0"); - // Clone root if needed first (depth 0) if (isSharedFrontierDepth === 0) { const root = spine[0]; From a58f3529a4634ef90e666692bf383f2e99561a48 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sat, 1 Nov 2025 12:58:00 -0700 Subject: [PATCH 017/143] loop bug --- b+tree.js | 4 ++-- b+tree.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/b+tree.js b/b+tree.js index 04e454a..b2c6c0a 100644 --- a/b+tree.js +++ b/b+tree.js @@ -604,7 +604,7 @@ var BTree = /** @class */ (function () { // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. 
var unflushedSizes = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array // Iterate the assigned half of the disjoint set - for (var i = start; step != end; i += step) { + for (var i = start; i != end; i += step) { var currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf var subtree = disjoint[i][1]; var subtreeHeight = disjoint[i][0]; @@ -632,6 +632,7 @@ var BTree = /** @class */ (function () { // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. BTree.updateFrontier(spine, expansionDepth, frontierChildIndex); + check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); } // Finally, propagate any remaining unflushed sizes upward and update max keys @@ -684,7 +685,6 @@ var BTree = /** @class */ (function () { BTree.ensureNotShared = function (spine, isSharedFrontierDepth, depthToInclusive, frontierChildIndex) { if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this - check(spine[isSharedFrontierDepth].isShared === true, "Expected shared root at isSharedFrontierDepth 0"); // Clone root if needed first (depth 0) if (isSharedFrontierDepth === 0) { var root = spine[0]; diff --git a/b+tree.ts b/b+tree.ts index d35fcea..cece679 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -704,7 +704,7 @@ export default class BTree implements ISortedMapF, ISortedMap const unflushedSizes: number[] = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array // Iterate the assigned half of the disjoint set - for (let i = start; step != end; i += step) { + for (let i = start; i != end; i += step) { const currentHeight = spine.length - 1; // height is number of internal 
levels; 0 means leaf const subtree = disjoint[i][1]; const subtreeHeight = disjoint[i][0]; From c37707d8cbfc6d34b4a0755dcd85294694267012 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 2 Nov 2025 09:24:49 -0800 Subject: [PATCH 018/143] bug fixes --- b+tree.js | 68 +++++++++++++++++++++++++++++-------------------------- b+tree.ts | 68 +++++++++++++++++++++++++++++-------------------------- 2 files changed, 72 insertions(+), 64 deletions(-) diff --git a/b+tree.js b/b+tree.js index b2c6c0a..3550473 100644 --- a/b+tree.js +++ b/b+tree.js @@ -512,8 +512,8 @@ var BTree = /** @class */ (function () { // Callbacks var empty = function () { }; // Initialize cursors at minimum keys. - var curA = BTree.createCursor(this, makePayload, empty, empty, empty, empty, empty, empty); - var curB = BTree.createCursor(other, makePayload, empty, empty, empty, empty, empty, empty); + var curA = BTree.createCursor(this, makePayload, empty, empty, empty, empty, empty); + var curB = BTree.createCursor(other, makePayload, empty, empty, empty, empty, empty); // Walk both cursors while (true) { var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); @@ -760,8 +760,6 @@ var BTree = /** @class */ (function () { */ BTree.decompose = function (left, right, mergeValues) { var cmp = left._compare; - check(left._compare === right._compare, "merge: trees must share comparator"); - check(left._maxNodeSize === right._maxNodeSize, "merge: trees must share max node size"); check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); var disjoint = []; var pending = []; @@ -828,10 +826,15 @@ var BTree = /** @class */ (function () { var start = startedEqual ? 
fromIndex + 1 : fromIndex; pushLeafRange(leaf, start, Math.min(toIndex, leaf.keys.length)); }; - var onExitLeaf = function (leaf, startingIndex, startedEqual, payload, _other) { + var onExitLeaf = function (cursorThis, leaf, startingIndex, startedEqual, payload, _other) { highestDisjoint = undefined; if (!payload.disqualified) { highestDisjoint = { node: leaf, height: 0 }; + if (cursorThis.spine.length === 0) { + // if we are exiting a leaf and there are no internal nodes, we will reach the end of the tree. + // In this case we need to add the leaf now because step up will not be called. + addHighestDisjoint(); + } } else { var start = startedEqual ? startingIndex + 1 : startingIndex; @@ -839,26 +842,18 @@ var BTree = /** @class */ (function () { } }; var onStepUp = function (parent, height, payload, fromIndex, stepDownIndex, _other) { - if (Number.isNaN(stepDownIndex)) { - if (!payload.disqualified) { - highestDisjoint = { node: parent, height: height }; - } - else { - for (var i = fromIndex + 1; i < parent.children.length; ++i) - addSharedNodeToDisjointSet(parent.children[i], height - 1); - } - } - else if (stepDownIndex === Infinity) { + if (Number.isNaN(stepDownIndex) || stepDownIndex === Number.POSITIVE_INFINITY) { if (!payload.disqualified) { - check(fromIndex === 0, "onStepUp: Infinity case requires fromIndex==0"); highestDisjoint = { node: parent, height: height }; } else { + addHighestDisjoint(); for (var i = fromIndex + 1; i < parent.children.length; ++i) addSharedNodeToDisjointSet(parent.children[i], height - 1); } } else { + addHighestDisjoint(); for (var i = fromIndex + 1; i < stepDownIndex; ++i) addSharedNodeToDisjointSet(parent.children[i], height - 1); } @@ -872,15 +867,19 @@ var BTree = /** @class */ (function () { for (var i = 0; i < stepDownIndex; ++i) addSharedNodeToDisjointSet(node.children[i], height - 1); }; - var onEndMove = function () { + var addHighestDisjoint = function () { if (highestDisjoint !== undefined) { 
addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); highestDisjoint = undefined; } }; + // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second + var maxKeyLeft = left._root.maxKey(); + var maxKeyRight = right._root.maxKey(); + var maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; // Initialize cursors at minimum keys. - var curA = BTree.createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove); - var curB = BTree.createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove); + var curA = BTree.createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + var curB = BTree.createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); // Initialize disqualification w.r.t. opposite leaf. var initDisqualify = function (cur, otherLeaf) { if (BTree.areOverlapping(cur.leaf, otherLeaf, cmp)) @@ -911,15 +910,24 @@ var BTree = /** @class */ (function () { pending.push([key, merged]); var outT = BTree.moveTo(trailing, leading, key, false, areEqual, cmp); var outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); - if (outT && outL) + if (outT || outL) { + if (!outT || !outL) { + // In these cases, we pass areEqual=false because a return value of "out of tree" means + // the cursor did not move. This must be true because they started equal and one of them had more tree + // to walk (one is !out), so they cannot be equal at this point. 
+ if (outT) { + BTree.moveTo(leading, trailing, maxKey, false, false, cmp); + } + else { + BTree.moveTo(trailing, leading, maxKey, false, false, cmp); + } + } break; + } } else { var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (out) { - var maxKeyLeft = left._root.maxKey(); - var maxKeyRight = right._root.maxKey(); - var maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); break; } @@ -943,7 +951,6 @@ var BTree = /** @class */ (function () { if (destInLeaf < leaf.keys.length) { cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual, other); cur.leafIndex = destInLeaf; - cur.onEndMove(); return false; } // Find first ancestor with a viable right step @@ -966,15 +973,14 @@ var BTree = /** @class */ (function () { var heightOf = function (sIndex) { return spine.length - sIndex; }; // Exit leaf; we did walk out of it conceptually var startIndex = cur.leafIndex; - cur.onExitLeaf(leaf, startIndex, startedEqual, cur.leafPayload, other); + cur.onExitLeaf(cur, leaf, startIndex, startedEqual, cur.leafPayload, other); if (descentLevel < 0) { - // No descent point; step up all the way; last callback gets Infinity + // No descent point; step up all the way; last callback gets infinity for (var s = spine.length - 1; s >= 0; --s) { var entry = spine[s]; - var sd = s === 0 ? Infinity : NaN; + var sd = s === 0 ? Number.POSITIVE_INFINITY : Number.NaN; cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd, other); } - cur.onEndMove(); return true; } // Step up through ancestors above the descentLevel @@ -1010,13 +1016,12 @@ var BTree = /** @class */ (function () { cur.leaf = node; cur.leafPayload = leafPayload; cur.leafIndex = destIndex; - cur.onEndMove(); return false; }; /** * Create a cursor pointing to the leftmost key of the supplied tree. 
*/ - BTree.createCursor = function (tree, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove) { + BTree.createCursor = function (tree, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown) { check(tree._root.size() > 0, "createCursor: cannot create a cursor for an empty tree"); var spine = []; var n = tree._root; @@ -1037,8 +1042,7 @@ var BTree = /** @class */ (function () { onMoveInLeaf: onMoveInLeaf, onExitLeaf: onExitLeaf, onStepUp: onStepUp, - onStepDown: onStepDown, - onEndMove: onEndMove + onStepDown: onStepDown }; return cur; }; diff --git a/b+tree.ts b/b+tree.ts index cece679..b95516e 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -594,8 +594,8 @@ export default class BTree implements ISortedMapF, ISortedMap const empty = () => {}; // Initialize cursors at minimum keys. - const curA = BTree.createCursor(this, makePayload, empty, empty, empty, empty, empty, empty); - const curB = BTree.createCursor(other, makePayload, empty, empty, empty, empty, empty, empty); + const curA = BTree.createCursor(this, makePayload, empty, empty, empty, empty, empty); + const curB = BTree.createCursor(other, makePayload, empty, empty, empty, empty, empty); // Walk both cursors while (true) { @@ -884,10 +884,7 @@ export default class BTree implements ISortedMapF, ISortedMap mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined ): DecomposeResult { const cmp = left._compare; - check(left._compare === right._compare, "merge: trees must share comparator"); - check(left._maxNodeSize === right._maxNodeSize, "merge: trees must share max node size"); check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); - const disjoint: DisjointEntry[] = []; const pending: [K,V][] = []; let tallestIndex = -1, tallestHeight = -1; @@ -965,11 +962,16 @@ export default class BTree implements ISortedMapF, ISortedMap }; const onExitLeaf = ( - leaf: BNode, startingIndex: number, startedEqual: boolean, 
payload: TP, _other: MergeCursor + cursorThis: MergeCursor, leaf: BNode, startingIndex: number, startedEqual: boolean, payload: TP, _other: MergeCursor ) => { highestDisjoint = undefined; if (!payload.disqualified) { highestDisjoint = { node: leaf, height: 0 }; + if (cursorThis.spine.length === 0) { + // if we are exiting a leaf and there are no internal nodes, we will reach the end of the tree. + // In this case we need to add the leaf now because step up will not be called. + addHighestDisjoint(); + } } else { const start = startedEqual ? startingIndex + 1 : startingIndex; pushLeafRange(leaf, start, leaf.keys.length); @@ -979,22 +981,16 @@ export default class BTree implements ISortedMapF, ISortedMap const onStepUp = ( parent: BNodeInternal, height: number, payload: TP, fromIndex: number, stepDownIndex: number, _other: MergeCursor ) => { - if (Number.isNaN(stepDownIndex)) { + if (Number.isNaN(stepDownIndex) || stepDownIndex === Number.POSITIVE_INFINITY) { if (!payload.disqualified) { highestDisjoint = { node: parent, height }; } else { - for (let i = fromIndex + 1; i < parent.children.length; ++i) - addSharedNodeToDisjointSet(parent.children[i], height - 1); - } - } else if (stepDownIndex === Infinity) { - if (!payload.disqualified) { - check(fromIndex === 0, "onStepUp: Infinity case requires fromIndex==0"); - highestDisjoint = { node: parent, height }; - } else { + addHighestDisjoint(); for (let i = fromIndex + 1; i < parent.children.length; ++i) addSharedNodeToDisjointSet(parent.children[i], height - 1); } } else { + addHighestDisjoint(); for (let i = fromIndex + 1; i < stepDownIndex; ++i) addSharedNodeToDisjointSet(parent.children[i], height - 1); } @@ -1012,16 +1008,21 @@ export default class BTree implements ISortedMapF, ISortedMap addSharedNodeToDisjointSet(node.children[i], height - 1); }; - const onEndMove = () => { + const addHighestDisjoint = () => { if (highestDisjoint !== undefined) { addSharedNodeToDisjointSet(highestDisjoint.node, 
highestDisjoint.height); highestDisjoint = undefined; } }; + // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second + const maxKeyLeft = left._root.maxKey() as K; + const maxKeyRight = right._root.maxKey() as K; + const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + // Initialize cursors at minimum keys. - const curA = BTree.createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove); - const curB = BTree.createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove); + const curA = BTree.createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + const curB = BTree.createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); // Initialize disqualification w.r.t. opposite leaf. const initDisqualify = (cur: MergeCursor, otherLeaf: BNode) => { @@ -1051,14 +1052,22 @@ export default class BTree implements ISortedMapF, ISortedMap if (merged !== undefined) pending.push([key, merged]); const outT = BTree.moveTo(trailing, leading, key, false, areEqual, cmp); const outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); - if (outT && outL) + if (outT || outL) { + if (!outT || !outL) { + // In these cases, we pass areEqual=false because a return value of "out of tree" means + // the cursor did not move. This must be true because they started equal and one of them had more tree + // to walk (one is !out), so they cannot be equal at this point. 
+ if (outT) { + BTree.moveTo(leading, trailing, maxKey, false, false, cmp); + } else { + BTree.moveTo(trailing, leading, maxKey, false, false, cmp); + } + } break; + } } else { const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (out) { - const maxKeyLeft = left._root.maxKey() as K; - const maxKeyRight = right._root.maxKey() as K; - const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); break; } @@ -1092,7 +1101,6 @@ export default class BTree implements ISortedMapF, ISortedMap if (destInLeaf < leaf.keys.length) { cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual, other); cur.leafIndex = destInLeaf; - cur.onEndMove(); return false; } @@ -1119,16 +1127,15 @@ export default class BTree implements ISortedMapF, ISortedMap // Exit leaf; we did walk out of it conceptually const startIndex = cur.leafIndex; - cur.onExitLeaf(leaf, startIndex, startedEqual, cur.leafPayload, other); + cur.onExitLeaf(cur, leaf, startIndex, startedEqual, cur.leafPayload, other); if (descentLevel < 0) { - // No descent point; step up all the way; last callback gets Infinity + // No descent point; step up all the way; last callback gets infinity for (let s = spine.length - 1; s >= 0; --s) { const entry = spine[s]; - const sd = s === 0 ? Infinity : NaN; + const sd = s === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd, other); } - cur.onEndMove(); return true; } @@ -1169,7 +1176,6 @@ export default class BTree implements ISortedMapF, ISortedMap cur.leaf = node; cur.leafPayload = leafPayload; cur.leafIndex = destIndex; - cur.onEndMove(); return false; } @@ -1184,7 +1190,6 @@ export default class BTree implements ISortedMapF, ISortedMap onExitLeaf: MergeCursor["onExitLeaf"], onStepUp: MergeCursor["onStepUp"], onStepDown: MergeCursor["onStepDown"], - onEndMove: MergeCursor["onEndMove"], ): MergeCursor { check(tree._root.size() > 0, "createCursor: cannot create a cursor for an empty tree"); const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; @@ -1198,7 +1203,7 @@ export default class BTree implements ISortedMapF, ISortedMap const leafPayload = makePayload(n); const cur: MergeCursor = { tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, - onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown, onEndMove + onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown }; return cur; } @@ -2550,10 +2555,9 @@ interface MergeCursor { onEnterLeaf: (leaf: BNode, payload: TPayload, destIndex: number, other: MergeCursor) => void; onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean, other: MergeCursor) => void; - onExitLeaf: (leaf: BNode, startingIndex: number, isInclusive: boolean, payload: TPayload, other: MergeCursor) => void; + onExitLeaf: (cursorThis: MergeCursor, leaf: BNode, startingIndex: number, isInclusive: boolean, payload: TPayload, other: MergeCursor) => void; onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, stepDownIndex: number, other: MergeCursor) => void; onStepDown: (node: BNodeInternal, height: number, payload: TPayload, stepDownIndex: number, other: MergeCursor) => void; - onEndMove: () => void; } type DisjointEntry 
= [height: number, node: BNode]; From e373b5eeacda06fca085f6fb1a080396c1704150 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 2 Nov 2025 18:37:26 -0800 Subject: [PATCH 019/143] fix bugs --- b+tree.d.ts | 6 +- b+tree.js | 178 ++++++++++++++++++++++++------------- b+tree.test.ts | 18 ++++ b+tree.ts | 233 ++++++++++++++++++++++++++++++++----------------- 4 files changed, 294 insertions(+), 141 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 06db83a..2975923 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -305,7 +305,11 @@ export default class BTree implements ISortedMapF, ISort */ private static createCursor; private static getKey; - private static getLeaf; + /** + * Determines whether two nodes are overlapping in key range. + * Takes the leftmost known key of each node to avoid a log(n) min calculation. + * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. + */ private static areOverlapping; private static getLeftmostChild; private static getRightmostChild; diff --git a/b+tree.js b/b+tree.js index 3550473..2cb2ee2 100644 --- a/b+tree.js +++ b/b+tree.js @@ -507,9 +507,7 @@ var BTree = /** @class */ (function () { throw new Error("Cannot merge BTrees with different max node sizes."); if (other.size === 0 || this.size === 0) return; - // Cursor payload factory - var makePayload = function (_) { return undefined; }; - // Callbacks + var makePayload = function () { return undefined; }; var empty = function () { }; // Initialize cursors at minimum keys. 
var curA = BTree.createCursor(this, makePayload, empty, empty, empty, empty, empty); @@ -624,6 +622,7 @@ var BTree = /** @class */ (function () { unflushedSizes.push(0); // new root level } else { + // TODO unflushedSizes[insertionDepth] += subtree.size(); } // if insertionDepth was -1, a new root was made and the shared node was inserted just below it @@ -790,6 +789,8 @@ var BTree = /** @class */ (function () { pending.length = 0; } }; + // Have to do this as cast to convince TS it's ever assigned + var highestDisjoint = undefined; var addSharedNodeToDisjointSet = function (node, height) { flushPendingEntries(); node.isShared = true; @@ -799,34 +800,34 @@ var BTree = /** @class */ (function () { tallestHeight = height; } }; - // Have to do this as cast to convince TS it's ever assigned - var highestDisjoint = undefined; + var addHighestDisjoint = function () { + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + }; + var disqualifySpine = function (cursor, depthFrom) { + for (var i = depthFrom; i > 0; --i) { + var entry = cursor.spine[i]; + if (entry.payload.disqualified) + break; + entry.payload.disqualified = true; + } + }; // Cursor payload factory - var makePayload = function (_) { return ({ disqualified: false }); }; + var makePayload = function () { return ({ disqualified: false }); }; var pushLeafRange = function (leaf, from, toExclusive) { if (from < toExclusive) { for (var i = from; i < toExclusive; ++i) pending.push([leaf.keys[i], leaf.values[i]]); } }; - // Callbacks - var onEnterLeaf = function (leaf, payload, destIndex, other) { - var otherLeaf = BTree.getLeaf(other); - if (BTree.areOverlapping(leaf, otherLeaf, cmp)) { - payload.disqualified = true; - other.leafPayload.disqualified = true; - pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); - } - else { - check(destIndex === 0, "onEnterLeaf: destIndex must be 0 if not overlapping"); - } - }; 
- var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual, _other) { + var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual) { check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); var start = startedEqual ? fromIndex + 1 : fromIndex; pushLeafRange(leaf, start, Math.min(toIndex, leaf.keys.length)); }; - var onExitLeaf = function (cursorThis, leaf, startingIndex, startedEqual, payload, _other) { + var onExitLeaf = function (leaf, payload, startingIndex, startedEqual, cursorThis) { highestDisjoint = undefined; if (!payload.disqualified) { highestDisjoint = { node: leaf, height: 0 }; @@ -841,10 +842,15 @@ var BTree = /** @class */ (function () { pushLeafRange(leaf, start, leaf.keys.length); } }; - var onStepUp = function (parent, height, payload, fromIndex, stepDownIndex, _other) { - if (Number.isNaN(stepDownIndex) || stepDownIndex === Number.POSITIVE_INFINITY) { + var onStepUp = function (parent, height, payload, fromIndex, stepDownIndex) { + if (Number.isNaN(stepDownIndex) /* still walking up */ + || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { if (!payload.disqualified) { highestDisjoint = { node: parent, height: height }; + if (stepDownIndex === Number.POSITIVE_INFINITY) { + // We have finished our walk, and we won't be stepping down, so add the root + addHighestDisjoint(); + } } else { addHighestDisjoint(); @@ -858,19 +864,32 @@ var BTree = /** @class */ (function () { addSharedNodeToDisjointSet(parent.children[i], height - 1); } }; - var onStepDown = function (node, height, payload, stepDownIndex, other) { - var otherLeaf = BTree.getLeaf(other); - if (BTree.areOverlapping(node, otherLeaf, cmp)) { - payload.disqualified = true; - // leaf disqualification is handled in onEnterLeaf + var onStepDown = function (node, height, stepDownIndex, cursorThis) { + if (stepDownIndex > 0) { + // When we step down into a node, we know that we have 
walked from a key that is less than our target. + // Because of this, if we are not stepping down into the first child, we know that all children before + // the stepDownIndex must overlap with the other tree because they must be before our target key. Since + // the child we are stepping into has a key greater than our target key, this node must overlap. + // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range + // of its children. + cursorThis.spine[height].payload.disqualified = true; + disqualifySpine(cursorThis, cursorThis.spine.length - height); + for (var i = 0; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(node.children[i], height - 1); } - for (var i = 0; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(node.children[i], height - 1); }; - var addHighestDisjoint = function () { - if (highestDisjoint !== undefined) { - addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); - highestDisjoint = undefined; + var onEnterLeaf = function (leaf, destIndex, cursorThis, cursorOther) { + if (destIndex > 0 || cmp(leaf.keys[0], BTree.getKey(cursorOther)) < 0) { + // Similar logic to the step-down case, except in this case we also know the leaf in the other + // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. + cursorThis.leafPayload.disqualified = true; + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorThis, cursorThis.spine.length - 1); + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); + } + else { + check(destIndex === 0, "onEnterLeaf: destIndex must be 0 if not overlapping"); } }; // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second @@ -880,19 +899,32 @@ var BTree = /** @class */ (function () { // Initialize cursors at minimum keys. 
var curA = BTree.createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); var curB = BTree.createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful + // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) + // the key of cursorB to the first key >= cursorB. Call this transition a "crossover point." All interior nodes that + // overlap cause a crossover point, and all crossover points are guaranteed to be walked using this method. Thus, + // all overlapping interior nodes will be found if they are checked for on step-down. + // The one exception mentioned above is when they start at the same key. In this case, they are both advanced forward and then + // their new ordering determines how they walk from there. + // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). + // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. // Initialize disqualification w.r.t. opposite leaf. 
- var initDisqualify = function (cur, otherLeaf) { - if (BTree.areOverlapping(cur.leaf, otherLeaf, cmp)) + var initDisqualify = function (cur, other) { + var minKey = BTree.getKey(cur); + var otherMin = BTree.getKey(other); + var otherMax = other.leaf.maxKey(); + if (BTree.areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) cur.leafPayload.disqualified = true; for (var i = 0; i < cur.spine.length; ++i) { var entry = cur.spine[i]; - if (BTree.areOverlapping(entry.node, otherLeaf, cmp)) + // Since we are on the left side of the tree, we can use the leaf min key for every spine node + if (BTree.areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) entry.payload.disqualified = true; } }; - initDisqualify(curA, BTree.getLeaf(curB)); - initDisqualify(curB, BTree.getLeaf(curA)); - // Walk both cursors + initDisqualify(curA, curB); + initDisqualify(curB, curA); + // Walk both cursors in alternating hops while (true) { var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); var trailing = curA, leading = curB; @@ -941,15 +973,16 @@ var BTree = /** @class */ (function () { * Returns true if end-of-tree was reached (cursor not structurally mutated). */ BTree.moveTo = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { + var curKey = BTree.getKey(cur); // We should start before the target (or at it if inclusive) - var keyPos = cmp(BTree.getKey(cur), targetKey); + var keyPos = cmp(curKey, targetKey); check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo precondition violated"); // Fast path: destination within current leaf var leaf = cur.leaf; var i = leaf.indexOf(targetKey, -1, cmp); var destInLeaf = i < 0 ? ~i : (isInclusive ? 
i : i + 1); if (destInLeaf < leaf.keys.length) { - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual, other); + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual); cur.leafIndex = destInLeaf; return false; } @@ -973,24 +1006,24 @@ var BTree = /** @class */ (function () { var heightOf = function (sIndex) { return spine.length - sIndex; }; // Exit leaf; we did walk out of it conceptually var startIndex = cur.leafIndex; - cur.onExitLeaf(cur, leaf, startIndex, startedEqual, cur.leafPayload, other); + cur.onExitLeaf(leaf, cur.leafPayload, startIndex, startedEqual, cur); if (descentLevel < 0) { // No descent point; step up all the way; last callback gets infinity for (var s = spine.length - 1; s >= 0; --s) { var entry = spine[s]; var sd = s === 0 ? Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd, other); + cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd); } return true; } // Step up through ancestors above the descentLevel for (var s = spine.length - 1; s > descentLevel; --s) { var entry = spine[s]; - cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, NaN, other); + cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, NaN); } { var entry = spine[descentLevel]; - cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex, other); + cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); entry.childIndex = descentIndex; } // Descend, invoking onStepDown and creating payloads @@ -1001,37 +1034,35 @@ var BTree = /** @class */ (function () { var ni = node; var j = ni.indexOf(targetKey, 0, cmp); var stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 
1 : 0)); - var payload = cur.makePayload(ni); + var payload = cur.makePayload(); spine.push({ node: ni, childIndex: stepDownIndex, payload: payload }); - cur.onStepDown(ni, height, payload, stepDownIndex, other); + cur.onStepDown(ni, height, stepDownIndex, cur); node = ni.children[stepDownIndex]; height -= 1; } // Enter destination leaf - var leafPayload = cur.makePayload(node); var idx = node.indexOf(targetKey, -1, cmp); var destIndex = idx < 0 ? ~idx : (isInclusive ? idx : idx + 1); check(destIndex >= 0 && destIndex < node.keys.length, "moveTo: destination out of bounds"); - cur.onEnterLeaf(node, leafPayload, destIndex, other); cur.leaf = node; - cur.leafPayload = leafPayload; + cur.leafPayload = cur.makePayload(); cur.leafIndex = destIndex; + cur.onEnterLeaf(node, destIndex, cur, other); return false; }; /** * Create a cursor pointing to the leftmost key of the supplied tree. */ BTree.createCursor = function (tree, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown) { - check(tree._root.size() > 0, "createCursor: cannot create a cursor for an empty tree"); var spine = []; var n = tree._root; while (!n.isLeaf) { var ni = n; - var payload = makePayload(ni); + var payload = makePayload(); spine.push({ node: ni, childIndex: 0, payload: payload }); n = ni.children[0]; } - var leafPayload = makePayload(n); + var leafPayload = makePayload(); var cur = { tree: tree, leaf: n, leafIndex: 0, @@ -1049,14 +1080,39 @@ var BTree = /** @class */ (function () { BTree.getKey = function (c) { return c.leaf.keys[c.leafIndex]; }; - BTree.getLeaf = function (c) { - return c.leaf; - }; - BTree.areOverlapping = function (a, b, cmp) { - var amin = a.minKey(), amax = a.maxKey(); - var bmin = b.minKey(), bmax = b.maxKey(); - // Overlap iff !(amax < bmin || bmax < amin) on inclusive ranges. - return !(cmp(amax, bmin) < 0 || cmp(bmax, amin) < 0); + /** + * Determines whether two nodes are overlapping in key range. 
+ * Takes the leftmost known key of each node to avoid a log(n) min calculation. + * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. + */ + BTree.areOverlapping = function (aMin, aMax, bMin, bMax, cmp) { + // There are 4 possibilities: + // 1. aMin.........aMax + // bMin.........bMax + // (aMax between bMin and bMax) + // 2. aMin.........aMax + // bMin.........bMax + // (aMin between bMin and bMax) + // 3. aMin.............aMax + // bMin....bMax + // (aMin and aMax enclose bMin and bMax; note this includes equality cases) + // 4. aMin....aMax + // bMin.............bMax + // (bMin and bMax enclose aMin and aMax; note equality cases are identical to case 3) + var aMinBMin = cmp(aMin, bMin); + var aMinBMax = cmp(aMin, bMax); + if (aMinBMin >= 0 && aMinBMax <= 0) { + // case 2 or 4 + return true; + } + var aMaxBMin = cmp(aMax, bMin); + var aMaxBMax = cmp(aMax, bMax); + if (aMaxBMin >= 0 && aMaxBMax <= 0) { + // case 1 + return true; + } + // case 3 or no overlap + return aMinBMin <= 0 && aMaxBMax >= 0; }; BTree.getLeftmostChild = function () { return 0; diff --git a/b+tree.test.ts b/b+tree.test.ts index 4d17bf6..3776746 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1458,6 +1458,24 @@ function testMerge(maxNodeSize: number) { expect(result.size).toBe(tree1.size + tree2.size - 1); }); + test('Merge trees where all leaves are disjoint and one tree straddles the other', () => { + const straddleLength = 3 * 2 * maxNodeSize; // guaranteed to create multiple leaves on both trees + const tree1 = buildTree(range(0, straddleLength / 3).concat(range((straddleLength / 3) * 2, straddleLength)), 1); + const tree2 = buildTree(range(straddleLength / 3, (straddleLength / 3) * 2), 3); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let mergeCalls = 0; + const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + mergeCalls++; + return leftValue + rightValue; + }); + + 
expect(mergeCalls).toBe(1); + expect(result.size).toBe(tree1.size + tree2.size); + }); + test('Merge where two-leaf tree intersects leaf-root tree across both leaves', () => { const size = maxNodeSize + Math.max(3, Math.floor(maxNodeSize / 2)); const tree1 = buildTree(range(0, size), 2, 0); diff --git a/b+tree.ts b/b+tree.ts index b95516e..c4cfbec 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -587,10 +587,7 @@ export default class BTree implements ISortedMapF, ISortedMap if (other.size === 0 || this.size === 0) return; - // Cursor payload factory - const makePayload = (_: BNode): undefined => undefined; - - // Callbacks + const makePayload = (): undefined => undefined; const empty = () => {}; // Initialize cursors at minimum keys. @@ -727,6 +724,7 @@ export default class BTree implements ISortedMapF, ISortedMap unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); unflushedSizes.push(0); // new root level } else { + // TODO unflushedSizes[insertionDepth] += subtree.size(); } @@ -916,6 +914,9 @@ export default class BTree implements ISortedMapF, ISortedMap } }; + // Have to do this as cast to convince TS it's ever assigned + let highestDisjoint: { node: BNode, height: number } | undefined = undefined as { node: BNode, height: number } | undefined; + const addSharedNodeToDisjointSet = (node: BNode, height: number) => { flushPendingEntries(); node.isShared = true; @@ -926,11 +927,24 @@ export default class BTree implements ISortedMapF, ISortedMap } }; - // Have to do this as cast to convince TS it's ever assigned - let highestDisjoint: { node: BNode, height: number } | undefined = undefined as { node: BNode, height: number } | undefined; + const addHighestDisjoint = () => { + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + }; + + const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { + for (let i = depthFrom; 
i > 0; --i) { + const entry = cursor.spine[i]; + if (entry.payload.disqualified) + break; + entry.payload.disqualified = true; + } + }; // Cursor payload factory - const makePayload = (_: BNode): TP => ({ disqualified: false } as TP); + const makePayload = (): MergeCursorPayload => ({ disqualified: false }); const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { if (from < toExclusive) { @@ -939,30 +953,24 @@ export default class BTree implements ISortedMapF, ISortedMap } }; - // Callbacks - const onEnterLeaf = ( - leaf: BNode, payload: TP, destIndex: number, other: MergeCursor - ) => { - const otherLeaf = BTree.getLeaf(other); - if (BTree.areOverlapping(leaf, otherLeaf, cmp)) { - payload.disqualified = true; - other.leafPayload.disqualified = true; - pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); - } else { - check(destIndex === 0, "onEnterLeaf: destIndex must be 0 if not overlapping"); - } - }; - - const onMoveInLeaf = ( - leaf: BNode, payload: TP, fromIndex: number, toIndex: number, startedEqual: boolean, _other: MergeCursor + const onMoveInLeaf = ( + leaf: BNode, + payload: MergeCursorPayload, + fromIndex: number, + toIndex: number, + startedEqual: boolean ) => { check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); const start = startedEqual ? 
fromIndex + 1 : fromIndex; pushLeafRange(leaf, start, Math.min(toIndex, leaf.keys.length)); }; - const onExitLeaf = ( - cursorThis: MergeCursor, leaf: BNode, startingIndex: number, startedEqual: boolean, payload: TP, _other: MergeCursor + const onExitLeaf = ( + leaf: BNode, + payload: MergeCursorPayload, + startingIndex: number, + startedEqual: boolean, + cursorThis: MergeCursor, ) => { highestDisjoint = undefined; if (!payload.disqualified) { @@ -978,12 +986,21 @@ export default class BTree implements ISortedMapF, ISortedMap } }; - const onStepUp = ( - parent: BNodeInternal, height: number, payload: TP, fromIndex: number, stepDownIndex: number, _other: MergeCursor + const onStepUp = ( + parent: BNodeInternal, + height: number, + payload: MergeCursorPayload, + fromIndex: number, + stepDownIndex: number ) => { - if (Number.isNaN(stepDownIndex) || stepDownIndex === Number.POSITIVE_INFINITY) { + if (Number.isNaN(stepDownIndex) /* still walking up */ + || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { if (!payload.disqualified) { highestDisjoint = { node: parent, height }; + if (stepDownIndex === Number.POSITIVE_INFINITY) { + // We have finished our walk, and we won't be stepping down, so add the root + addHighestDisjoint(); + } } else { addHighestDisjoint(); for (let i = fromIndex + 1; i < parent.children.length; ++i) @@ -996,22 +1013,42 @@ export default class BTree implements ISortedMapF, ISortedMap } }; - const onStepDown = ( - node: BNodeInternal, height: number, payload: TP, stepDownIndex: number, other: MergeCursor + const onStepDown = ( + node: BNodeInternal, + height: number, + stepDownIndex: number, + cursorThis: MergeCursor ) => { - const otherLeaf = BTree.getLeaf(other); - if (BTree.areOverlapping(node, otherLeaf, cmp)) { - payload.disqualified = true; - // leaf disqualification is handled in onEnterLeaf + if (stepDownIndex > 0) { + // When we step down into a node, we know that we have walked from a 
key that is less than our target. + // Because of this, if we are not stepping down into the first child, we know that all children before + // the stepDownIndex must overlap with the other tree because they must be before our target key. Since + // the child we are stepping into has a key greater than our target key, this node must overlap. + // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range + // of its children. + cursorThis.spine[height].payload.disqualified = true; + disqualifySpine(cursorThis, cursorThis.spine.length - height); + for (let i = 0; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(node.children[i], height - 1); } - for (let i = 0; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(node.children[i], height - 1); }; - const addHighestDisjoint = () => { - if (highestDisjoint !== undefined) { - addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); - highestDisjoint = undefined; + const onEnterLeaf = ( + leaf: BNode, + destIndex: number, + cursorThis: MergeCursor, + cursorOther: MergeCursor + ) => { + if (destIndex > 0 || cmp(leaf.keys[0], BTree.getKey(cursorOther)) < 0) { + // Similar logic to the step-down case, except in this case we also know the leaf in the other + // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. 
+ cursorThis.leafPayload.disqualified = true; + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorThis, cursorThis.spine.length - 1); + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); + } else { + check(destIndex === 0, "onEnterLeaf: destIndex must be 0 if not overlapping"); } }; @@ -1024,20 +1061,33 @@ export default class BTree implements ISortedMapF, ISortedMap const curA = BTree.createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); const curB = BTree.createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful + // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) + // the key of cursorB to the first key >= cursorB. Call this transition a "crossover point." All interior nodes that + // overlap cause a crossover point, and all crossover points are guaranteed to be walked using this method. Thus, + // all overlapping interior nodes will be found if they are checked for on step-down. + // The one exception mentioned above is when they start at the same key. In this case, they are both advanced forward and then + // their new ordering determines how they walk from there. + // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). + // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. // Initialize disqualification w.r.t. opposite leaf. 
- const initDisqualify = (cur: MergeCursor, otherLeaf: BNode) => { - if (BTree.areOverlapping(cur.leaf, otherLeaf, cmp)) + const initDisqualify = (cur: MergeCursor, other: MergeCursor) => { + const minKey = BTree.getKey(cur); + const otherMin = BTree.getKey(other); + const otherMax = other.leaf.maxKey(); + if (BTree.areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) cur.leafPayload.disqualified = true; for (let i = 0; i < cur.spine.length; ++i) { const entry = cur.spine[i]; - if (BTree.areOverlapping(entry.node, otherLeaf, cmp)) + // Since we are on the left side of the tree, we can use the leaf min key for every spine node + if (BTree.areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) entry.payload.disqualified = true; } }; - initDisqualify(curA, BTree.getLeaf(curB)); - initDisqualify(curB, BTree.getLeaf(curA)); + initDisqualify(curA, curB); + initDisqualify(curB, curA); - // Walk both cursors + // Walk both cursors in alternating hops while (true) { const order = cmp(BTree.getKey(curA), BTree.getKey(curB)); let trailing = curA, leading = curB; @@ -1090,8 +1140,9 @@ export default class BTree implements ISortedMapF, ISortedMap startedEqual: boolean, cmp: (a:K,b:K)=>number ): boolean { + const curKey = BTree.getKey(cur); // We should start before the target (or at it if inclusive) - const keyPos = cmp(BTree.getKey(cur), targetKey); + const keyPos = cmp(curKey, targetKey); check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo precondition violated"); // Fast path: destination within current leaf @@ -1099,7 +1150,7 @@ export default class BTree implements ISortedMapF, ISortedMap const i = leaf.indexOf(targetKey, -1, cmp); const destInLeaf = i < 0 ? ~i : (isInclusive ? 
i : i + 1); if (destInLeaf < leaf.keys.length) { - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual, other); + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual); cur.leafIndex = destInLeaf; return false; } @@ -1127,14 +1178,14 @@ export default class BTree implements ISortedMapF, ISortedMap // Exit leaf; we did walk out of it conceptually const startIndex = cur.leafIndex; - cur.onExitLeaf(cur, leaf, startIndex, startedEqual, cur.leafPayload, other); + cur.onExitLeaf(leaf, cur.leafPayload, startIndex, startedEqual, cur); if (descentLevel < 0) { // No descent point; step up all the way; last callback gets infinity for (let s = spine.length - 1; s >= 0; --s) { const entry = spine[s]; const sd = s === 0 ? Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd, other); + cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd); } return true; } @@ -1142,11 +1193,11 @@ export default class BTree implements ISortedMapF, ISortedMap // Step up through ancestors above the descentLevel for (let s = spine.length - 1; s > descentLevel; --s) { const entry = spine[s]; - cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, NaN, other); + cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, NaN); } { const entry = spine[descentLevel]; - cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex, other); + cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); entry.childIndex = descentIndex; } @@ -1159,23 +1210,21 @@ export default class BTree implements ISortedMapF, ISortedMap const ni = node as BNodeInternal; const j = ni.indexOf(targetKey, 0, cmp); const stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 
1 : 0)); - const payload = cur.makePayload(ni); + const payload = cur.makePayload(); spine.push({ node: ni, childIndex: stepDownIndex, payload }); - cur.onStepDown(ni, height, payload, stepDownIndex, other); + cur.onStepDown(ni, height, stepDownIndex, cur); node = ni.children[stepDownIndex]; height -= 1; } // Enter destination leaf - const leafPayload = cur.makePayload(node); const idx = node.indexOf(targetKey, -1, cmp); const destIndex = idx < 0 ? ~idx : (isInclusive ? idx : idx + 1); check(destIndex >= 0 && destIndex < node.keys.length, "moveTo: destination out of bounds"); - - cur.onEnterLeaf(node, leafPayload, destIndex, other); cur.leaf = node; - cur.leafPayload = leafPayload; + cur.leafPayload = cur.makePayload(); cur.leafIndex = destIndex; + cur.onEnterLeaf(node, destIndex, cur, other); return false; } @@ -1184,23 +1233,22 @@ export default class BTree implements ISortedMapF, ISortedMap */ private static createCursor( tree: BTree, - makePayload: (n: BNode) => TP, + makePayload: MergeCursor["makePayload"], onEnterLeaf: MergeCursor["onEnterLeaf"], onMoveInLeaf: MergeCursor["onMoveInLeaf"], onExitLeaf: MergeCursor["onExitLeaf"], onStepUp: MergeCursor["onStepUp"], onStepDown: MergeCursor["onStepDown"], ): MergeCursor { - check(tree._root.size() > 0, "createCursor: cannot create a cursor for an empty tree"); const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; let n: BNode = tree._root; while (!n.isLeaf) { const ni = n as BNodeInternal; - const payload = makePayload(ni); + const payload = makePayload(); spine.push({ node: ni, childIndex: 0, payload }); n = ni.children[0]; } - const leafPayload = makePayload(n); + const leafPayload = makePayload(); const cur: MergeCursor = { tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown @@ -1209,20 +1257,48 @@ export default class BTree implements ISortedMapF, ISortedMap } private static getKey(c: MergeCursor): K 
{ - return c.leaf.keys[c.leafIndex] as K; - } - - private static getLeaf(c: MergeCursor): BNode { - return c.leaf; + return c.leaf.keys[c.leafIndex]; } + /** + * Determines whether two nodes are overlapping in key range. + * Takes the leftmost known key of each node to avoid a log(n) min calculation. + * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. + */ private static areOverlapping( - a: BNode, b: BNode, cmp: (x:K,y:K)=>number + aMin: K, + aMax: K, + bMin: K, + bMax: K, + cmp: (x:K,y:K)=>number ): boolean { - const amin = a.minKey() as K, amax = a.maxKey() as K; - const bmin = b.minKey() as K, bmax = b.maxKey() as K; - // Overlap iff !(amax < bmin || bmax < amin) on inclusive ranges. - return !(cmp(amax, bmin) < 0 || cmp(bmax, amin) < 0); + // There are 4 possibilities: + // 1. aMin.........aMax + // bMin.........bMax + // (aMax between bMin and bMax) + // 2. aMin.........aMax + // bMin.........bMax + // (aMin between bMin and bMax) + // 3. aMin.............aMax + // bMin....bMax + // (aMin and aMax enclose bMin and bMax; note this includes equality cases) + // 4. 
aMin....aMax + // bMin.............bMax + // (bMin and bMax enclose aMin and aMax; note equality cases are identical to case 3) + const aMinBMin = cmp(aMin, bMin); + const aMinBMax = cmp(aMin, bMax); + if (aMinBMin >= 0 && aMinBMax <= 0) { + // case 2 or 4 + return true; + } + const aMaxBMin = cmp(aMax, bMin); + const aMaxBMax = cmp(aMax, bMax); + if (aMaxBMin >= 0 && aMaxBMax <= 0) { + // case 1 + return true; + } + // case 3 or no overlap + return aMinBMin <= 0 && aMaxBMax >= 0; } private static getLeftmostChild(): number { @@ -2551,13 +2627,12 @@ interface MergeCursor { leafIndex: number; spine: Array<{ node: BNodeInternal, childIndex: number, payload: TPayload }>; leafPayload: TPayload; - makePayload: (n: BNode) => TPayload; - - onEnterLeaf: (leaf: BNode, payload: TPayload, destIndex: number, other: MergeCursor) => void; - onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean, other: MergeCursor) => void; - onExitLeaf: (cursorThis: MergeCursor, leaf: BNode, startingIndex: number, isInclusive: boolean, payload: TPayload, other: MergeCursor) => void; - onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, stepDownIndex: number, other: MergeCursor) => void; - onStepDown: (node: BNodeInternal, height: number, payload: TPayload, stepDownIndex: number, other: MergeCursor) => void; + makePayload: () => TPayload; + onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; + onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; + onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, stepDownIndex: number) => void; + onStepDown: (node: BNodeInternal, height: number, stepDownIndex: number, cursorThis: MergeCursor) => void; + onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; } 
type DisjointEntry = [height: number, node: BNode]; From 429f00093f418262fc31a5d5461e561f0342d219 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 2 Nov 2025 19:04:21 -0800 Subject: [PATCH 020/143] fix new root bug --- b+tree.js | 13 ++++++++----- b+tree.ts | 14 ++++++++------ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/b+tree.js b/b+tree.js index 2cb2ee2..55a2caf 100644 --- a/b+tree.js +++ b/b+tree.js @@ -620,13 +620,16 @@ var BTree = /** @class */ (function () { spine[0] = newRoot; unflushedSizes.forEach(function (count) { return check(count === 0, "Unexpected unflushed size after root split."); }); unflushedSizes.push(0); // new root level + isSharedFrontierDepth = insertionDepth + 2; } else { - // TODO - unflushedSizes[insertionDepth] += subtree.size(); + if (insertionDepth > 0) { + // appendAndCascade updates the size of the parent of the insertion, but does not update recursively upward + // This is done lazily to avoid log(n) asymptotics. + unflushedSizes[insertionDepth - 1] += subtree.size(); + } + isSharedFrontierDepth = insertionDepth + 1; } - // if insertionDepth was -1, a new root was made and the shared node was inserted just below it - isSharedFrontierDepth = Math.max(1, insertionDepth + 1); // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. @@ -673,7 +676,7 @@ var BTree = /** @class */ (function () { // If still carrying after root, create a new root if (carry) { var oldRoot = spine[0]; - var children = rightSide ? [oldRoot, carry] : [oldRoot, carry]; + var children = rightSide ? 
[oldRoot, carry] : [carry, oldRoot]; var newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); return newRoot; } diff --git a/b+tree.ts b/b+tree.ts index c4cfbec..3abd9a2 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -723,14 +723,16 @@ export default class BTree implements ISortedMapF, ISortedMap spine[0] = newRoot; unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); unflushedSizes.push(0); // new root level + isSharedFrontierDepth = insertionDepth + 2; } else { - // TODO - unflushedSizes[insertionDepth] += subtree.size(); + if (insertionDepth > 0) { + // appendAndCascade updates the size of the parent of the insertion, but does not update recursively upward + // This is done lazily to avoid log(n) asymptotics. + unflushedSizes[insertionDepth - 1] += subtree.size(); + } + isSharedFrontierDepth = insertionDepth + 1; } - // if insertionDepth was -1, a new root was made and the shared node was inserted just below it - isSharedFrontierDepth = Math.max(1, insertionDepth + 1); - // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. @@ -781,7 +783,7 @@ export default class BTree implements ISortedMapF, ISortedMap // If still carrying after root, create a new root if (carry) { const oldRoot = spine[0] as BNodeInternal; - const children = rightSide ? [oldRoot, carry] : [oldRoot, carry]; + const children = rightSide ? 
[oldRoot, carry] : [carry, oldRoot]; const newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); return newRoot; } From 757a3439ed81e4c903df173e626309ea7d0a778e Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 2 Nov 2025 19:10:20 -0800 Subject: [PATCH 021/143] more fixes --- b+tree.js | 20 ++++++++++---------- b+tree.ts | 21 ++++++++++----------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/b+tree.js b/b+tree.js index 55a2caf..13de9ad 100644 --- a/b+tree.js +++ b/b+tree.js @@ -566,19 +566,19 @@ var BTree = /** @class */ (function () { // Start result at the tallest subtree from the disjoint set var initialRoot = disjoint[tallestIndex][1]; var branchingFactor = this._maxNodeSize; - var rightFrontier = [initialRoot]; - BTree.updateFrontier(rightFrontier, 0, BTree.getRightmostChild); + var frontier = [initialRoot]; // Process all subtrees to the right of the tallest subtree - if (tallestIndex + 1 <= disjoint.length - 1) - BTree.processSide(branchingFactor, disjoint, rightFrontier, tallestIndex + 1, disjoint.length, 1, true, BTree.getRightmostChild); - var leftFrontier = [rightFrontier[0]]; - BTree.updateFrontier(leftFrontier, 0, BTree.getLeftmostChild); + if (tallestIndex + 1 <= disjoint.length - 1) { + BTree.updateFrontier(frontier, 0, BTree.getRightmostChild); + BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjoint.length, 1, true, BTree.getRightmostChild); + } // Process all subtrees to the left of the tallest subtree (reverse order) - if (tallestIndex - 1 >= 0) - BTree.processSide(branchingFactor, disjoint, leftFrontier, tallestIndex - 1, -1, -1, false, BTree.getLeftmostChild); + if (tallestIndex - 1 >= 0) { + BTree.updateFrontier(frontier, 0, BTree.getLeftmostChild); + BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, false, BTree.getLeftmostChild); + } var merged = new BTree(undefined, this._compare, this._maxNodeSize); - check(rightFrontier[0] === 
leftFrontier[0], "Root mismatch after merge processing."); - merged._root = rightFrontier[0]; + merged._root = frontier[0]; // Return the resulting tree return merged; }; diff --git a/b+tree.ts b/b+tree.ts index 3abd9a2..91d1606 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -649,23 +649,22 @@ export default class BTree implements ISortedMapF, ISortedMap // Start result at the tallest subtree from the disjoint set const initialRoot = disjoint[tallestIndex][1]; const branchingFactor = this._maxNodeSize; - const rightFrontier: BNode[] = [initialRoot]; - BTree.updateFrontier(rightFrontier, 0, BTree.getRightmostChild); + const frontier: BNode[] = [initialRoot]; // Process all subtrees to the right of the tallest subtree - if (tallestIndex + 1 <= disjoint.length - 1) - BTree.processSide(branchingFactor,disjoint, rightFrontier, tallestIndex + 1, disjoint.length, 1, true, BTree.getRightmostChild); - - const leftFrontier: BNode[] = [rightFrontier[0]]; - BTree.updateFrontier(leftFrontier, 0, BTree.getLeftmostChild); + if (tallestIndex + 1 <= disjoint.length - 1) { + BTree.updateFrontier(frontier, 0, BTree.getRightmostChild); + BTree.processSide(branchingFactor,disjoint, frontier, tallestIndex + 1, disjoint.length, 1, true, BTree.getRightmostChild); + } // Process all subtrees to the left of the tallest subtree (reverse order) - if (tallestIndex - 1 >= 0) - BTree.processSide(branchingFactor, disjoint, leftFrontier, tallestIndex - 1, -1, -1, false, BTree.getLeftmostChild); + if (tallestIndex - 1 >= 0) { + BTree.updateFrontier(frontier, 0, BTree.getLeftmostChild); + BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, false, BTree.getLeftmostChild); + } const merged = new BTree(undefined, this._compare, this._maxNodeSize); - check(rightFrontier[0] === leftFrontier[0], "Root mismatch after merge processing."); - merged._root = rightFrontier[0]; + merged._root = frontier[0]; // Return the resulting tree return merged; From 
354c369004f1da825a12ba868862693bc5762eda Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 2 Nov 2025 19:31:25 -0800 Subject: [PATCH 022/143] more fixes --- b+tree.js | 3 +-- b+tree.ts | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/b+tree.js b/b+tree.js index 13de9ad..66d2d4a 100644 --- a/b+tree.js +++ b/b+tree.js @@ -643,7 +643,6 @@ var BTree = /** @class */ (function () { ; // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. BTree.appendAndCascade = function (spine, insertionDepth, branchingFactor, subtree, rightSide) { - check(spine.length > 1 && insertionDepth < spine.length - 1, "Invalid insertion at leaf level."); var carry = subtree; // Append at insertionDepth and bubble new right siblings upward until a node with capacity accepts them or we reach root var d = insertionDepth; @@ -810,7 +809,7 @@ var BTree = /** @class */ (function () { } }; var disqualifySpine = function (cursor, depthFrom) { - for (var i = depthFrom; i > 0; --i) { + for (var i = depthFrom; i >= 0; --i) { var entry = cursor.spine[i]; if (entry.payload.disqualified) break; diff --git a/b+tree.ts b/b+tree.ts index 91d1606..5bf5100 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -751,7 +751,6 @@ export default class BTree implements ISortedMapF, ISortedMap branchingFactor: number, subtree: BNode, rightSide: boolean): BNodeInternal | undefined { - check(spine.length > 1 && insertionDepth < spine.length - 1, "Invalid insertion at leaf level."); let carry: BNode | undefined = subtree; // Append at insertionDepth and bubble new right siblings upward until a node with capacity accepts them or we reach root let d = insertionDepth; @@ -936,7 +935,7 @@ export default class BTree implements ISortedMapF, ISortedMap }; const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { - for (let i = depthFrom; i > 0; --i) { + for (let i = depthFrom; i >= 0; --i) { const entry = cursor.spine[i]; if (entry.payload.disqualified) break; From 
36c2106ca88c4176c4209ade5753b8a4cf8cd911 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 2 Nov 2025 19:50:45 -0800 Subject: [PATCH 023/143] more fixes --- b+tree.js | 2 +- b+tree.test.ts | 2 +- b+tree.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/b+tree.js b/b+tree.js index 66d2d4a..dcbe140 100644 --- a/b+tree.js +++ b/b+tree.js @@ -881,7 +881,7 @@ var BTree = /** @class */ (function () { } }; var onEnterLeaf = function (leaf, destIndex, cursorThis, cursorOther) { - if (destIndex > 0 || cmp(leaf.keys[0], BTree.getKey(cursorOther)) < 0) { + if (destIndex > 0 || cmp(leaf.keys[0], BTree.getKey(cursorOther)) === 0) { // Similar logic to the step-down case, except in this case we also know the leaf in the other // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. cursorThis.leafPayload.disqualified = true; diff --git a/b+tree.test.ts b/b+tree.test.ts index 3776746..4df673c 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1472,7 +1472,7 @@ function testMerge(maxNodeSize: number) { return leftValue + rightValue; }); - expect(mergeCalls).toBe(1); + expect(mergeCalls).toBe(0); expect(result.size).toBe(tree1.size + tree2.size); }); diff --git a/b+tree.ts b/b+tree.ts index 5bf5100..0af0cb7 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1039,7 +1039,7 @@ export default class BTree implements ISortedMapF, ISortedMap cursorThis: MergeCursor, cursorOther: MergeCursor ) => { - if (destIndex > 0 || cmp(leaf.keys[0], BTree.getKey(cursorOther)) < 0) { + if (destIndex > 0 || cmp(leaf.keys[0], BTree.getKey(cursorOther)) === 0) { // Similar logic to the step-down case, except in this case we also know the leaf in the other // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. 
cursorThis.leafPayload.disqualified = true; From fdaae38d00b31e0588042bb000d0bd21fae3989d Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 2 Nov 2025 20:34:06 -0800 Subject: [PATCH 024/143] fixes --- b+tree.js | 25 ++++++++++--------------- b+tree.ts | 19 ++++++++----------- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/b+tree.js b/b+tree.js index dcbe140..435354c 100644 --- a/b+tree.js +++ b/b+tree.js @@ -881,7 +881,8 @@ var BTree = /** @class */ (function () { } }; var onEnterLeaf = function (leaf, destIndex, cursorThis, cursorOther) { - if (destIndex > 0 || cmp(leaf.keys[0], BTree.getKey(cursorOther)) === 0) { + if (destIndex > 0 + || cmp(leaf.keys[0], cursorOther.leaf.minKey()) >= 0 && cmp(leaf.keys[0], cursorOther.leaf.maxKey()) <= 0) { // Similar logic to the step-down case, except in this case we also know the leaf in the other // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. cursorThis.leafPayload.disqualified = true; @@ -890,9 +891,6 @@ var BTree = /** @class */ (function () { disqualifySpine(cursorOther, cursorOther.spine.length - 1); pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); } - else { - check(destIndex === 0, "onEnterLeaf: destIndex must be 0 if not overlapping"); - } }; // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second var maxKeyLeft = left._root.maxKey(); @@ -975,9 +973,8 @@ var BTree = /** @class */ (function () { * Returns true if end-of-tree was reached (cursor not structurally mutated). 
*/ BTree.moveTo = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { - var curKey = BTree.getKey(cur); // We should start before the target (or at it if inclusive) - var keyPos = cmp(curKey, targetKey); + var keyPos = cmp(BTree.getKey(cur), targetKey); check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo precondition violated"); // Fast path: destination within current leaf var leaf = cur.leaf; @@ -1012,22 +1009,20 @@ var BTree = /** @class */ (function () { if (descentLevel < 0) { // No descent point; step up all the way; last callback gets infinity for (var s = spine.length - 1; s >= 0; --s) { - var entry = spine[s]; + var entry_1 = spine[s]; var sd = s === 0 ? Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd); + cur.onStepUp(entry_1.node, heightOf(s), entry_1.payload, entry_1.childIndex, sd); } return true; } // Step up through ancestors above the descentLevel for (var s = spine.length - 1; s > descentLevel; --s) { - var entry = spine[s]; - cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, NaN); - } - { - var entry = spine[descentLevel]; - cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); - entry.childIndex = descentIndex; + var entry_2 = spine[s]; + cur.onStepUp(entry_2.node, heightOf(s), entry_2.payload, entry_2.childIndex, NaN); } + var entry = spine[descentLevel]; + cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); + entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads spine.length = descentLevel + 1; var node = spine[descentLevel].node.children[descentIndex]; diff --git a/b+tree.ts b/b+tree.ts index 0af0cb7..4b1b939 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1039,7 +1039,8 @@ export default class BTree implements ISortedMapF, ISortedMap cursorThis: MergeCursor, cursorOther: MergeCursor ) => { - if (destIndex > 0 
|| cmp(leaf.keys[0], BTree.getKey(cursorOther)) === 0) { + if (destIndex > 0 + || cmp(leaf.keys[0], cursorOther.leaf.minKey()!) >= 0 && cmp(leaf.keys[0], cursorOther.leaf.maxKey()) <= 0) { // Similar logic to the step-down case, except in this case we also know the leaf in the other // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. cursorThis.leafPayload.disqualified = true; @@ -1047,8 +1048,6 @@ export default class BTree implements ISortedMapF, ISortedMap disqualifySpine(cursorThis, cursorThis.spine.length - 1); disqualifySpine(cursorOther, cursorOther.spine.length - 1); pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); - } else { - check(destIndex === 0, "onEnterLeaf: destIndex must be 0 if not overlapping"); } }; @@ -1140,10 +1139,9 @@ export default class BTree implements ISortedMapF, ISortedMap startedEqual: boolean, cmp: (a:K,b:K)=>number ): boolean { - const curKey = BTree.getKey(cur); // We should start before the target (or at it if inclusive) - const keyPos = cmp(curKey, targetKey); - check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo precondition violated"); + const keyPos = cmp(BTree.getKey(cur), targetKey); + check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo requires alternating hop pattern"); // Fast path: destination within current leaf const leaf = cur.leaf; @@ -1195,11 +1193,10 @@ export default class BTree implements ISortedMapF, ISortedMap const entry = spine[s]; cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, NaN); } - { - const entry = spine[descentLevel]; - cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); - entry.childIndex = descentIndex; - } + + const entry = spine[descentLevel]; + cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); + entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads 
spine.length = descentLevel + 1; From 5dda4c904dad3f196ad0d598dc3a48751ca373f5 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 2 Nov 2025 20:43:56 -0800 Subject: [PATCH 025/143] cleanup --- b+tree.js | 27 +++++++++++++-------------- b+tree.ts | 25 ++++++++++++------------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/b+tree.js b/b+tree.js index 435354c..808841a 100644 --- a/b+tree.js +++ b/b+tree.js @@ -975,7 +975,7 @@ var BTree = /** @class */ (function () { BTree.moveTo = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { // We should start before the target (or at it if inclusive) var keyPos = cmp(BTree.getKey(cur), targetKey); - check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo precondition violated"); + check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo requires alternating hop pattern"); // Fast path: destination within current leaf var leaf = cur.leaf; var i = leaf.indexOf(targetKey, -1, cmp); @@ -989,36 +989,35 @@ var BTree = /** @class */ (function () { var spine = cur.spine; var descentLevel = -1; var descentIndex = -1; - for (var s = spine.length - 1; s >= 0; --s) { + for (var s = spine.length - 1; s >= 0; s--) { var parent = spine[s].node; - var fromIndex = spine[s].childIndex; - var j = parent.indexOf(targetKey, 0, cmp); // insertion index or exact - var stepDownIndex = j + (isInclusive ? 0 : (j < parent.keys.length && cmp(parent.keys[j], targetKey) === 0 ? 1 : 0)); + var indexOf = parent.indexOf(targetKey, 0, cmp); // insertion index or exact + var stepDownIndex = indexOf + (isInclusive ? 0 : (indexOf < parent.keys.length && cmp(parent.keys[indexOf], targetKey) === 0 ? 
1 : 0)); // Note: when key not found, indexOf with failXor=0 already returns insertion index - if (stepDownIndex > fromIndex && stepDownIndex <= parent.keys.length - 1) { + if (stepDownIndex <= parent.keys.length - 1) { descentLevel = s; descentIndex = stepDownIndex; break; } } // Heights for callbacks: height = distance to leaf. Parent-of-leaf height = 1. - var heightOf = function (sIndex) { return spine.length - sIndex; }; + var heightOf = function (depth) { return spine.length - depth; }; // Exit leaf; we did walk out of it conceptually var startIndex = cur.leafIndex; cur.onExitLeaf(leaf, cur.leafPayload, startIndex, startedEqual, cur); if (descentLevel < 0) { // No descent point; step up all the way; last callback gets infinity - for (var s = spine.length - 1; s >= 0; --s) { - var entry_1 = spine[s]; - var sd = s === 0 ? Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry_1.node, heightOf(s), entry_1.payload, entry_1.childIndex, sd); + for (var depth = spine.length - 1; depth >= 0; depth--) { + var entry_1 = spine[depth]; + var sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; + cur.onStepUp(entry_1.node, heightOf(depth), entry_1.payload, entry_1.childIndex, sd); } return true; } // Step up through ancestors above the descentLevel - for (var s = spine.length - 1; s > descentLevel; --s) { - var entry_2 = spine[s]; - cur.onStepUp(entry_2.node, heightOf(s), entry_2.payload, entry_2.childIndex, NaN); + for (var depth = spine.length - 1; depth > descentLevel; depth--) { + var entry_2 = spine[depth]; + cur.onStepUp(entry_2.node, heightOf(depth), entry_2.payload, entry_2.childIndex, NaN); } var entry = spine[descentLevel]; cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); diff --git a/b+tree.ts b/b+tree.ts index 4b1b939..070f0ab 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1158,13 +1158,12 @@ export default class BTree implements ISortedMapF, ISortedMap let descentLevel = -1; let descentIndex = -1; - for (let s = spine.length - 1; s >= 0; --s) { + for (let s = spine.length - 1; s >= 0; s--) { const parent = spine[s].node; - const fromIndex = spine[s].childIndex; - const j = parent.indexOf(targetKey, 0, cmp); // insertion index or exact - const stepDownIndex = j + (isInclusive ? 0 : (j < parent.keys.length && cmp(parent.keys[j], targetKey) === 0 ? 1 : 0)); + const indexOf = parent.indexOf(targetKey, 0, cmp); // insertion index or exact + const stepDownIndex = indexOf + (isInclusive ? 0 : (indexOf < parent.keys.length && cmp(parent.keys[indexOf], targetKey) === 0 ? 1 : 0)); // Note: when key not found, indexOf with failXor=0 already returns insertion index - if (stepDownIndex > fromIndex && stepDownIndex <= parent.keys.length - 1) { + if (stepDownIndex <= parent.keys.length - 1) { descentLevel = s; descentIndex = stepDownIndex; break; @@ -1172,7 +1171,7 @@ export default class BTree implements ISortedMapF, ISortedMap } // Heights for callbacks: height = distance to leaf. Parent-of-leaf height = 1. 
- const heightOf = (sIndex: number) => spine.length - sIndex; + const heightOf = (depth: number) => spine.length - depth; // Exit leaf; we did walk out of it conceptually const startIndex = cur.leafIndex; @@ -1180,18 +1179,18 @@ export default class BTree implements ISortedMapF, ISortedMap if (descentLevel < 0) { // No descent point; step up all the way; last callback gets infinity - for (let s = spine.length - 1; s >= 0; --s) { - const entry = spine[s]; - const sd = s === 0 ? Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, sd); + for (let depth = spine.length - 1; depth >= 0; depth--) { + const entry = spine[depth]; + const sd = depth === 0 ? Number.POSITIVE_INFINITY : Number.NaN; + cur.onStepUp(entry.node, heightOf(depth), entry.payload, entry.childIndex, sd); } return true; } // Step up through ancestors above the descentLevel - for (let s = spine.length - 1; s > descentLevel; --s) { - const entry = spine[s]; - cur.onStepUp(entry.node, heightOf(s), entry.payload, entry.childIndex, NaN); + for (let depth = spine.length - 1; depth > descentLevel; depth--) { + const entry = spine[depth]; + cur.onStepUp(entry.node, heightOf(depth), entry.payload, entry.childIndex, NaN); } const entry = spine[descentLevel]; From 845af2e7d448be432c8ddf7a786225ed80f6e7af Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Mon, 3 Nov 2025 07:17:08 -0800 Subject: [PATCH 026/143] height bug --- b+tree.js | 9 ++++----- b+tree.ts | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/b+tree.js b/b+tree.js index 808841a..a436869 100644 --- a/b+tree.js +++ b/b+tree.js @@ -866,7 +866,7 @@ var BTree = /** @class */ (function () { addSharedNodeToDisjointSet(parent.children[i], height - 1); } }; - var onStepDown = function (node, height, stepDownIndex, cursorThis) { + var onStepDown = function (node, height, spineIndex, stepDownIndex, cursorThis) { if (stepDownIndex > 0) { // When we step down into a node, we 
know that we have walked from a key that is less than our target. // Because of this, if we are not stepping down into the first child, we know that all children before @@ -874,8 +874,7 @@ var BTree = /** @class */ (function () { // the child we are stepping into has a key greater than our target key, this node must overlap. // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range // of its children. - cursorThis.spine[height].payload.disqualified = true; - disqualifySpine(cursorThis, cursorThis.spine.length - height); + disqualifySpine(cursorThis, spineIndex); for (var i = 0; i < stepDownIndex; ++i) addSharedNodeToDisjointSet(node.children[i], height - 1); } @@ -1023,16 +1022,16 @@ var BTree = /** @class */ (function () { cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads + var height = heightOf(descentLevel) - 1; // calculate height before changing length spine.length = descentLevel + 1; var node = spine[descentLevel].node.children[descentIndex]; - var height = heightOf(descentLevel) - 1; while (!node.isLeaf) { var ni = node; var j = ni.indexOf(targetKey, 0, cmp); var stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 
1 : 0)); var payload = cur.makePayload(); spine.push({ node: ni, childIndex: stepDownIndex, payload: payload }); - cur.onStepDown(ni, height, stepDownIndex, cur); + cur.onStepDown(ni, height, spine.length - 1, stepDownIndex, cur); node = ni.children[stepDownIndex]; height -= 1; } diff --git a/b+tree.ts b/b+tree.ts index 070f0ab..c09fc09 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1016,6 +1016,7 @@ export default class BTree implements ISortedMapF, ISortedMap const onStepDown = ( node: BNodeInternal, height: number, + spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor ) => { @@ -1026,8 +1027,7 @@ export default class BTree implements ISortedMapF, ISortedMap // the child we are stepping into has a key greater than our target key, this node must overlap. // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range // of its children. - cursorThis.spine[height].payload.disqualified = true; - disqualifySpine(cursorThis, cursorThis.spine.length - height); + disqualifySpine(cursorThis, spineIndex); for (let i = 0; i < stepDownIndex; ++i) addSharedNodeToDisjointSet(node.children[i], height - 1); } @@ -1198,9 +1198,9 @@ export default class BTree implements ISortedMapF, ISortedMap entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads + let height = heightOf(descentLevel) - 1; // calculate height before changing length spine.length = descentLevel + 1; let node: BNode = spine[descentLevel].node.children[descentIndex]; - let height = heightOf(descentLevel) - 1; while (!node.isLeaf) { const ni = node as BNodeInternal; @@ -1208,7 +1208,7 @@ export default class BTree implements ISortedMapF, ISortedMap const stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 
1 : 0)); const payload = cur.makePayload(); spine.push({ node: ni, childIndex: stepDownIndex, payload }); - cur.onStepDown(ni, height, stepDownIndex, cur); + cur.onStepDown(ni, height, spine.length - 1, stepDownIndex, cur); node = ni.children[stepDownIndex]; height -= 1; } @@ -2627,7 +2627,7 @@ interface MergeCursor { onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, stepDownIndex: number) => void; - onStepDown: (node: BNodeInternal, height: number, stepDownIndex: number, cursorThis: MergeCursor) => void; + onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; } From 3d03312e1dc42484bf28f986a424191fec3050cc Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Mon, 3 Nov 2025 07:49:16 -0800 Subject: [PATCH 027/143] cleanup --- b+tree.js | 1 - b+tree.ts | 1 - 2 files changed, 2 deletions(-) diff --git a/b+tree.js b/b+tree.js index a436869..0749578 100644 --- a/b+tree.js +++ b/b+tree.js @@ -697,7 +697,6 @@ var BTree = /** @class */ (function () { var childIndex = frontierChildIndex(parent); var clone = parent.children[childIndex].clone(); parent.children[childIndex] = clone; - parent.keys[childIndex] = clone.maxKey(); spine[depth] = clone; } }; diff --git a/b+tree.ts b/b+tree.ts index c09fc09..bb3e1b5 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -809,7 +809,6 @@ export default class BTree implements ISortedMapF, ISortedMap const childIndex = frontierChildIndex(parent); const clone = parent.children[childIndex].clone(); parent.children[childIndex] = clone; - parent.keys[childIndex] = clone.maxKey(); spine[depth] = clone as 
BNodeInternal; } }; From ad2a0ffaff762b1502f51abfa91653b99fdd2dee Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 4 Nov 2025 19:35:29 -0800 Subject: [PATCH 028/143] fix sizing --- b+tree.d.ts | 8 ++- b+tree.js | 136 ++++++++++++++++++++------------------ b+tree.ts | 185 ++++++++++++++++++++++++++++++++-------------------- 3 files changed, 194 insertions(+), 135 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 2975923..70727f3 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -290,6 +290,12 @@ export default class BTree implements ISortedMapF, ISort * Find the first ancestor (starting at insertionDepth) with capacity */ private static findCascadeEndDepth; + private static insertNoCount; + private static getLeftmostIndex; + private static getRightmostIndex; + private static getRightInsertionIndex; + private static splitOffRightSide; + private static splitOffLeftSide; /** * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. @@ -311,8 +317,6 @@ export default class BTree implements ISortedMapF, ISort * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. */ private static areOverlapping; - private static getLeftmostChild; - private static getRightmostChild; /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. 
diff --git a/b+tree.js b/b+tree.js index 0749578..a76d098 100644 --- a/b+tree.js +++ b/b+tree.js @@ -569,13 +569,13 @@ var BTree = /** @class */ (function () { var frontier = [initialRoot]; // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjoint.length - 1) { - BTree.updateFrontier(frontier, 0, BTree.getRightmostChild); - BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjoint.length, 1, true, BTree.getRightmostChild); + BTree.updateFrontier(frontier, 0, BTree.getRightmostIndex); + BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjoint.length, 1, BTree.getRightmostIndex, BTree.getRightInsertionIndex, BTree.splitOffRightSide); } // Process all subtrees to the left of the tallest subtree (reverse order) if (tallestIndex - 1 >= 0) { - BTree.updateFrontier(frontier, 0, BTree.getLeftmostChild); - BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, false, BTree.getLeftmostChild); + BTree.updateFrontier(frontier, 0, BTree.getLeftmostIndex); + BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, BTree.getLeftmostIndex, BTree.getLeftmostIndex, BTree.splitOffLeftSide); } var merged = new BTree(undefined, this._compare, this._maxNodeSize); merged._root = frontier[0]; @@ -586,13 +586,13 @@ var BTree = /** @class */ (function () { * Processes one side (left or right) of the disjoint subtree set during a merge operation. * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. 
*/ - BTree.processSide = function (branchingFactor, disjoint, spine, start, end, step, rightSide, frontierChildIndex) { + BTree.processSide = function (branchingFactor, disjoint, spine, start, end, step, sideLastIndex, sideInsertionIndex, splitOffSide) { var isSharedFrontierDepth = 0; var cur = spine[0]; // Find the first shared node on the frontier while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { isSharedFrontierDepth++; - cur = cur.children[frontierChildIndex(cur)]; + cur = cur.children[sideLastIndex(cur)]; } // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. @@ -608,82 +608,81 @@ var BTree = /** @class */ (function () { var subtreeHeight = disjoint[i][0]; var insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation - BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, frontierChildIndex); + BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideLastIndex); // Calculate expansion depth (first ancestor with capacity) var expansionDepth = Math.max(0, BTree.findCascadeEndDepth(spine, insertionDepth, branchingFactor)); // Update sizes on spine above the shared ancestor before we expand - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, frontierChildIndex); + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, sideLastIndex); // Append and cascade splits upward - var newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, rightSide); + var newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideLastIndex, sideInsertionIndex, splitOffSide); if (newRoot) { // Set the spine root to the highest up new node; the rest of the spine is updated below spine[0] = 
newRoot; unflushedSizes.forEach(function (count) { return check(count === 0, "Unexpected unflushed size after root split."); }); unflushedSizes.push(0); // new root level isSharedFrontierDepth = insertionDepth + 2; + unflushedSizes[insertionDepth + 1] += subtree.size(); } else { - if (insertionDepth > 0) { - // appendAndCascade updates the size of the parent of the insertion, but does not update recursively upward - // This is done lazily to avoid log(n) asymptotics. - unflushedSizes[insertionDepth - 1] += subtree.size(); - } isSharedFrontierDepth = insertionDepth + 1; + unflushedSizes[insertionDepth] += subtree.size(); } // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. - BTree.updateFrontier(spine, expansionDepth, frontierChildIndex); + BTree.updateFrontier(spine, expansionDepth, sideLastIndex); check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); } // Finally, propagate any remaining unflushed sizes upward and update max keys - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, frontierChildIndex); + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, sideLastIndex); }; ; // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. 
- BTree.appendAndCascade = function (spine, insertionDepth, branchingFactor, subtree, rightSide) { - var carry = subtree; - // Append at insertionDepth and bubble new right siblings upward until a node with capacity accepts them or we reach root - var d = insertionDepth; - while (carry && d >= 0) { - var parent = spine[d]; - if (rightSide) { - if (parent.keys.length < branchingFactor) { - parent.insert(parent.children.length, carry); - carry = undefined; - } - else { - var newRight = parent.splitOffRightSide(); - newRight.insert(newRight.children.length, carry); - carry = newRight; - } + BTree.appendAndCascade = function (spine, insertionDepth, branchingFactor, subtree, sideLastIndex, sideInsertionIndex, splitOffSide) { + if (insertionDepth >= 0) { + var carry = undefined; + var insertTarget = spine[insertionDepth]; + if (insertTarget.keys.length >= branchingFactor) { + insertTarget = carry = splitOffSide(insertTarget); } - else { + var d = insertionDepth - 1; + while (carry && d >= 0) { + var parent = spine[d]; + var sideIndex = sideLastIndex(parent); + // Refresh last key since child was split + parent.keys[sideIndex] = parent.children[sideIndex].maxKey(); if (parent.keys.length < branchingFactor) { - parent.insert(0, carry); + BTree.insertNoCount(parent, sideInsertionIndex(parent), carry); carry = undefined; } else { - var newLeft = parent.splitOffLeftSide(); - newLeft.insert(0, carry); - carry = newLeft; + var tornOff = splitOffSide(parent); + BTree.insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); + carry = tornOff; } + d--; + } + var newRoot = undefined; + if (carry !== undefined) { + var oldRoot = spine[0]; + newRoot = new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); + BTree.insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); } - d--; + BTree.insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); + return newRoot; } - // If still carrying after root, create a new root - if (carry) { + else { + // Insertion of 
subtree with equal height to current tree var oldRoot = spine[0]; - var children = rightSide ? [oldRoot, carry] : [carry, oldRoot]; - var newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); + var newRoot = new BNodeInternal([oldRoot], oldRoot.size()); + BTree.insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); return newRoot; } - return undefined; }; ; // Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is mutable - BTree.ensureNotShared = function (spine, isSharedFrontierDepth, depthToInclusive, frontierChildIndex) { + BTree.ensureNotShared = function (spine, isSharedFrontierDepth, depthToInclusive, sideLastIndex) { if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this // Clone root if needed first (depth 0) @@ -694,7 +693,7 @@ var BTree = /** @class */ (function () { // Clone downward along the frontier to 'depthToInclusive' for (var depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { var parent = spine[depth - 1]; - var childIndex = frontierChildIndex(parent); + var childIndex = sideLastIndex(parent); var clone = parent.children[childIndex].clone(); parent.children[childIndex] = clone; spine[depth] = clone; @@ -704,7 +703,7 @@ var BTree = /** @class */ (function () { /** * Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) */ - BTree.updateSizeAndMax = function (spine, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive, frontierChildIndex) { + BTree.updateSizeAndMax = function (spine, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive, sideLastIndex) { // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because // the insertion point is inside a shared node which will always have correct sizes var maxKey = spine[isSharedFrontierDepth].maxKey(); @@ -719,25 +718,25 @@ var BTree = /** @class */ (function 
() { } var node = spine[depth]; node._size += sizeAtLevel; - node.keys[frontierChildIndex(node)] = maxKey; + node.keys[sideLastIndex(node)] = maxKey; } }; ; /** * Update a spine (frontier) from a specific depth down, inclusive */ - BTree.updateFrontier = function (frontier, depthLastValid, frontierChildIndex) { + BTree.updateFrontier = function (frontier, depthLastValid, sideLastIndex) { check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); var startingAncestor = frontier[depthLastValid]; if (startingAncestor.isLeaf) return; var an = startingAncestor; - var cur = an.children[frontierChildIndex(an)]; + var cur = an.children[sideLastIndex(an)]; var depth = depthLastValid + 1; while (!cur.isLeaf) { var ni = cur; frontier[depth] = ni; - cur = ni.children[frontierChildIndex(ni)]; + cur = ni.children[sideLastIndex(ni)]; depth++; } frontier[depth] = cur; @@ -754,6 +753,25 @@ var BTree = /** @class */ (function () { return -1; // no capacity, will need a new root }; ; + BTree.insertNoCount = function (parent, index, child) { + parent.children.splice(index, 0, child); + parent.keys.splice(index, 0, child.maxKey()); + }; + BTree.getLeftmostIndex = function () { + return 0; + }; + BTree.getRightmostIndex = function (node) { + return node.children.length - 1; + }; + BTree.getRightInsertionIndex = function (node) { + return node.children.length; + }; + BTree.splitOffRightSide = function (node) { + return node.splitOffRightSide(); + }; + BTree.splitOffLeftSide = function (node) { + return node.splitOffLeftSide(); + }; /** * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. 
@@ -1108,12 +1126,6 @@ var BTree = /** @class */ (function () { // case 3 or no overlap return aMinBMin <= 0 && aMaxBMax >= 0; }; - BTree.getLeftmostChild = function () { - return 0; - }; - BTree.getRightmostChild = function (node) { - return node.children.length - 1; - }; /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. @@ -2180,9 +2192,9 @@ var BNodeInternal = /** @class */ (function (_super) { var half = this.children.length >> 1; var newChildren = this.children.splice(half); var newKeys = this.keys.splice(half); - var movedSize = sumChildSizes(newChildren); - var newNode = new BNodeInternal(newChildren, movedSize, newKeys); - this._size -= movedSize; + var sizePrev = this._size; + this._size = sumChildSizes(this.children); + var newNode = new BNodeInternal(newChildren, sizePrev - this._size, newKeys); return newNode; }; /** @@ -2194,9 +2206,9 @@ var BNodeInternal = /** @class */ (function (_super) { var half = this.children.length >> 1; var newChildren = this.children.splice(0, half); var newKeys = this.keys.splice(0, half); - var movedSize = sumChildSizes(newChildren); - var newNode = new BNodeInternal(newChildren, movedSize, newKeys); - this._size -= movedSize; + var sizePrev = this._size; + this._size = sumChildSizes(this.children); + var newNode = new BNodeInternal(newChildren, sizePrev - this._size, newKeys); return newNode; }; BNodeInternal.prototype.takeFromRight = function (rhs) { diff --git a/b+tree.ts b/b+tree.ts index bb3e1b5..01ad62a 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -653,14 +653,31 @@ export default class BTree implements ISortedMapF, ISortedMap // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjoint.length - 1) { - BTree.updateFrontier(frontier, 0, BTree.getRightmostChild); - BTree.processSide(branchingFactor,disjoint, frontier, tallestIndex + 1, disjoint.length, 1, true, BTree.getRightmostChild); + 
BTree.updateFrontier(frontier, 0, BTree.getRightmostIndex); + BTree.processSide( + branchingFactor, + disjoint, + frontier, + tallestIndex + 1, + disjoint.length, 1, + BTree.getRightmostIndex, + BTree.getRightInsertionIndex, + BTree.splitOffRightSide); } // Process all subtrees to the left of the tallest subtree (reverse order) if (tallestIndex - 1 >= 0) { - BTree.updateFrontier(frontier, 0, BTree.getLeftmostChild); - BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, false, BTree.getLeftmostChild); + BTree.updateFrontier(frontier, 0, BTree.getLeftmostIndex); + BTree.processSide(branchingFactor, + disjoint, + frontier, + tallestIndex - 1, + -1, + -1, + BTree.getLeftmostIndex, + BTree.getLeftmostIndex, + BTree.splitOffLeftSide + ); } const merged = new BTree(undefined, this._compare, this._maxNodeSize); @@ -681,14 +698,15 @@ export default class BTree implements ISortedMapF, ISortedMap start: number, end: number, step: number, - rightSide: boolean, - frontierChildIndex: (node: BNodeInternal) => number): void { + sideLastIndex: (node: BNodeInternal) => number, + sideInsertionIndex: (node: BNodeInternal) => number, + splitOffSide: (node: BNodeInternal) => BNodeInternal) { let isSharedFrontierDepth = 0; let cur = spine[0]; // Find the first shared node on the frontier while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { isSharedFrontierDepth++; - cur = (cur as BNodeInternal).children[frontierChildIndex(cur as BNodeInternal)]; + cur = (cur as BNodeInternal).children[sideLastIndex(cur as BNodeInternal)]; } // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. 
@@ -707,41 +725,38 @@ export default class BTree implements ISortedMapF, ISortedMap const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation - BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, frontierChildIndex); + BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideLastIndex); // Calculate expansion depth (first ancestor with capacity) const expansionDepth = Math.max(0, BTree.findCascadeEndDepth(spine, insertionDepth, branchingFactor)); // Update sizes on spine above the shared ancestor before we expand - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, frontierChildIndex); + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, sideLastIndex); // Append and cascade splits upward - const newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, rightSide); + const newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideLastIndex, sideInsertionIndex, splitOffSide); if (newRoot) { // Set the spine root to the highest up new node; the rest of the spine is updated below spine[0] = newRoot; unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); unflushedSizes.push(0); // new root level isSharedFrontierDepth = insertionDepth + 2; + unflushedSizes[insertionDepth + 1] += subtree.size(); } else { - if (insertionDepth > 0) { - // appendAndCascade updates the size of the parent of the insertion, but does not update recursively upward - // This is done lazily to avoid log(n) asymptotics. 
- unflushedSizes[insertionDepth - 1] += subtree.size(); - } isSharedFrontierDepth = insertionDepth + 1; + unflushedSizes[insertionDepth] += subtree.size(); } // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. - BTree.updateFrontier(spine, expansionDepth, frontierChildIndex); + BTree.updateFrontier(spine, expansionDepth, sideLastIndex); check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); } // Finally, propagate any remaining unflushed sizes upward and update max keys - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, frontierChildIndex); + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, sideLastIndex); }; // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. 
@@ -750,42 +765,49 @@ export default class BTree implements ISortedMapF, ISortedMap insertionDepth: number, branchingFactor: number, subtree: BNode, - rightSide: boolean): BNodeInternal | undefined { - let carry: BNode | undefined = subtree; - // Append at insertionDepth and bubble new right siblings upward until a node with capacity accepts them or we reach root - let d = insertionDepth; - while (carry && d >= 0) { - const parent = spine[d] as BNodeInternal; - if (rightSide) { - if (parent.keys.length < branchingFactor) { - parent.insert(parent.children.length, carry); - carry = undefined; - } else { - const newRight = parent.splitOffRightSide(); - newRight.insert(newRight.children.length, carry); - carry = newRight; - } - } else { + sideLastIndex: (node: BNodeInternal) => number, + sideInsertionIndex: (node: BNodeInternal) => number, + splitOffSide: (node: BNodeInternal) => BNodeInternal + ): BNodeInternal | undefined { + if (insertionDepth >= 0) { + let carry: BNode | undefined = undefined; + let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; + if (insertTarget.keys.length >= branchingFactor) { + insertTarget = carry = splitOffSide(insertTarget); + } + + let d = insertionDepth - 1; + while (carry && d >= 0) { + const parent = spine[d] as BNodeInternal; + const sideIndex = sideLastIndex(parent); + // Refresh last key since child was split + parent.keys[sideIndex] = parent.children[sideIndex].maxKey(); if (parent.keys.length < branchingFactor) { - parent.insert(0, carry); + BTree.insertNoCount(parent, sideInsertionIndex(parent), carry); carry = undefined; } else { - const newLeft = parent.splitOffLeftSide(); - newLeft.insert(0, carry); - carry = newLeft; + const tornOff = splitOffSide(parent); + BTree.insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); + carry = tornOff; } + d--; } - d--; - } - // If still carrying after root, create a new root - if (carry) { + let newRoot: BNodeInternal | undefined = undefined; + if (carry !== 
undefined) { + const oldRoot = spine[0] as BNodeInternal; + newRoot = new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); + BTree.insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); + } + BTree.insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); + return newRoot; + } else { + // Insertion of subtree with equal height to current tree const oldRoot = spine[0] as BNodeInternal; - const children = rightSide ? [oldRoot, carry] : [carry, oldRoot]; - const newRoot = new BNodeInternal(children, oldRoot.size() + carry.size()); + const newRoot = new BNodeInternal([oldRoot], oldRoot.size()); + BTree.insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); return newRoot; } - return undefined; }; // Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is mutable @@ -793,7 +815,7 @@ export default class BTree implements ISortedMapF, ISortedMap spine: BNode[], isSharedFrontierDepth: number, depthToInclusive: number, - frontierChildIndex: (node: BNodeInternal) => number) { + sideLastIndex: (node: BNodeInternal) => number) { if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this @@ -806,7 +828,7 @@ export default class BTree implements ISortedMapF, ISortedMap // Clone downward along the frontier to 'depthToInclusive' for (let depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { const parent = spine[depth - 1] as BNodeInternal; - const childIndex = frontierChildIndex(parent); + const childIndex = sideLastIndex(parent); const clone = parent.children[childIndex].clone(); parent.children[childIndex] = clone; spine[depth] = clone as BNodeInternal; @@ -821,7 +843,7 @@ export default class BTree implements ISortedMapF, ISortedMap unflushedSizes: number[], isSharedFrontierDepth: number, depthUpToInclusive: number, - frontierChildIndex: (node: BNodeInternal) => number) { + 
sideLastIndex: (node: BNodeInternal) => number) { // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because // the insertion point is inside a shared node which will always have correct sizes const maxKey = spine[isSharedFrontierDepth].maxKey(); @@ -836,25 +858,25 @@ export default class BTree implements ISortedMapF, ISortedMap } const node = spine[depth] as BNodeInternal; node._size += sizeAtLevel; - node.keys[frontierChildIndex(node)] = maxKey; + node.keys[sideLastIndex(node)] = maxKey; } }; /** * Update a spine (frontier) from a specific depth down, inclusive */ - private static updateFrontier(frontier: BNode[], depthLastValid: number, frontierChildIndex: (node: BNodeInternal) => number): void { + private static updateFrontier(frontier: BNode[], depthLastValid: number, sideLastIndex: (node: BNodeInternal) => number): void { check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); const startingAncestor = frontier[depthLastValid]; if (startingAncestor.isLeaf) return; const an = startingAncestor as BNodeInternal; - let cur: BNode = an.children[frontierChildIndex(an)]; + let cur: BNode = an.children[sideLastIndex(an)]; let depth = depthLastValid + 1; while (!cur.isLeaf) { const ni = cur as BNodeInternal; frontier[depth] = ni; - cur = ni.children[frontierChildIndex(ni)]; + cur = ni.children[sideLastIndex(ni)]; depth++; } frontier[depth] = cur; @@ -871,6 +893,35 @@ export default class BTree implements ISortedMapF, ISortedMap return -1; // no capacity, will need a new root }; + private static insertNoCount( + parent: BNodeInternal, + index: number, + child: BNode + ): void { + parent.children.splice(index, 0, child); + parent.keys.splice(index, 0, child.maxKey()); + } + + private static getLeftmostIndex(): number { + return 0; + } + + private static getRightmostIndex(node: BNodeInternal): number { + return node.children.length - 1; + } + + private static getRightInsertionIndex(node: 
BNodeInternal): number { + return node.children.length; + } + + private static splitOffRightSide(node: BNodeInternal): BNodeInternal { + return node.splitOffRightSide(); + } + + private static splitOffLeftSide(node: BNodeInternal): BNodeInternal { + return node.splitOffLeftSide(); + } + /** * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. @@ -1296,14 +1347,6 @@ export default class BTree implements ISortedMapF, ISortedMap return aMinBMin <= 0 && aMaxBMax >= 0; } - private static getLeftmostChild(): number { - return 0; - } - - private static getRightmostChild(node: BNodeInternal): number { - return node.children.length - 1; - } - /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. @@ -2445,12 +2488,12 @@ class BNodeInternal extends BNode { */ splitOffRightSide() { // assert !this.isShared; - var half = this.children.length >> 1; - var newChildren = this.children.splice(half); - var newKeys = this.keys.splice(half); - var movedSize = sumChildSizes(newChildren); - var newNode = new BNodeInternal(newChildren, movedSize, newKeys); - this._size -= movedSize; + const half = this.children.length >> 1; + const newChildren = this.children.splice(half); + const newKeys = this.keys.splice(half); + const sizePrev = this._size; + this._size = sumChildSizes(this.children); + const newNode = new BNodeInternal(newChildren, sizePrev - this._size, newKeys); return newNode; } @@ -2460,12 +2503,12 @@ class BNodeInternal extends BNode { */ splitOffLeftSide() { // assert !this.isShared; - var half = this.children.length >> 1; - var newChildren = this.children.splice(0, half); - var newKeys = this.keys.splice(0, half); - var movedSize = sumChildSizes(newChildren); - var newNode = new BNodeInternal(newChildren, movedSize, 
newKeys); - this._size -= movedSize; + const half = this.children.length >> 1; + const newChildren = this.children.splice(0, half); + const newKeys = this.keys.splice(0, half); + const sizePrev = this._size; + this._size = sumChildSizes(this.children); + const newNode = new BNodeInternal(newChildren, sizePrev - this._size, newKeys); return newNode; } From d29f98c5a8fa862148c58413a0cd52b0c077322e Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 4 Nov 2025 20:38:57 -0800 Subject: [PATCH 029/143] tests passing --- b+tree.d.ts | 2 ++ b+tree.js | 48 +++++++++++++++++++++++-------------------- b+tree.ts | 59 +++++++++++++++++++++++++++++++---------------------- 3 files changed, 63 insertions(+), 46 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 70727f3..3feae67 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -296,6 +296,8 @@ export default class BTree implements ISortedMapF, ISort private static getRightInsertionIndex; private static splitOffRightSide; private static splitOffLeftSide; + private static updateRightMax; + private static noop; /** * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. diff --git a/b+tree.js b/b+tree.js index a76d098..1f923dc 100644 --- a/b+tree.js +++ b/b+tree.js @@ -508,10 +508,9 @@ var BTree = /** @class */ (function () { if (other.size === 0 || this.size === 0) return; var makePayload = function () { return undefined; }; - var empty = function () { }; // Initialize cursors at minimum keys. 
- var curA = BTree.createCursor(this, makePayload, empty, empty, empty, empty, empty); - var curB = BTree.createCursor(other, makePayload, empty, empty, empty, empty, empty); + var curA = BTree.createCursor(this, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); + var curB = BTree.createCursor(other, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); // Walk both cursors while (true) { var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); @@ -570,12 +569,13 @@ var BTree = /** @class */ (function () { // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjoint.length - 1) { BTree.updateFrontier(frontier, 0, BTree.getRightmostIndex); - BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjoint.length, 1, BTree.getRightmostIndex, BTree.getRightInsertionIndex, BTree.splitOffRightSide); + BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjoint.length, 1, BTree.getRightmostIndex, BTree.getRightInsertionIndex, BTree.splitOffRightSide, BTree.updateRightMax); } // Process all subtrees to the left of the tallest subtree (reverse order) if (tallestIndex - 1 >= 0) { BTree.updateFrontier(frontier, 0, BTree.getLeftmostIndex); - BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, BTree.getLeftmostIndex, BTree.getLeftmostIndex, BTree.splitOffLeftSide); + BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, BTree.getLeftmostIndex, BTree.getLeftmostIndex, BTree.splitOffLeftSide, BTree.noop // left side appending doesn't update max keys + ); } var merged = new BTree(undefined, this._compare, this._maxNodeSize); merged._root = frontier[0]; @@ -586,13 +586,13 @@ var BTree = /** @class */ (function () { * Processes one side (left or right) of the disjoint subtree set during a merge operation. * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. 
*/ - BTree.processSide = function (branchingFactor, disjoint, spine, start, end, step, sideLastIndex, sideInsertionIndex, splitOffSide) { + BTree.processSide = function (branchingFactor, disjoint, spine, start, end, step, sideIndex, sideInsertionIndex, splitOffSide, updateMax) { var isSharedFrontierDepth = 0; var cur = spine[0]; // Find the first shared node on the frontier while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { isSharedFrontierDepth++; - cur = cur.children[sideLastIndex(cur)]; + cur = cur.children[sideIndex(cur)]; } // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. @@ -608,13 +608,13 @@ var BTree = /** @class */ (function () { var subtreeHeight = disjoint[i][0]; var insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation - BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideLastIndex); + BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); // Calculate expansion depth (first ancestor with capacity) var expansionDepth = Math.max(0, BTree.findCascadeEndDepth(spine, insertionDepth, branchingFactor)); // Update sizes on spine above the shared ancestor before we expand - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, sideLastIndex); + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); // Append and cascade splits upward - var newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideLastIndex, sideInsertionIndex, splitOffSide); + var newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); if (newRoot) { // Set the spine root to the highest up new node; the rest of the spine 
is updated below spine[0] = newRoot; @@ -630,16 +630,16 @@ var BTree = /** @class */ (function () { // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. - BTree.updateFrontier(spine, expansionDepth, sideLastIndex); + BTree.updateFrontier(spine, expansionDepth, sideIndex); check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); } // Finally, propagate any remaining unflushed sizes upward and update max keys - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, sideLastIndex); + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); }; ; // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. 
- BTree.appendAndCascade = function (spine, insertionDepth, branchingFactor, subtree, sideLastIndex, sideInsertionIndex, splitOffSide) { + BTree.appendAndCascade = function (spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide) { if (insertionDepth >= 0) { var carry = undefined; var insertTarget = spine[insertionDepth]; @@ -649,9 +649,9 @@ var BTree = /** @class */ (function () { var d = insertionDepth - 1; while (carry && d >= 0) { var parent = spine[d]; - var sideIndex = sideLastIndex(parent); + var idx = sideIndex(parent); // Refresh last key since child was split - parent.keys[sideIndex] = parent.children[sideIndex].maxKey(); + parent.keys[idx] = parent.children[idx].maxKey(); if (parent.keys.length < branchingFactor) { BTree.insertNoCount(parent, sideInsertionIndex(parent), carry); carry = undefined; @@ -682,7 +682,7 @@ var BTree = /** @class */ (function () { }; ; // Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is mutable - BTree.ensureNotShared = function (spine, isSharedFrontierDepth, depthToInclusive, sideLastIndex) { + BTree.ensureNotShared = function (spine, isSharedFrontierDepth, depthToInclusive, sideIndex) { if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this // Clone root if needed first (depth 0) @@ -693,7 +693,7 @@ var BTree = /** @class */ (function () { // Clone downward along the frontier to 'depthToInclusive' for (var depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { var parent = spine[depth - 1]; - var childIndex = sideLastIndex(parent); + var childIndex = sideIndex(parent); var clone = parent.children[childIndex].clone(); parent.children[childIndex] = clone; spine[depth] = clone; @@ -703,7 +703,7 @@ var BTree = /** @class */ (function () { /** * Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) */ - 
BTree.updateSizeAndMax = function (spine, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive, sideLastIndex) { + BTree.updateSizeAndMax = function (spine, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive, updateMax) { // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because // the insertion point is inside a shared node which will always have correct sizes var maxKey = spine[isSharedFrontierDepth].maxKey(); @@ -718,25 +718,25 @@ var BTree = /** @class */ (function () { } var node = spine[depth]; node._size += sizeAtLevel; - node.keys[sideLastIndex(node)] = maxKey; + updateMax(node, maxKey); } }; ; /** * Update a spine (frontier) from a specific depth down, inclusive */ - BTree.updateFrontier = function (frontier, depthLastValid, sideLastIndex) { + BTree.updateFrontier = function (frontier, depthLastValid, sideIndex) { check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); var startingAncestor = frontier[depthLastValid]; if (startingAncestor.isLeaf) return; var an = startingAncestor; - var cur = an.children[sideLastIndex(an)]; + var cur = an.children[sideIndex(an)]; var depth = depthLastValid + 1; while (!cur.isLeaf) { var ni = cur; frontier[depth] = ni; - cur = ni.children[sideLastIndex(ni)]; + cur = ni.children[sideIndex(ni)]; depth++; } frontier[depth] = cur; @@ -772,6 +772,10 @@ var BTree = /** @class */ (function () { BTree.splitOffLeftSide = function (node) { return node.splitOffLeftSide(); }; + BTree.updateRightMax = function (node, maxBelow) { + node.keys[node.keys.length - 1] = maxBelow; + }; + BTree.noop = function () { }; /** * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. 
diff --git a/b+tree.ts b/b+tree.ts index 01ad62a..57e9db8 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -588,11 +588,10 @@ export default class BTree implements ISortedMapF, ISortedMap return; const makePayload = (): undefined => undefined; - const empty = () => {}; // Initialize cursors at minimum keys. - const curA = BTree.createCursor(this, makePayload, empty, empty, empty, empty, empty); - const curB = BTree.createCursor(other, makePayload, empty, empty, empty, empty, empty); + const curA = BTree.createCursor(this, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); + const curB = BTree.createCursor(other, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); // Walk both cursors while (true) { @@ -662,13 +661,16 @@ export default class BTree implements ISortedMapF, ISortedMap disjoint.length, 1, BTree.getRightmostIndex, BTree.getRightInsertionIndex, - BTree.splitOffRightSide); + BTree.splitOffRightSide, + BTree.updateRightMax + ); } // Process all subtrees to the left of the tallest subtree (reverse order) if (tallestIndex - 1 >= 0) { BTree.updateFrontier(frontier, 0, BTree.getLeftmostIndex); - BTree.processSide(branchingFactor, + BTree.processSide( + branchingFactor, disjoint, frontier, tallestIndex - 1, @@ -676,7 +678,8 @@ export default class BTree implements ISortedMapF, ISortedMap -1, BTree.getLeftmostIndex, BTree.getLeftmostIndex, - BTree.splitOffLeftSide + BTree.splitOffLeftSide, + BTree.noop // left side appending doesn't update max keys ); } @@ -698,15 +701,17 @@ export default class BTree implements ISortedMapF, ISortedMap start: number, end: number, step: number, - sideLastIndex: (node: BNodeInternal) => number, + sideIndex: (node: BNodeInternal) => number, sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal) { + splitOffSide: (node: BNodeInternal) => BNodeInternal, + updateMax: (node: BNodeInternal, maxBelow: K) => void + ): void { let 
isSharedFrontierDepth = 0; let cur = spine[0]; // Find the first shared node on the frontier while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { isSharedFrontierDepth++; - cur = (cur as BNodeInternal).children[sideLastIndex(cur as BNodeInternal)]; + cur = (cur as BNodeInternal).children[sideIndex(cur as BNodeInternal)]; } // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. @@ -725,16 +730,16 @@ export default class BTree implements ISortedMapF, ISortedMap const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation - BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideLastIndex); + BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); // Calculate expansion depth (first ancestor with capacity) const expansionDepth = Math.max(0, BTree.findCascadeEndDepth(spine, insertionDepth, branchingFactor)); // Update sizes on spine above the shared ancestor before we expand - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, sideLastIndex); + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); // Append and cascade splits upward - const newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideLastIndex, sideInsertionIndex, splitOffSide); + const newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); if (newRoot) { // Set the spine root to the highest up new node; the rest of the spine is updated below spine[0] = newRoot; @@ -750,13 +755,13 @@ export default class BTree implements ISortedMapF, ISortedMap // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be 
higher up. - BTree.updateFrontier(spine, expansionDepth, sideLastIndex); + BTree.updateFrontier(spine, expansionDepth, sideIndex); check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); } // Finally, propagate any remaining unflushed sizes upward and update max keys - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, sideLastIndex); + BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); }; // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. @@ -765,7 +770,7 @@ export default class BTree implements ISortedMapF, ISortedMap insertionDepth: number, branchingFactor: number, subtree: BNode, - sideLastIndex: (node: BNodeInternal) => number, + sideIndex: (node: BNodeInternal) => number, sideInsertionIndex: (node: BNodeInternal) => number, splitOffSide: (node: BNodeInternal) => BNodeInternal ): BNodeInternal | undefined { @@ -779,9 +784,9 @@ export default class BTree implements ISortedMapF, ISortedMap let d = insertionDepth - 1; while (carry && d >= 0) { const parent = spine[d] as BNodeInternal; - const sideIndex = sideLastIndex(parent); + const idx = sideIndex(parent); // Refresh last key since child was split - parent.keys[sideIndex] = parent.children[sideIndex].maxKey(); + parent.keys[idx] = parent.children[idx].maxKey(); if (parent.keys.length < branchingFactor) { BTree.insertNoCount(parent, sideInsertionIndex(parent), carry); carry = undefined; @@ -815,7 +820,7 @@ export default class BTree implements ISortedMapF, ISortedMap spine: BNode[], isSharedFrontierDepth: number, depthToInclusive: number, - sideLastIndex: (node: BNodeInternal) => number) { + sideIndex: (node: BNodeInternal) => number) { if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) return; // nothing 
to clone when root is a leaf; equal-height case will handle this @@ -828,7 +833,7 @@ export default class BTree implements ISortedMapF, ISortedMap // Clone downward along the frontier to 'depthToInclusive' for (let depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { const parent = spine[depth - 1] as BNodeInternal; - const childIndex = sideLastIndex(parent); + const childIndex = sideIndex(parent); const clone = parent.children[childIndex].clone(); parent.children[childIndex] = clone; spine[depth] = clone as BNodeInternal; @@ -843,7 +848,7 @@ export default class BTree implements ISortedMapF, ISortedMap unflushedSizes: number[], isSharedFrontierDepth: number, depthUpToInclusive: number, - sideLastIndex: (node: BNodeInternal) => number) { + updateMax: (node: BNodeInternal, maxBelow: K) => void) { // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because // the insertion point is inside a shared node which will always have correct sizes const maxKey = spine[isSharedFrontierDepth].maxKey(); @@ -858,25 +863,25 @@ export default class BTree implements ISortedMapF, ISortedMap } const node = spine[depth] as BNodeInternal; node._size += sizeAtLevel; - node.keys[sideLastIndex(node)] = maxKey; + updateMax(node, maxKey); } }; /** * Update a spine (frontier) from a specific depth down, inclusive */ - private static updateFrontier(frontier: BNode[], depthLastValid: number, sideLastIndex: (node: BNodeInternal) => number): void { + private static updateFrontier(frontier: BNode[], depthLastValid: number, sideIndex: (node: BNodeInternal) => number): void { check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); const startingAncestor = frontier[depthLastValid]; if (startingAncestor.isLeaf) return; const an = startingAncestor as BNodeInternal; - let cur: BNode = an.children[sideLastIndex(an)]; + let cur: BNode = an.children[sideIndex(an)]; let depth = depthLastValid + 1; while 
(!cur.isLeaf) { const ni = cur as BNodeInternal; frontier[depth] = ni; - cur = ni.children[sideLastIndex(ni)]; + cur = ni.children[sideIndex(ni)]; depth++; } frontier[depth] = cur; @@ -922,6 +927,12 @@ export default class BTree implements ISortedMapF, ISortedMap return node.splitOffLeftSide(); } + private static updateRightMax(node: BNodeInternal, maxBelow: K): void { + node.keys[node.keys.length - 1] = maxBelow; + } + + private static noop(): void {} + /** * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. From ac45d50a14180d8d942c0aa8a4aec6c4a7b7aa54 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 4 Nov 2025 21:06:20 -0800 Subject: [PATCH 030/143] check valid --- b+tree.test.ts | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/b+tree.test.ts b/b+tree.test.ts index 4df673c..891f822 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1396,6 +1396,7 @@ function testMerge(maxNodeSize: number) { const resultRoot = result['_root'] as any; expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); + result.checkValid(); }); test('Merge leaf roots with intersecting keys', () => { @@ -1413,6 +1414,7 @@ function testMerge(maxNodeSize: number) { expect(calls).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); expect(result.toArray()).toEqual([[1, 10], [2, 220], [3, 300], [4, 40], [5, 500]]); + result.checkValid(); }); test('Merge leaf roots with disjoint keys', () => { @@ -1437,6 +1439,7 @@ function testMerge(maxNodeSize: number) { [5, 5], [6, 1006] ]); + result.checkValid(); }); test('Merge trees disjoint except for shared maximum key', () => { @@ -1456,6 +1459,7 @@ function testMerge(maxNodeSize: number) { expect(mergeCalls).toBe(1); expect(result.get(size - 1)).toBe((size - 1) + (size - 
1) * 3); expect(result.size).toBe(tree1.size + tree2.size - 1); + result.checkValid(); }); test('Merge trees where all leaves are disjoint and one tree straddles the other', () => { @@ -1474,6 +1478,7 @@ function testMerge(maxNodeSize: number) { expect(mergeCalls).toBe(0); expect(result.size).toBe(tree1.size + tree2.size); + result.checkValid(); }); test('Merge where two-leaf tree intersects leaf-root tree across both leaves', () => { @@ -1495,6 +1500,7 @@ function testMerge(maxNodeSize: number) { expect(result.get(Math.floor(size / 2))).toBe(5 * Math.floor(size / 2)); expect(result.get(size - 1)).toBe(5 * (size - 1)); expect(result.size).toBe(tree1.size + tree2.size - seenKeys.length); + result.checkValid(); }); test('Merge where max key equals min key of other tree', () => { @@ -1514,6 +1520,7 @@ function testMerge(maxNodeSize: number) { expect(mergeCalls).toBe(1); expect(result.get(size - 1)).toBe((size - 1) * 10); expect(result.size).toBe(tree1.size + tree2.size - 1); + result.checkValid(); }); test('Merge odd and even keyed trees', () => { @@ -1532,6 +1539,7 @@ function testMerge(maxNodeSize: number) { expect(mergeCalls).toBe(0); expect(result.size).toBe(treeOdd.size + treeEven.size); + result.checkValid(); }); test('Merge overlapping prefix equal to branching factor', () => { @@ -1565,6 +1573,7 @@ function testMerge(maxNodeSize: number) { ]; expect(result.toArray()).toEqual(expected); expect(result.size).toBe(tree1.size + tree2.size - shared); + result.checkValid(); }); test('Merge two empty trees', () => { @@ -1601,6 +1610,7 @@ function testMerge(maxNodeSize: number) { expect(result.size).toBe(6); expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60]]); + result.checkValid(); }); test('Merge with completely overlapping keys - sum values', () => { @@ -1612,6 +1622,7 @@ function testMerge(maxNodeSize: number) { expect(result.size).toBe(3); expect(result.toArray()).toEqual([[1, 15], [2, 35], [3, 55]]); + result.checkValid(); }); 
test('Merge with completely overlapping keys - prefer left', () => { @@ -1623,6 +1634,7 @@ function testMerge(maxNodeSize: number) { expect(result.size).toBe(3); expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + result.checkValid(); }); test('Merge with completely overlapping keys - prefer right', () => { @@ -1634,6 +1646,7 @@ function testMerge(maxNodeSize: number) { expect(result.size).toBe(3); expect(result.toArray()).toEqual([[1, 100], [2, 200], [3, 300]]); + result.checkValid(); }); test('Merge with partially overlapping keys', () => { @@ -1645,6 +1658,7 @@ function testMerge(maxNodeSize: number) { expect(result.size).toBe(6); expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 330], [4, 440], [5, 500], [6, 600]]); + result.checkValid(); }); test('Merge with overlapping keys - exclude some keys via undefined', () => { @@ -1660,6 +1674,7 @@ function testMerge(maxNodeSize: number) { expect(result.size).toBe(4); // Keys 1, 2, 4, 5 (key 3 excluded) expect(result.toArray()).toEqual([[1, 10], [2, 220], [4, 440], [5, 500]]); + result.checkValid(); }); test('Merge is called even when values are equal', () => { @@ -1676,6 +1691,7 @@ function testMerge(maxNodeSize: number) { expect(mergeCallLog).toEqual([{k: 2, v1: 20, v2: 20}]); expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + result.checkValid(); }); test('Merge does not mutate input trees', () => { @@ -1693,6 +1709,7 @@ function testMerge(maxNodeSize: number) { // Verify result is correct expect(result.toArray()).toEqual([[1, 10], [2, 220], [3, 330], [4, 400]]); + result.checkValid(); }); test('Merge with disjoint ranges', () => { @@ -1826,6 +1843,9 @@ function testMerge(maxNodeSize: number) { expect(tree1.has(5)).toBe(false); expect(tree2.get(3)).toBe(30); expect(tree2.get(4)).toBe(40); + tree1.checkValid(); + tree2.checkValid(); + result.checkValid(); }); test('Merge with single element trees', () => { @@ -1875,6 +1895,7 @@ function testMerge(maxNodeSize: number) { // Only 
non-overlapping keys remain expect(result.toArray()).toEqual([[1, 10], [4, 400]]); + result.checkValid(); }); test('Merge reuses appended subtree with minimum fanout', () => { From 8346ea131f63acee20084a6eee70fedf68b991a4 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 08:17:36 -0800 Subject: [PATCH 031/143] cleanup, last bug fix --- b+tree.js | 2 +- b+tree.test.ts | 231 +++++++++++++++++++++++++------------------------ b+tree.ts | 2 +- benchmarks.ts | 33 ++++--- 4 files changed, 141 insertions(+), 127 deletions(-) diff --git a/b+tree.js b/b+tree.js index 1f923dc..d6aca68 100644 --- a/b+tree.js +++ b/b+tree.js @@ -902,7 +902,7 @@ var BTree = /** @class */ (function () { }; var onEnterLeaf = function (leaf, destIndex, cursorThis, cursorOther) { if (destIndex > 0 - || cmp(leaf.keys[0], cursorOther.leaf.minKey()) >= 0 && cmp(leaf.keys[0], cursorOther.leaf.maxKey()) <= 0) { + || BTree.areOverlapping(leaf.minKey(), leaf.maxKey(), BTree.getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { // Similar logic to the step-down case, except in this case we also know the leaf in the other // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. 
cursorThis.leafPayload.disqualified = true; diff --git a/b+tree.test.ts b/b+tree.test.ts index 891f822..a3ad740 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1951,6 +1951,28 @@ function testMerge(maxNodeSize: number) { result.checkValid(); }); + test('Merge trees with random overlap', () => { + const sizes = [100, 1000, 10000, 100000]; + sizes.forEach((size) => { + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); + + const tree1 = new BTree(); + const tree2 = new BTree(); + + for (let k of keys1) { + tree1.set(k, k); + } + for (let k of keys2) { + tree2.set(k, k * 10); + } + + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + const mergeResult = tree1.merge(tree2, preferLeft); + mergeResult.checkValid(); + }); + }); + test('Merge trees with ~10% overlap', () => { const size = 200; const offset = Math.floor(size * 0.9); @@ -1989,124 +2011,111 @@ function testMerge(maxNodeSize: number) { }); } +function swap(keys: any[], i: number, j: number) { + var tmp = keys[i]; + keys[i] = keys[j]; + keys[j] = tmp; +} + +function makeArray(size: number, randomOrder: boolean, spacing = 10) { + var keys: number[] = [], i, n; + for (i = 0, n = 0; i < size; i++, n += 1 + randInt(spacing)) + keys[i] = n; + if (randomOrder) + for (i = 0; i < size; i++) + swap(keys, i, randInt(size)); + return keys; +} + describe('BTree merge fuzz tests', () => { const compare = (a: number, b: number) => a - b; - const branchingFactors = [4, 8, 16, 32]; - const seeds = [0x12345678, 0x9ABCDEF]; + const mergeFn = (_k: number, left: number, _right: number) => left; const FUZZ_SETTINGS = { - scenarioBudget: 1, // Increase to explore more seed/fanout combinations. - iterationsPerScenario: 1, // Increase to deepen each scenario. - maxInsertSize: 200, // Maximum keys inserted per iteration. - keyRange: 10_000, // Range of key distribution. - valueRange: 1_000, // Range of value distribution. - sampleChecks: 3, // Number of random spot-checks per result. 
- timeoutMs: 10_000 // Jest timeout for the fuzz test. + branchingFactors: [4, 5, 8, 16], + ooms: [0, 1, 2, 3], + fractionsPerOOM: [0.0001, 0.01, 0.1, 0.25, 0.5] } as const; + const RANDOM_EDITS_PER_TEST = 20; + const TIMEOUT_MS = 1000_000; - const strategies = [ - { - name: 'prefer-left', - fn: (k: number, left: number, _right: number) => left, - apply: (_k: number, left: number, _right: number) => left - }, - { - name: 'prefer-right', - fn: (_k: number, _left: number, right: number) => right, - apply: (_k: number, _left: number, right: number) => right - }, - { - name: 'sum', - fn: (_k: number, left: number, right: number) => left + right, - apply: (_k: number, left: number, right: number) => left + right - }, - { - name: 'min', - fn: (_k: number, left: number, right: number) => Math.min(left, right), - apply: (_k: number, left: number, right: number) => Math.min(left, right) - }, - { - name: 'drop-even-sum', - fn: (_k: number, left: number, right: number) => ((left + right) & 1) === 0 ? undefined : right - left, - apply: (_k: number, left: number, right: number) => ((left + right) & 1) === 0 ? 
undefined : right - left - } - ] as const; - - test('randomized merges across branching factors', () => { - jest.setTimeout(FUZZ_SETTINGS.timeoutMs); - - const scenarioConfigs: Array<{ seedBase: number, maxNodeSize: number }> = []; - for (const seedBase of seeds) - for (const maxNodeSize of branchingFactors) - scenarioConfigs.push({ seedBase, maxNodeSize }); - - const scenariosToRun = Math.min(FUZZ_SETTINGS.scenarioBudget, scenarioConfigs.length); - const selectedScenarios = scenarioConfigs.slice(0, scenariosToRun); - - for (const { seedBase, maxNodeSize } of selectedScenarios) { - const baseSeed = (seedBase ^ (maxNodeSize * 0x9E3779B1)) >>> 0; - const fuzzRand = new MersenneTwister(baseSeed); - const nextInt = (limit: number) => { - if (limit <= 0) - return 0; - return Math.floor(fuzzRand.random() * limit); - }; - - let currentTree = new BTree([], compare, maxNodeSize); - let currentMap = new Map(); - - for (let iteration = 0; iteration < FUZZ_SETTINGS.iterationsPerScenario; iteration++) { - const size = nextInt(FUZZ_SETTINGS.maxInsertSize); - const otherTree = new BTree([], compare, maxNodeSize); - const otherMap = new Map(); - - for (let i = 0; i < size; i++) { - const key = nextInt(FUZZ_SETTINGS.keyRange); - const value = nextInt(FUZZ_SETTINGS.valueRange); - otherTree.set(key, value); - otherMap.set(key, value); - } - - const strategy = strategies[nextInt(strategies.length)]; - const mergeFunc = strategy.fn; - - const expectedMap = new Map(currentMap); - - otherMap.forEach((rightValue, key) => { - if (expectedMap.has(key)) { - const leftValue = expectedMap.get(key)!; - const mergedValue = strategy.apply(key, leftValue, rightValue); - if (mergedValue === undefined) - expectedMap.delete(key); - else - expectedMap.set(key, mergedValue); - } else { - expectedMap.set(key, rightValue); - } - }); - - const previousSnapshot = currentTree.toArray(); - const merged = currentTree.merge(otherTree, mergeFunc); - - expect(currentTree.toArray()).toEqual(previousSnapshot); - - 
if ((iteration & 1) === 0) { - merged.checkValid(); - } + FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { + if (fraction < 0 || fraction > 1) + throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); + }); - const expectedArray = Array.from(expectedMap.entries()).sort((a, b) => a[0] - b[0]); - expect(merged.toArray()).toEqual(expectedArray); + const randomInt = (rng: MersenneTwister, maxExclusive: number) => Math.floor(rng.random() * maxExclusive); + const shuffleInPlace = (rng: MersenneTwister, items: T[]): void => { + for (let i = items.length - 1; i > 0; i--) { + const swapIndex = Math.floor(rng.random() * (i + 1)); + const tmp = items[i]; + items[i] = items[swapIndex]; + items[swapIndex] = tmp; + } + }; - // Spot-check a few sampled keys for consistency with the Map - const sampleCount = Math.min(FUZZ_SETTINGS.sampleChecks, expectedArray.length); - for (let s = 0; s < sampleCount; s++) { - const sampleIndex = nextInt(expectedArray.length); - const [sampleKey, sampleValue] = expectedArray[sampleIndex]; - expect(merged.get(sampleKey)).toBe(sampleValue); + jest.setTimeout(TIMEOUT_MS); + + const rng = new MersenneTwister(0xBEEFCAFE); + + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { + describe(`branching factor ${maxNodeSize}`, () => { + for (const oom of FUZZ_SETTINGS.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { + const fractionB = 1 - fractionA; + + test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}`, () => { + const treeA = new BTree([], compare, maxNodeSize); + const treeB = new BTree([], compare, maxNodeSize); + + const keys: number[] = []; + for (let value = 1; value <= size; value++) { + keys.push(value); + } + shuffleInPlace(rng, keys); + + for (const value of keys) { + const target = rng.random() < fractionA ? 
treeA : treeB; + target.set(value, value); + } + + const snapshotA = treeA.toArray(); + const snapshotB = treeB.toArray(); + + const merged = treeA.merge(treeB, mergeFn); + merged.checkValid(); + + for (let value = 1; value <= size; value++) { + expect(merged.get(value)).toBe(value); + } + + expect(treeA.toArray()).toEqual(snapshotA); + expect(treeB.toArray()).toEqual(snapshotB); + + for (let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { + const key = 1 + randomInt(rng, size); + const action = rng.random(); + if (action < 0.33) { + merged.set(key, key); + } else if (action < 0.66) { + merged.set(key, -key); + } else { + merged.delete(key); + } + } + + expect(treeA.toArray()).toEqual(snapshotA); + expect(treeB.toArray()).toEqual(snapshotB); + + for (const [key, value] of snapshotA) { + expect(treeA.get(key)).toBe(value); + } + for (const [key, value] of snapshotB) { + expect(treeB.get(key)).toBe(value); + } + }); } - - currentTree = merged; - currentMap = expectedMap; } - } - }); + }); + } }); diff --git a/b+tree.ts b/b+tree.ts index 57e9db8..72b1530 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1101,7 +1101,7 @@ export default class BTree implements ISortedMapF, ISortedMap cursorOther: MergeCursor ) => { if (destIndex > 0 - || cmp(leaf.keys[0], cursorOther.leaf.minKey()!) >= 0 && cmp(leaf.keys[0], cursorOther.leaf.maxKey()) <= 0) { + || BTree.areOverlapping(leaf.minKey()!, leaf.maxKey(), BTree.getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { // Similar logic to the step-down case, except in this case we also know the leaf in the other // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. 
cursorThis.leafPayload.disqualified = true; diff --git a/benchmarks.ts b/benchmarks.ts index 496c3ef..6ca2d58 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -53,7 +53,7 @@ function countTreeNodeStats(tree: BTree) { const visit = (node: any, ancestorShared: boolean): { total: number, shared: number } => { if (!node) return { total: 0, shared: 0 }; - const selfShared = node.sharedSizeTag < 0 || ancestorShared; + const selfShared = node.isShared === true || ancestorShared; let shared = selfShared ? 1 : 0; let total = 1; const children: any[] | undefined = node.children; @@ -415,6 +415,18 @@ console.log("### Merge between B+ trees"); console.log(); const sizes = [100, 1000, 10000, 100000]; + const timeBaselineMerge = (title: string, tree1: BTree, tree2: BTree) => { + const baselineResult = measure(() => title, () => { + const result = tree1.clone(); + tree2.forEachPair((k, v) => { + result.set(k, v, false); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + } + // Test 1: Non-overlapping ranges (best case - minimal intersections) console.log("# Non-overlapping ranges (disjoint keys)"); sizes.forEach((size) => { @@ -428,31 +440,24 @@ console.log("### Merge between B+ trees"); } const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const mergeResult = measure(() => `Merge ${size}+${size} non-overlapping trees using merge()`, () => { + const baseTitle = `Merge ${size}+${size} non-overlapping trees using `; + const mergeResult = measure(() => `${baseTitle} (merge)`, () => { return tree1.merge(tree2, preferLeft); }); const mergeStats = countTreeNodeStats(mergeResult); console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); - const baselineResult = measure(() => `Merge ${size}+${size} non-overlapping trees using clone+set loop (baseline)`, () => { - const result = tree1.clone(); - tree2.forEachPair((k, v) 
=> { - result.set(k, v, false); - }); - return result; - }); - const baselineStats = countTreeNodeStats(baselineResult); - console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + timeBaselineMerge(`${baseTitle} (baseline)`, tree1, tree2); }); console.log(); - console.log("# Adjacent ranges (one intersection points)"); + console.log("# Adjacent ranges (one intersection point)"); sizes.forEach((size) => { const tree1 = new BTree(); const tree2 = new BTree(); // Tree1: 0-size, Tree2: size-(2*size) - for (let i = 0; i < size; i++) { + for (let i = 0; i <= size; i++) { tree1.set(i, i); tree2.set(i + size, i + size); } @@ -483,7 +488,7 @@ console.log("### Merge between B+ trees"); // Tree1: 0-size, 2*size-3*size // Tree2: size-2*size - for (let i = 0; i < size; i++) { + for (let i = 0; i <= size; i++) { tree1.set(i, i); tree1.set(i + 2 * size, i + 2 * size); tree2.set(i + size, i + size); From 4236cb0255348dbc6bc986864f0092133537f61a Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 08:23:49 -0800 Subject: [PATCH 032/143] cleanup --- b+tree.test.ts | 52 ++++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index a3ad740..b8d2493 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1952,25 +1952,23 @@ function testMerge(maxNodeSize: number) { }); test('Merge trees with random overlap', () => { - const sizes = [100, 1000, 10000, 100000]; - sizes.forEach((size) => { - const keys1 = makeArray(size, true); - const keys2 = makeArray(size, true); + const size = 10000; + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTree(); + const tree2 = new BTree(); - for (let k of keys1) { - tree1.set(k, k); - } - for (let k of keys2) { - tree2.set(k, k * 10); - } + for (let k of keys1) { + tree1.set(k, k); + } + for (let k of keys2) { + 
tree2.set(k, k * 10); + } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const mergeResult = tree1.merge(tree2, preferLeft); - mergeResult.checkValid(); - }); + const preferLeft = (_k: number, v1: number, _v2: number) => v1; + const mergeResult = tree1.merge(tree2, preferLeft); + mergeResult.checkValid(); }); test('Merge trees with ~10% overlap', () => { @@ -2027,6 +2025,8 @@ function makeArray(size: number, randomOrder: boolean, spacing = 10) { return keys; } +const randomInt = (rng: MersenneTwister, maxExclusive: number) => Math.floor(rng.random() * maxExclusive); + describe('BTree merge fuzz tests', () => { const compare = (a: number, b: number) => a - b; const mergeFn = (_k: number, left: number, _right: number) => left; @@ -2043,16 +2043,6 @@ describe('BTree merge fuzz tests', () => { throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); }); - const randomInt = (rng: MersenneTwister, maxExclusive: number) => Math.floor(rng.random() * maxExclusive); - const shuffleInPlace = (rng: MersenneTwister, items: T[]): void => { - for (let i = items.length - 1; i > 0; i--) { - const swapIndex = Math.floor(rng.random() * (i + 1)); - const tmp = items[i]; - items[i] = items[swapIndex]; - items[swapIndex] = tmp; - } - }; - jest.setTimeout(TIMEOUT_MS); const rng = new MersenneTwister(0xBEEFCAFE); @@ -2068,11 +2058,7 @@ describe('BTree merge fuzz tests', () => { const treeA = new BTree([], compare, maxNodeSize); const treeB = new BTree([], compare, maxNodeSize); - const keys: number[] = []; - for (let value = 1; value <= size; value++) { - keys.push(value); - } - shuffleInPlace(rng, keys); + const keys = makeArray(size, true, 1); for (const value of keys) { const target = rng.random() < fractionA ? 
treeA : treeB; @@ -2085,8 +2071,8 @@ describe('BTree merge fuzz tests', () => { const merged = treeA.merge(treeB, mergeFn); merged.checkValid(); - for (let value = 1; value <= size; value++) { - expect(merged.get(value)).toBe(value); + for (const key of keys) { + expect(merged.get(key)).toBe(key); } expect(treeA.toArray()).toEqual(snapshotA); From 50cca9ddf62bb3818c3bb8ce23c7542f11d06f0c Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 08:37:44 -0800 Subject: [PATCH 033/143] cleanup --- b+tree.test.ts | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index b8d2493..49ccc30 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -2036,7 +2036,7 @@ describe('BTree merge fuzz tests', () => { fractionsPerOOM: [0.0001, 0.01, 0.1, 0.25, 0.5] } as const; const RANDOM_EDITS_PER_TEST = 20; - const TIMEOUT_MS = 1000_000; + const TIMEOUT_MS = 30_000; FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { if (fraction < 0 || fraction > 1) @@ -2054,20 +2054,31 @@ describe('BTree merge fuzz tests', () => { for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { const fractionB = 1 - fractionA; + const expectEqualsArray = (tree: BTree, array: [number, number][]) => { + expect(tree.size).toBe(array.length); + for (const [key, value] of array) { + expect(tree.get(key)).toBe(value); + } + }; + test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}`, () => { const treeA = new BTree([], compare, maxNodeSize); const treeB = new BTree([], compare, maxNodeSize); const keys = makeArray(size, true, 1); + const aArray: [number, number][] = []; + const bArray: [number, number][] = []; for (const value of keys) { - const target = rng.random() < fractionA ? 
treeA : treeB; - target.set(value, value); + if (rng.random() < fractionA) { + aArray.push([value, value]); + treeA.set(value, value); + } else { + bArray.push([value, value]); + treeB.set(value, value); + } } - const snapshotA = treeA.toArray(); - const snapshotB = treeB.toArray(); - const merged = treeA.merge(treeB, mergeFn); merged.checkValid(); @@ -2075,8 +2086,9 @@ describe('BTree merge fuzz tests', () => { expect(merged.get(key)).toBe(key); } - expect(treeA.toArray()).toEqual(snapshotA); - expect(treeB.toArray()).toEqual(snapshotB); + // Merge should not have mutated inputs + expectEqualsArray(treeA, aArray); + expectEqualsArray(treeB, bArray); for (let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { const key = 1 + randomInt(rng, size); @@ -2088,17 +2100,11 @@ describe('BTree merge fuzz tests', () => { } else { merged.delete(key); } - } - - expect(treeA.toArray()).toEqual(snapshotA); - expect(treeB.toArray()).toEqual(snapshotB); + }; - for (const [key, value] of snapshotA) { - expect(treeA.get(key)).toBe(value); - } - for (const [key, value] of snapshotB) { - expect(treeB.get(key)).toBe(value); - } + // Check for shared mutability issues + expectEqualsArray(treeA, aArray); + expectEqualsArray(treeB, bArray); }); } } From 90d246c42fbd8b306dd44d56fc921a2ec146c234 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 13:03:03 -0800 Subject: [PATCH 034/143] fix bug, better checkValid --- b+tree.d.ts | 6 -- b+tree.js | 119 +++++++++++++++++++++++++--------------- b+tree.test.ts | 8 +-- b+tree.ts | 146 ++++++++++++++++++++++++++++++------------------- 4 files changed, 169 insertions(+), 110 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 3feae67..60cc8f5 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -313,12 +313,6 @@ export default class BTree implements ISortedMapF, ISort */ private static createCursor; private static getKey; - /** - * Determines whether two nodes are overlapping in key range. 
- * Takes the leftmost known key of each node to avoid a log(n) min calculation. - * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. - */ - private static areOverlapping; /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. diff --git a/b+tree.js b/b+tree.js index d6aca68..82df8b0 100644 --- a/b+tree.js +++ b/b+tree.js @@ -802,6 +802,11 @@ var BTree = /** @class */ (function () { var keys = slice.map(function (p) { return p[0]; }); var vals = slice.map(function (p) { return p[1]; }); var leaf = new BNode(keys, vals); + if (disjoint.length > 0) { + if (areOverlapping(leaf.minKey(), leaf.maxKey(), disjoint[disjoint.length - 1][1].minKey(), disjoint[disjoint.length - 1][1].maxKey(), left._compare) + || cmp(leaf.minKey(), disjoint[disjoint.length - 1][1].maxKey()) <= 0) + throw new Error("Decompose produced overlapping leaves"); + } disjoint.push([0, leaf]); if (0 > tallestHeight) { tallestIndex = disjoint.length - 1; @@ -817,6 +822,11 @@ var BTree = /** @class */ (function () { var addSharedNodeToDisjointSet = function (node, height) { flushPendingEntries(); node.isShared = true; + if (disjoint.length > 0) { + if (areOverlapping(node.minKey(), node.maxKey(), disjoint[disjoint.length - 1][1].minKey(), disjoint[disjoint.length - 1][1].maxKey(), left._compare) + || cmp(node.minKey(), disjoint[disjoint.length - 1][1].maxKey()) <= 0) + throw new Error("Decompose produced overlapping leaves"); + } disjoint.push([height, node]); if (height > tallestHeight) { tallestIndex = disjoint.length - 1; @@ -865,7 +875,7 @@ var BTree = /** @class */ (function () { pushLeafRange(leaf, start, leaf.keys.length); } }; - var onStepUp = function (parent, height, payload, fromIndex, stepDownIndex) { + var onStepUp = function (parent, height, payload, fromIndex, spineIndex, stepDownIndex, cursorThis) { if (Number.isNaN(stepDownIndex) /* still walking up */ || 
stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { if (!payload.disqualified) { @@ -882,6 +892,12 @@ var BTree = /** @class */ (function () { } } else { + // We have a valid step down index, so we need to disqualify the spine if needed. + // This is identical to the step down logic, but we must also perform it here because + // in the case of stepping down into a leaf, the step down callback is never called. + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } addHighestDisjoint(); for (var i = fromIndex + 1; i < stepDownIndex; ++i) addSharedNodeToDisjointSet(parent.children[i], height - 1); @@ -902,7 +918,7 @@ var BTree = /** @class */ (function () { }; var onEnterLeaf = function (leaf, destIndex, cursorThis, cursorOther) { if (destIndex > 0 - || BTree.areOverlapping(leaf.minKey(), leaf.maxKey(), BTree.getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + || areOverlapping(leaf.minKey(), leaf.maxKey(), BTree.getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { // Similar logic to the step-down case, except in this case we also know the leaf in the other // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. 
cursorThis.leafPayload.disqualified = true; @@ -933,12 +949,12 @@ var BTree = /** @class */ (function () { var minKey = BTree.getKey(cur); var otherMin = BTree.getKey(other); var otherMax = other.leaf.maxKey(); - if (BTree.areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) + if (areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) cur.leafPayload.disqualified = true; for (var i = 0; i < cur.spine.length; ++i) { var entry = cur.spine[i]; // Since we are on the left side of the tree, we can use the leaf min key for every spine node - if (BTree.areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) + if (areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) entry.payload.disqualified = true; } }; @@ -1030,17 +1046,17 @@ var BTree = /** @class */ (function () { for (var depth = spine.length - 1; depth >= 0; depth--) { var entry_1 = spine[depth]; var sd = depth === 0 ? Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry_1.node, heightOf(depth), entry_1.payload, entry_1.childIndex, sd); + cur.onStepUp(entry_1.node, heightOf(depth), entry_1.payload, entry_1.childIndex, depth, sd, cur); } return true; } // Step up through ancestors above the descentLevel for (var depth = spine.length - 1; depth > descentLevel; depth--) { var entry_2 = spine[depth]; - cur.onStepUp(entry_2.node, heightOf(depth), entry_2.payload, entry_2.childIndex, NaN); + cur.onStepUp(entry_2.node, heightOf(depth), entry_2.payload, entry_2.childIndex, depth, NaN, cur); } var entry = spine[descentLevel]; - cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); + cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentLevel, descentIndex, cur); entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads var height = heightOf(descentLevel) - 1; // calculate height before changing length @@ -1096,40 +1112,6 @@ var BTree = /** @class */ 
(function () { BTree.getKey = function (c) { return c.leaf.keys[c.leafIndex]; }; - /** - * Determines whether two nodes are overlapping in key range. - * Takes the leftmost known key of each node to avoid a log(n) min calculation. - * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. - */ - BTree.areOverlapping = function (aMin, aMax, bMin, bMax, cmp) { - // There are 4 possibilities: - // 1. aMin.........aMax - // bMin.........bMax - // (aMax between bMin and bMax) - // 2. aMin.........aMax - // bMin.........bMax - // (aMin between bMin and bMax) - // 3. aMin.............aMax - // bMin....bMax - // (aMin and aMax enclose bMin and bMax; note this includes equality cases) - // 4. aMin....aMax - // bMin.............bMax - // (bMin and bMax enclose aMin and aMax; note equality cases are identical to case 3) - var aMinBMin = cmp(aMin, bMin); - var aMinBMax = cmp(aMin, bMax); - if (aMinBMin >= 0 && aMinBMax <= 0) { - // case 2 or 4 - return true; - } - var aMaxBMin = cmp(aMax, bMin); - var aMaxBMax = cmp(aMax, bMax); - if (aMaxBMin >= 0 && aMaxBMax <= 0) { - // case 1 - return true; - } - // case 3 or no overlap - return aMinBMin <= 0 && aMaxBMax >= 0; - }; /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. @@ -1704,7 +1686,7 @@ var BTree = /** @class */ (function () { * skips the most expensive test - whether all keys are sorted - but it * does check that maxKey() of the children of internal nodes are sorted. */ BTree.prototype.checkValid = function () { - var size = this._root.checkValid(0, this, 0); + var size = this._root.checkValid(0, this, 0)[0]; check(size === this.size, "size mismatch: counted ", size, "but stored", this.size); }; return BTree; @@ -1881,7 +1863,11 @@ var BNode = /** @class */ (function () { // it can't be merged with adjacent nodes. 
However, the parent will // verify that the average node size is at least half of the maximum. check(depth == 0 || kL > 0, "empty leaf at depth", depth, "and baseIndex", baseIndex); - return kL; + for (var i = 1; i < kL; i++) { + var c = tree._compare(this.keys[i - 1], this.keys[i]); + check(c < 0, "keys out of order at depth", depth, "and baseIndex", baseIndex + i - 1, ": ", this.keys[i - 1], " !< ", this.keys[i]); + } + return [kL, this.keys[0], this.keys[kL - 1]]; }; ///////////////////////////////////////////////////////////////////////////// // Leaf Node: set & node splitting ////////////////////////////////////////// @@ -2107,10 +2093,19 @@ var BNodeInternal = /** @class */ (function (_super) { check(kL === cL, "keys/children length mismatch: depth", depth, "lengths", kL, cL, "baseIndex", baseIndex); check(kL > 1 || depth > 0, "internal node has length", kL, "at depth", depth, "baseIndex", baseIndex); var size = 0, c = this.children, k = this.keys, childSize = 0; + var prevMinKey = undefined; + var prevMaxKey = undefined; for (var i = 0; i < cL; i++) { var child = c[i]; - var subtreeSize = child.checkValid(depth + 1, tree, baseIndex + size); + var _a = child.checkValid(depth + 1, tree, baseIndex + size), subtreeSize = _a[0], minKey = _a[1], maxKey = _a[2]; check(subtreeSize === child.size(), "cached size mismatch at depth", depth, "index", i, "baseIndex", baseIndex); + check(subtreeSize === 1 || tree._compare(minKey, maxKey) < 0, "child node keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex); + if (prevMinKey !== undefined && prevMaxKey !== undefined) { + check(!areOverlapping(prevMinKey, prevMaxKey, minKey, maxKey, tree._compare), "children keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex, ": ", prevMaxKey, " !< ", minKey); + check(tree._compare(prevMaxKey, minKey) < 0, "children keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex, ": ", prevMaxKey, " !< ", minKey); + } + prevMinKey = minKey; + 
prevMaxKey = maxKey; size += subtreeSize; childSize += child.keys.length; check(size >= childSize, "wtf", baseIndex); // no way this will ever fail @@ -2126,7 +2121,7 @@ var BNodeInternal = /** @class */ (function (_super) { var toofew = childSize === 0; // childSize < (tree.maxNodeSize >> 1)*cL; if (toofew || childSize > tree.maxNodeSize * cL) check(false, toofew ? "too few" : "too many", "children (", childSize, size, ") at depth", depth, "maxNodeSize:", tree.maxNodeSize, "children.length:", cL, "baseIndex:", baseIndex); - return size; + return [size, this.minKey(), this.maxKey()]; }; ///////////////////////////////////////////////////////////////////////////// // Internal Node: set & node splitting ////////////////////////////////////// @@ -2339,6 +2334,40 @@ var BNodeInternal = /** @class */ (function (_super) { }; return BNodeInternal; }(BNode)); +/** + * Determines whether two nodes are overlapping in key range. + * Takes the leftmost known key of each node to avoid a log(n) min calculation. + * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. + */ +function areOverlapping(aMin, aMax, bMin, bMax, cmp) { + // There are 4 possibilities: + // 1. aMin.........aMax + // bMin.........bMax + // (aMax between bMin and bMax) + // 2. aMin.........aMax + // bMin.........bMax + // (aMin between bMin and bMax) + // 3. aMin.............aMax + // bMin....bMax + // (aMin and aMax enclose bMin and bMax; note this includes equality cases) + // 4. 
aMin....aMax + // bMin.............bMax + // (bMin and bMax enclose aMin and aMax; note equality cases are identical to case 3) + var aMinBMin = cmp(aMin, bMin); + var aMinBMax = cmp(aMin, bMax); + if (aMinBMin >= 0 && aMinBMax <= 0) { + // case 2 or 4 + return true; + } + var aMaxBMin = cmp(aMax, bMin); + var aMaxBMax = cmp(aMax, bMax); + if (aMaxBMin >= 0 && aMaxBMax <= 0) { + // case 1 + return true; + } + // case 3 or no overlap + return aMinBMin <= 0 && aMaxBMax >= 0; +} // Optimization: this array of `undefined`s is used instead of a normal // array of values in nodes where `undefined` is the only value. // Its length is extended to max node size on first use; since it can diff --git a/b+tree.test.ts b/b+tree.test.ts index 49ccc30..340d349 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -2031,7 +2031,7 @@ describe('BTree merge fuzz tests', () => { const compare = (a: number, b: number) => a - b; const mergeFn = (_k: number, left: number, _right: number) => left; const FUZZ_SETTINGS = { - branchingFactors: [4, 5, 8, 16], + branchingFactors: [4, 5, 32], ooms: [0, 1, 2, 3], fractionsPerOOM: [0.0001, 0.01, 0.1, 0.25, 0.5] } as const; @@ -2066,6 +2066,7 @@ describe('BTree merge fuzz tests', () => { const treeB = new BTree([], compare, maxNodeSize); const keys = makeArray(size, true, 1); + const sorted = Array.from(new Set(keys)).sort(compare); const aArray: [number, number][] = []; const bArray: [number, number][] = []; @@ -2082,9 +2083,8 @@ describe('BTree merge fuzz tests', () => { const merged = treeA.merge(treeB, mergeFn); merged.checkValid(); - for (const key of keys) { - expect(merged.get(key)).toBe(key); - } + const mergedArray = merged.toArray(); + expect(mergedArray).toEqual(sorted.map(k => [k, k])); // Merge should not have mutated inputs expectEqualsArray(treeA, aArray); diff --git a/b+tree.ts b/b+tree.ts index 72b1530..4faa520 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -964,6 +964,12 @@ export default class BTree implements ISortedMapF, ISortedMap 
const keys = slice.map(p => p[0]); const vals = slice.map(p => p[1]); const leaf = new BNode(keys, vals); + if (disjoint.length > 0) + { + if (areOverlapping(leaf.minKey()!, leaf.maxKey(), disjoint[disjoint.length - 1][1].minKey()!, disjoint[disjoint.length - 1][1].maxKey(), left._compare) + || cmp(leaf.minKey()!, disjoint[disjoint.length - 1][1].maxKey()!) <= 0) + throw new Error("Decompose produced overlapping leaves"); + } disjoint.push([0, leaf]); if (0 > tallestHeight) { tallestIndex = disjoint.length - 1; @@ -981,6 +987,12 @@ export default class BTree implements ISortedMapF, ISortedMap const addSharedNodeToDisjointSet = (node: BNode, height: number) => { flushPendingEntries(); node.isShared = true; + if (disjoint.length > 0) + { + if (areOverlapping(node.minKey()!, node.maxKey(), disjoint[disjoint.length - 1][1].minKey()!, disjoint[disjoint.length - 1][1].maxKey(), left._compare) + || cmp(node.minKey()!, disjoint[disjoint.length - 1][1].maxKey()!) <= 0) + throw new Error("Decompose produced overlapping leaves"); + } disjoint.push([height, node]); if (height > tallestHeight) { tallestIndex = disjoint.length - 1; @@ -1052,7 +1064,9 @@ export default class BTree implements ISortedMapF, ISortedMap height: number, payload: MergeCursorPayload, fromIndex: number, - stepDownIndex: number + spineIndex: number, + stepDownIndex: number, + cursorThis: MergeCursor ) => { if (Number.isNaN(stepDownIndex) /* still walking up */ || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { @@ -1068,6 +1082,12 @@ export default class BTree implements ISortedMapF, ISortedMap addSharedNodeToDisjointSet(parent.children[i], height - 1); } } else { + // We have a valid step down index, so we need to disqualify the spine if needed. + // This is identical to the step down logic, but we must also perform it here because + // in the case of stepping down into a leaf, the step down callback is never called. 
+ if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } addHighestDisjoint(); for (let i = fromIndex + 1; i < stepDownIndex; ++i) addSharedNodeToDisjointSet(parent.children[i], height - 1); @@ -1101,7 +1121,7 @@ export default class BTree implements ISortedMapF, ISortedMap cursorOther: MergeCursor ) => { if (destIndex > 0 - || BTree.areOverlapping(leaf.minKey()!, leaf.maxKey(), BTree.getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + || areOverlapping(leaf.minKey()!, leaf.maxKey(), BTree.getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { // Similar logic to the step-down case, except in this case we also know the leaf in the other // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. cursorThis.leafPayload.disqualified = true; @@ -1135,12 +1155,12 @@ export default class BTree implements ISortedMapF, ISortedMap const minKey = BTree.getKey(cur); const otherMin = BTree.getKey(other); const otherMax = other.leaf.maxKey(); - if (BTree.areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) + if (areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) cur.leafPayload.disqualified = true; for (let i = 0; i < cur.spine.length; ++i) { const entry = cur.spine[i]; // Since we are on the left side of the tree, we can use the leaf min key for every spine node - if (BTree.areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) + if (areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) entry.payload.disqualified = true; } }; @@ -1243,7 +1263,7 @@ export default class BTree implements ISortedMapF, ISortedMap for (let depth = spine.length - 1; depth >= 0; depth--) { const entry = spine[depth]; const sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry.node, heightOf(depth), entry.payload, entry.childIndex, sd); + cur.onStepUp(entry.node, heightOf(depth), entry.payload, entry.childIndex, depth, sd, cur); } return true; } @@ -1251,11 +1271,11 @@ export default class BTree implements ISortedMapF, ISortedMap // Step up through ancestors above the descentLevel for (let depth = spine.length - 1; depth > descentLevel; depth--) { const entry = spine[depth]; - cur.onStepUp(entry.node, heightOf(depth), entry.payload, entry.childIndex, NaN); + cur.onStepUp(entry.node, heightOf(depth), entry.payload, entry.childIndex, depth, NaN, cur); } const entry = spine[descentLevel]; - cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentIndex); + cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentLevel, descentIndex, cur); entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads @@ -1317,47 +1337,6 @@ export default class BTree implements ISortedMapF, ISortedMap return c.leaf.keys[c.leafIndex]; } - /** - * Determines whether two nodes are overlapping in key range. - * Takes the leftmost known key of each node to avoid a log(n) min calculation. - * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. - */ - private static areOverlapping( - aMin: K, - aMax: K, - bMin: K, - bMax: K, - cmp: (x:K,y:K)=>number - ): boolean { - // There are 4 possibilities: - // 1. aMin.........aMax - // bMin.........bMax - // (aMax between bMin and bMax) - // 2. aMin.........aMax - // bMin.........bMax - // (aMin between bMin and bMax) - // 3. aMin.............aMax - // bMin....bMax - // (aMin and aMax enclose bMin and bMax; note this includes equality cases) - // 4. 
aMin....aMax - // bMin.............bMax - // (bMin and bMax enclose aMin and aMax; note equality cases are identical to case 3) - const aMinBMin = cmp(aMin, bMin); - const aMinBMax = cmp(aMin, bMax); - if (aMinBMin >= 0 && aMinBMax <= 0) { - // case 2 or 4 - return true; - } - const aMaxBMin = cmp(aMax, bMin); - const aMaxBMax = cmp(aMax, bMax); - if (aMaxBMin >= 0 && aMaxBMax <= 0) { - // case 1 - return true; - } - // case 3 or no overlap - return aMinBMin <= 0 && aMaxBMax >= 0; - } - /** * Computes the differences between `this` and `other`. * For efficiency, the diff is returned via invocations of supplied handlers. @@ -1967,7 +1946,7 @@ export default class BTree implements ISortedMapF, ISortedMap * skips the most expensive test - whether all keys are sorted - but it * does check that maxKey() of the children of internal nodes are sorted. */ checkValid() { - var size = this._root.checkValid(0, this, 0); + var [size] = this._root.checkValid(0, this, 0); check(size === this.size, "size mismatch: counted ", size, "but stored", this.size); } } @@ -2156,7 +2135,7 @@ class BNode { return undefined; } - checkValid(depth: number, tree: BTree, baseIndex: number): number { + checkValid(depth: number, tree: BTree, baseIndex: number): [size: number, min: K, max: K] { var kL = this.keys.length, vL = this.values.length; check(this.values === undefVals ? kL <= vL : kL === vL, "keys/values length mismatch: depth", depth, "with lengths", kL, vL, "and baseIndex", baseIndex); @@ -2166,7 +2145,12 @@ class BNode { // it can't be merged with adjacent nodes. However, the parent will // verify that the average node size is at least half of the maximum. 
check(depth == 0 || kL > 0, "empty leaf at depth", depth, "and baseIndex", baseIndex); - return kL; + for (var i = 1; i < kL; i++) { + var c = tree._compare(this.keys[i-1], this.keys[i]); + check(c < 0, "keys out of order at depth", depth, "and baseIndex", baseIndex + i - 1, + ": ", this.keys[i-1], " !< ", this.keys[i]); + } + return [kL, this.keys[0], this.keys[kL - 1]]; } ///////////////////////////////////////////////////////////////////////////// @@ -2403,15 +2387,26 @@ class BNodeInternal extends BNode { return result; } - checkValid(depth: number, tree: BTree, baseIndex: number): number { + checkValid(depth: number, tree: BTree, baseIndex: number): [size: number, min: K, max: K] { let kL = this.keys.length, cL = this.children.length; check(kL === cL, "keys/children length mismatch: depth", depth, "lengths", kL, cL, "baseIndex", baseIndex); check(kL > 1 || depth > 0, "internal node has length", kL, "at depth", depth, "baseIndex", baseIndex); let size = 0, c = this.children, k = this.keys, childSize = 0; + let prevMinKey: K | undefined = undefined; + let prevMaxKey: K | undefined = undefined; for (var i = 0; i < cL; i++) { var child = c[i]; - var subtreeSize = child.checkValid(depth + 1, tree, baseIndex + size); + var [subtreeSize, minKey, maxKey] = child.checkValid(depth + 1, tree, baseIndex + size); check(subtreeSize === child.size(), "cached size mismatch at depth", depth, "index", i, "baseIndex", baseIndex); + check(subtreeSize === 1 || tree._compare(minKey, maxKey) < 0, "child node keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex); + if (prevMinKey !== undefined && prevMaxKey !== undefined) { + check(!areOverlapping(prevMinKey, prevMaxKey, minKey, maxKey, tree._compare), "children keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex, + ": ", prevMaxKey, " !< ", minKey); + check(tree._compare(prevMaxKey, minKey) < 0, "children keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex, + ": ", prevMaxKey, " !< 
", minKey); + } + prevMinKey = minKey; + prevMaxKey = maxKey; size += subtreeSize; childSize += child.keys.length; check(size >= childSize, "wtf", baseIndex); // no way this will ever fail @@ -2427,7 +2422,7 @@ class BNodeInternal extends BNode { let toofew = childSize === 0; // childSize < (tree.maxNodeSize >> 1)*cL; if (toofew || childSize > tree.maxNodeSize*cL) check(false, toofew ? "too few" : "too many", "children (", childSize, size, ") at depth", depth, "maxNodeSize:", tree.maxNodeSize, "children.length:", cL, "baseIndex:", baseIndex); - return size; + return [size, this.minKey()!, this.maxKey()]; } ///////////////////////////////////////////////////////////////////////////// @@ -2679,7 +2674,7 @@ interface MergeCursor { makePayload: () => TPayload; onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; - onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, stepDownIndex: number) => void; + onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; } @@ -2687,6 +2682,47 @@ interface MergeCursor { type DisjointEntry = [height: number, node: BNode]; type DecomposeResult = { disjoint: DisjointEntry[], tallestIndex: number }; +/** + * Determines whether two nodes are overlapping in key range. + * Takes the leftmost known key of each node to avoid a log(n) min calculation. + * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. 
+ */ +function areOverlapping( + aMin: K, + aMax: K, + bMin: K, + bMax: K, + cmp: (x:K,y:K)=>number +): boolean { + // There are 4 possibilities: + // 1. aMin.........aMax + // bMin.........bMax + // (aMax between bMin and bMax) + // 2. aMin.........aMax + // bMin.........bMax + // (aMin between bMin and bMax) + // 3. aMin.............aMax + // bMin....bMax + // (aMin and aMax enclose bMin and bMax; note this includes equality cases) + // 4. aMin....aMax + // bMin.............bMax + // (bMin and bMax enclose aMin and aMax; note equality cases are identical to case 3) + const aMinBMin = cmp(aMin, bMin); + const aMinBMax = cmp(aMin, bMax); + if (aMinBMin >= 0 && aMinBMax <= 0) { + // case 2 or 4 + return true; + } + const aMaxBMin = cmp(aMax, bMin); + const aMaxBMax = cmp(aMax, bMax); + if (aMaxBMin >= 0 && aMaxBMax <= 0) { + // case 1 + return true; + } + // case 3 or no overlap + return aMinBMin <= 0 && aMaxBMax >= 0; +} + // Optimization: this array of `undefined`s is used instead of a normal // array of values in nodes where `undefined` is the only value. 
// Its length is extended to max node size on first use; since it can From 2525e60ab6f457b042f7d27e52960b09d2087a99 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 18:57:16 -0800 Subject: [PATCH 035/143] improve fuzz test perf --- b+tree.test.ts | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index 340d349..0ad53df 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -2054,13 +2054,6 @@ describe('BTree merge fuzz tests', () => { for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { const fractionB = 1 - fractionA; - const expectEqualsArray = (tree: BTree, array: [number, number][]) => { - expect(tree.size).toBe(array.length); - for (const [key, value] of array) { - expect(tree.get(key)).toBe(value); - } - }; - test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}`, () => { const treeA = new BTree([], compare, maxNodeSize); const treeB = new BTree([], compare, maxNodeSize); @@ -2080,15 +2073,17 @@ describe('BTree merge fuzz tests', () => { } } + aArray.sort((a, b) => compare(a[0], b[0])); + bArray.sort((a, b) => compare(a[0], b[0])); + const merged = treeA.merge(treeB, mergeFn); merged.checkValid(); - const mergedArray = merged.toArray(); - expect(mergedArray).toEqual(sorted.map(k => [k, k])); + expect(merged.toArray()).toEqual(sorted.map(k => [k, k])); // Merge should not have mutated inputs - expectEqualsArray(treeA, aArray); - expectEqualsArray(treeB, bArray); + expect(treeA.toArray()).toEqual(aArray); + expect(treeB.toArray()).toEqual(bArray); for (let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { const key = 1 + randomInt(rng, size); @@ -2103,8 +2098,8 @@ describe('BTree merge fuzz tests', () => { }; // Check for shared mutability issues - expectEqualsArray(treeA, aArray); - expectEqualsArray(treeB, bArray); + expect(treeA.toArray()).toEqual(aArray); + expect(treeB.toArray()).toEqual(bArray); }); } } From 
5f52756f1dd3064efe9385c2f3d6a0f69520957e Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 19:41:22 -0800 Subject: [PATCH 036/143] cleanup benchmark tests --- benchmarks.ts | 215 +++++++++++++++----------------------------------- 1 file changed, 63 insertions(+), 152 deletions(-) diff --git a/benchmarks.ts b/benchmarks.ts index 6ca2d58..c3ced4d 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -425,7 +425,26 @@ console.log("### Merge between B+ trees"); }); const baselineStats = countTreeNodeStats(baselineResult); console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); - } + }; + + const preferLeftMerge = (_k: number, leftValue: any, _rightValue: any) => leftValue; + + const timeMergeVsBaseline = ( + baseTitle: string, + tree1: BTree, + tree2: BTree, + prefer = preferLeftMerge, + mergeLabel = 'merge()', + baselineLabel = 'clone+set loop (baseline)' + ) => { + const mergeResult = measure(() => `${baseTitle} using ${mergeLabel}`, () => { + return tree1.merge(tree2, prefer); + }); + const mergeStats = countTreeNodeStats(mergeResult); + console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + + timeBaselineMerge(`${baseTitle} using ${baselineLabel}`, tree1, tree2); + }; // Test 1: Non-overlapping ranges (best case - minimal intersections) console.log("# Non-overlapping ranges (disjoint keys)"); @@ -439,15 +458,8 @@ console.log("### Merge between B+ trees"); tree2.set(offset + i, offset + i); // Keys well beyond tree1's range } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const baseTitle = `Merge ${size}+${size} non-overlapping trees using `; - const mergeResult = measure(() => `${baseTitle} (merge)`, () => { - return tree1.merge(tree2, preferLeft); - }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); - - timeBaselineMerge(`${baseTitle} (baseline)`, tree1, tree2); + const baseTitle 
= `Merge ${size}+${size} non-overlapping trees`; + timeMergeVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -462,22 +474,8 @@ console.log("### Merge between B+ trees"); tree2.set(i + size, i + size); } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const mergeResult = measure(() => `Merge ${size}+${size} adjacent range trees using merge()`, () => { - return tree1.merge(tree2, preferLeft); - }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); - - const baselineResult = measure(() => `Merge ${size}+${size} adjacent range trees using clone+set loop (baseline)`, () => { - const result = tree1.clone(); - tree2.forEachPair((k, v) => { - result.set(k, v, false); - }); - return result; - }); - const baselineStats = countTreeNodeStats(baselineResult); - console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + const baseTitle = `Merge ${size}+${size} adjacent range trees`; + timeMergeVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -494,22 +492,8 @@ console.log("### Merge between B+ trees"); tree2.set(i + size, i + size); } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const mergeResult = measure(() => `Merge ${size*2}+${size} interleaved range trees using merge()`, () => { - return tree1.merge(tree2, preferLeft); - }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); - - const baselineResult = measure(() => `Merge ${size*2}+${size} interleaved range trees using clone+set loop (baseline)`, () => { - const result = tree1.clone(); - tree2.forEachPair((k, v) => { - result.set(k, v, false); - }); - return result; - }); - const baselineStats = countTreeNodeStats(baselineResult); - console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + const baseTitle = `Merge ${size * 
2}+${size} interleaved range trees`; + timeMergeVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -523,12 +507,8 @@ console.log("### Merge between B+ trees"); tree2.set(i, i * 10); } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const mergeResult = measure(() => `Merge ${size}+${size} completely overlapping trees (prefer left)`, () => { - return tree1.merge(tree2, preferLeft); - }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + const baseTitle = `Merge ${size}+${size} completely overlapping trees (prefer left)`; + timeMergeVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -549,22 +529,8 @@ console.log("### Merge between B+ trees"); tree2.set(key, key * 10); } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const mergeResult = measure(() => `Merge trees with 10% overlap (${size}+${size} keys) using merge()`, () => { - return tree1.merge(tree2, preferLeft); - }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); - - const baselineResult = measure(() => `Merge trees with 10% overlap (${size}+${size} keys) using clone+set loop (baseline)`, () => { - const result = tree1.clone(); - tree2.forEachPair((k, v) => { - result.set(k, v, false); - }); - return result; - }); - const baselineStats = countTreeNodeStats(baselineResult); - console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + const baseTitle = `Merge trees with 10% overlap (${size}+${size} keys)`; + timeMergeVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -583,22 +549,8 @@ console.log("### Merge between B+ trees"); tree2.set(k, k * 10); } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const mergeResult = measure(() => `Merge ${tree1.size}+${tree2.size} trees with random keys using merge()`, () => { - return 
tree1.merge(tree2, preferLeft); - }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); - - const baselineResult = measure(() => `Merge ${tree1.size}+${tree2.size} trees with random keys using clone+set loop (baseline)`, () => { - const result = tree1.clone(); - tree2.forEachPair((k, v) => { - result.set(k, v, false); - }); - return result; - }); - const baselineStats = countTreeNodeStats(baselineResult); - console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + const baseTitle = `Merge ${tree1.size}+${tree2.size} trees with random keys`; + timeMergeVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -611,12 +563,8 @@ console.log("### Merge between B+ trees"); tree1.set(i, i); } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const mergeResult = measure(() => `Merge ${size}-key tree with empty tree`, () => { - return tree1.merge(tree2, preferLeft); - }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + const baseTitle = `Merge ${size}-key tree with empty tree`; + timeMergeVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -630,23 +578,8 @@ console.log("### Merge between B+ trees"); tree2.set(i, i * 10); } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - - const mergeResult = measure(() => `Merge ${size}+${size} overlapping trees using merge()`, () => { - return tree1.merge(tree2, preferLeft); - }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); - - const baselineResult = measure(() => `Merge ${size}+${size} overlapping trees using clone+set loop (baseline)`, () => { - const result = tree1.clone(); - tree2.forEachPair((k, v) => { - result.set(k, v, false); // Don't overwrite - }); - return result; - }); - const 
baselineStats = countTreeNodeStats(baselineResult); - console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + const baseTitle = `Merge ${size}+${size} overlapping trees`; + timeMergeVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -670,23 +603,8 @@ console.log("### Merge between B+ trees"); } } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - - const mergeResult = measure(() => `Merge ${tree1.size}+${tree2.size} sparse-overlap trees using merge()`, () => { - return tree1.merge(tree2, preferLeft); - }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); - - const baselineResult = measure(() => `Merge ${tree1.size}+${tree2.size} sparse-overlap trees using clone+set loop (baseline)`, () => { - const result = tree1.clone(); - tree2.forEachPair((k, v) => { - result.set(k, v, false); - }); - return result; - }); - const baselineStats = countTreeNodeStats(baselineResult); - console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + const baseTitle = `Merge ${tree1.size}+${tree2.size} sparse-overlap trees`; + timeMergeVsBaseline(baseTitle, tree1, tree2); } } @@ -721,6 +639,23 @@ console.log("### Intersect between B+ trees"); return { count, checksum }; }; + const timeIntersectVsBaseline = ( + baseTitle: string, + tree1: BTree, + tree2: BTree, + intersectLabel = 'intersect()', + baselineLabel = 'sort baseline' + ) => { + measure( + result => `${baseTitle} using ${intersectLabel} [count=${result.count}, checksum=${result.checksum}]`, + () => runIntersect(tree1, tree2) + ); + measure( + result => `${baseTitle} using ${baselineLabel} [count=${result.count}, checksum=${result.checksum}]`, + () => runBaseline(tree1, tree2) + ); + }; + console.log(); console.log("# Non-overlapping ranges (no shared keys)"); sizes.forEach((size) => { @@ -732,14 +667,8 @@ console.log("### Intersect between B+ trees"); 
tree2.set(offset + i, offset + i); } - measure( - result => `Intersect ${size}+${size} disjoint trees using intersect() [count=${result.count}, checksum=${result.checksum}]`, - () => runIntersect(tree1, tree2) - ); - measure( - result => `Intersect ${size}+${size} disjoint trees using sort baseline [count=${result.count}, checksum=${result.checksum}]`, - () => runBaseline(tree1, tree2) - ); + const baseTitle = `Intersect ${size}+${size} disjoint trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -753,14 +682,8 @@ console.log("### Intersect between B+ trees"); tree2.set(i + offset, (i + offset) * 2); } - measure( - result => `Intersect ${size}+${size} half-overlapping trees using intersect() [count=${result.count}, checksum=${result.checksum}]`, - () => runIntersect(tree1, tree2) - ); - measure( - result => `Intersect ${size}+${size} half-overlapping trees using sort baseline [count=${result.count}, checksum=${result.checksum}]`, - () => runBaseline(tree1, tree2) - ); + const baseTitle = `Intersect ${size}+${size} half-overlapping trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -773,14 +696,8 @@ console.log("### Intersect between B+ trees"); tree2.set(i, i * 3); } - measure( - result => `Intersect ${size}+${size} identical-key trees using intersect() [count=${result.count}, checksum=${result.checksum}]`, - () => runIntersect(tree1, tree2) - ); - measure( - result => `Intersect ${size}+${size} identical-key trees using sort baseline [count=${result.count}, checksum=${result.checksum}]`, - () => runBaseline(tree1, tree2) - ); + const baseTitle = `Intersect ${size}+${size} identical-key trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -805,13 +722,7 @@ console.log("### Intersect between B+ trees"); tree2.set(key, key * 7); } - measure( - result => `Intersect ${tree1.size}+${tree2.size} random trees using intersect() [count=${result.count}, checksum=${result.checksum}]`, - 
() => runIntersect(tree1, tree2) - ); - measure( - result => `Intersect ${tree1.size}+${tree2.size} random trees using sort baseline [count=${result.count}, checksum=${result.checksum}]`, - () => runBaseline(tree1, tree2) - ); + const baseTitle = `Intersect ${tree1.size}+${tree2.size} random trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); }); } From 02d30cfe14bc04aa7a33e8af866c4cb8d03a8bd1 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 20:21:01 -0800 Subject: [PATCH 037/143] test cleanup --- b+tree.test.ts | 514 ++++++++++++++++++++++++++----------------------- 1 file changed, 268 insertions(+), 246 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index 0ad53df..a0ca0cb 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1376,7 +1376,51 @@ function testMerge(maxNodeSize: number) { return result; }; - test('Merge disjoint roots', () => { + type MergeFn = (key: number, leftValue: number, rightValue: number) => number | undefined; + type MergeExpectationOptions = { + expectedMergeFn?: MergeFn; + }; + + const naiveMerge = ( + left: BTree, + right: BTree, + mergeFn: MergeFn + ) => { + const expected = left.clone(); + right.forEachPair((key, rightValue) => { + if (expected.has(key)) { + const leftValue = expected.get(key)!; + const mergedValue = mergeFn(key, leftValue, rightValue); + if (mergedValue === undefined) { + expected.delete(key); + } else { + expected.set(key, mergedValue); + } + } else { + expected.set(key, rightValue); + } + }); + return expected; + }; + + const expectMergeMatchesBaseline = ( + left: BTree, + right: BTree, + mergeFn: MergeFn, + after?: (ctx: { result: BTree, expected: BTree }) => void, + options: MergeExpectationOptions = {} + ) => { + const expectedMergeFn = options.expectedMergeFn ?? 
mergeFn; + const expected = naiveMerge(left, right, expectedMergeFn); + const result = left.merge(right, mergeFn); + expect(result.toArray()).toEqual(expected.toArray()); + result.checkValid(); + expected.checkValid(); + after?.({ result, expected }); + return { result, expected }; + }; + + test('Merge disjoint roots reuses appended subtree', () => { const size = maxNodeSize * 3; const tree1 = buildTree(range(0, size), 1, 0); const offset = size * 5; @@ -1386,20 +1430,21 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, false); let mergeCalls = 0; - const result = tree1.merge(tree2, () => { + const mergeFn: MergeFn = () => { mergeCalls++; return 0; + }; + + expectMergeMatchesBaseline(tree1, tree2, mergeFn, ({ result }) => { + const resultRoot = result['_root'] as any; + expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); + expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); }); expect(mergeCalls).toBe(0); - expect(result.size).toBe(tree1.size + tree2.size); - const resultRoot = result['_root'] as any; - expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); - expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); - result.checkValid(); }); - test('Merge leaf roots with intersecting keys', () => { + test('Merge leaf roots with intersecting keys uses merge callback', () => { const tree1 = buildTree([1, 2, 4], 10, 0); const tree2 = buildTree([2, 3, 5], 100, 0); @@ -1407,14 +1452,15 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, true); const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; - const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + const mergeFn: MergeFn = (key, leftValue, rightValue) => { calls.push({ key, leftValue, rightValue }); return leftValue + rightValue; - }); + }; + expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, leftValue, rightValue) => leftValue + rightValue + }); 
expect(calls).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); - expect(result.toArray()).toEqual([[1, 10], [2, 220], [3, 300], [4, 40], [5, 500]]); - result.checkValid(); }); test('Merge leaf roots with disjoint keys', () => { @@ -1425,11 +1471,14 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, true); let mergeCalls = 0; - const result = tree1.merge(tree2, () => { + const mergeFn: MergeFn = () => { mergeCalls++; return 0; - }); + }; + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, leftValue, rightValue) => leftValue + rightValue + }); expect(mergeCalls).toBe(0); expect(result.toArray()).toEqual([ [1, 1], @@ -1439,7 +1488,6 @@ function testMerge(maxNodeSize: number) { [5, 5], [6, 1006] ]); - result.checkValid(); }); test('Merge trees disjoint except for shared maximum key', () => { @@ -1451,34 +1499,39 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, false); let mergeCalls = 0; - const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + const mergeFn: MergeFn = (_key, leftValue, rightValue) => { mergeCalls++; return leftValue + rightValue; - }); + }; + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, leftValue, rightValue) => leftValue + rightValue + }); expect(mergeCalls).toBe(1); expect(result.get(size - 1)).toBe((size - 1) + (size - 1) * 3); expect(result.size).toBe(tree1.size + tree2.size - 1); - result.checkValid(); }); test('Merge trees where all leaves are disjoint and one tree straddles the other', () => { - const straddleLength = 3 * 2 * maxNodeSize; // guaranteed to create multiple leaves on both trees - const tree1 = buildTree(range(0, straddleLength / 3).concat(range((straddleLength / 3) * 2, straddleLength)), 1); + const straddleLength = 3 * 2 * maxNodeSize; // creates multiple leaves on both trees + const tree1 = buildTree( + range(0, straddleLength / 
3).concat(range((straddleLength / 3) * 2, straddleLength)), + 1 + ); const tree2 = buildTree(range(straddleLength / 3, (straddleLength / 3) * 2), 3); expectRootLeafState(tree1, false); expectRootLeafState(tree2, false); let mergeCalls = 0; - const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + const mergeFn: MergeFn = (_key, leftValue, rightValue) => { mergeCalls++; return leftValue + rightValue; - }); + }; + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); expect(mergeCalls).toBe(0); expect(result.size).toBe(tree1.size + tree2.size); - result.checkValid(); }); test('Merge where two-leaf tree intersects leaf-root tree across both leaves', () => { @@ -1490,17 +1543,15 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, true); const seenKeys: number[] = []; - const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + const mergeFn: MergeFn = (key, _leftValue, rightValue) => { seenKeys.push(key); return rightValue; - }); + }; + expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, _leftValue, rightValue) => rightValue + }); expect(seenKeys.sort((a, b) => a - b)).toEqual([1, Math.floor(size / 2), size - 1]); - expect(result.get(1)).toBe(5); - expect(result.get(Math.floor(size / 2))).toBe(5 * Math.floor(size / 2)); - expect(result.get(size - 1)).toBe(5 * (size - 1)); - expect(result.size).toBe(tree1.size + tree2.size - seenKeys.length); - result.checkValid(); }); test('Merge where max key equals min key of other tree', () => { @@ -1512,15 +1563,17 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, false); let mergeCalls = 0; - const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + const mergeFn: MergeFn = (_key, _leftValue, rightValue) => { mergeCalls++; return rightValue; - }); + }; + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, _leftValue, rightValue) => rightValue + }); 
expect(mergeCalls).toBe(1); expect(result.get(size - 1)).toBe((size - 1) * 10); expect(result.size).toBe(tree1.size + tree2.size - 1); - result.checkValid(); }); test('Merge odd and even keyed trees', () => { @@ -1532,14 +1585,34 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(treeEven, false); let mergeCalls = 0; - const result = treeOdd.merge(treeEven, () => { + const mergeFn: MergeFn = () => { mergeCalls++; return 0; - }); + }; + const { result } = expectMergeMatchesBaseline(treeOdd, treeEven, mergeFn); expect(mergeCalls).toBe(0); expect(result.size).toBe(treeOdd.size + treeEven.size); - result.checkValid(); + }); + + test('Merge with single boundary overlap prefers right value', () => { + const size = maxNodeSize * 2; + const tree1 = buildTree(range(0, size), 1, 0); + const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let mergeCalls = 0; + const mergeFn: MergeFn = (_key, _leftValue, rightValue) => { + mergeCalls++; + return rightValue; + }; + + expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, _leftValue, rightValue) => rightValue + }); + expect(mergeCalls).toBe(1); }); test('Merge overlapping prefix equal to branching factor', () => { @@ -1560,121 +1633,116 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, false); const mergedKeys: number[] = []; - const result = tree1.merge(tree2, (key, leftValue, rightValue) => { + const mergeFn: MergeFn = (key, leftValue, rightValue) => { mergedKeys.push(key); return leftValue + rightValue; - }); + }; + expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, leftValue, rightValue) => leftValue + rightValue + }); expect(mergedKeys.sort((a, b) => a - b)).toEqual(range(0, shared)); - const expected = [ - ...range(0, shared).map(k => [k, k + k * 2]), - ...range(shared, shared + maxNodeSize).map(k => [k, k]), - ...range(shared + 
maxNodeSize, shared + maxNodeSize * 2).map(k => [k, k * 2]) - ]; - expect(result.toArray()).toEqual(expected); - expect(result.size).toBe(tree1.size + tree2.size - shared); - result.checkValid(); }); test('Merge two empty trees', () => { const tree1 = new BTree([], compare, maxNodeSize); const tree2 = new BTree([], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; - - const result = tree1.merge(tree2, mergeFunc); + const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, v1, v2) => v1 + v2 + }); expect(result.size).toBe(0); - expect(result.toArray()).toEqual([]); }); test('Merge empty tree with non-empty tree', () => { const tree1 = new BTree([], compare, maxNodeSize); const tree2 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; - const result1 = tree1.merge(tree2, mergeFunc); - const result2 = tree2.merge(tree1, mergeFunc); + const { result: leftMerge } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); + expect(leftMerge.toArray()).toEqual(tree2.toArray()); - expect(result1.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); - expect(result2.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + const { result: rightMerge } = expectMergeMatchesBaseline(tree2, tree1, mergeFn); + expect(rightMerge.toArray()).toEqual(tree2.toArray()); + expect(tree1.toArray()).toEqual([]); + expect(tree2.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + tree1.checkValid(); + tree2.checkValid(); }); test('Merge with no overlapping keys', () => { const tree1 = new BTree([[1, 10], [3, 30], [5, 50]], compare, maxNodeSize); const tree2 = new BTree([[2, 20], [4, 40], [6, 60]], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => { + const mergeFn: MergeFn = () => { throw new Error('Should not 
be called for non-overlapping keys'); }; - const result = tree1.merge(tree2, mergeFunc); + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: mergeFn + }); expect(result.size).toBe(6); expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60]]); - result.checkValid(); }); test('Merge with completely overlapping keys - sum values', () => { const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTree([[1, 5], [2, 15], [3, 25]], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; - const result = tree1.merge(tree2, mergeFunc); - - expect(result.size).toBe(3); - expect(result.toArray()).toEqual([[1, 15], [2, 35], [3, 55]]); - result.checkValid(); + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, v1, v2) => v1 + v2 + }); + expect(result.size).toBe(tree1.size); }); test('Merge with completely overlapping keys - prefer left', () => { const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTree([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => v1; + const mergeFn: MergeFn = (_k, v1, _v2) => v1; - const result = tree1.merge(tree2, mergeFunc); - - expect(result.size).toBe(3); - expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); - result.checkValid(); + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, v1, _v2) => v1 + }); + expect(result.toArray()).toEqual(tree1.toArray()); }); test('Merge with completely overlapping keys - prefer right', () => { const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTree([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: 
number) => v2; + const mergeFn: MergeFn = (_k, _v1, v2) => v2; - const result = tree1.merge(tree2, mergeFunc); - - expect(result.size).toBe(3); - expect(result.toArray()).toEqual([[1, 100], [2, 200], [3, 300]]); - result.checkValid(); + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); + expect(result.toArray()).toEqual(tree2.toArray()); }); test('Merge with partially overlapping keys', () => { const tree1 = new BTree([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); const tree2 = new BTree([[3, 300], [4, 400], [5, 500], [6, 600]], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; - const result = tree1.merge(tree2, mergeFunc); + const mergedKeys: number[] = []; + const mergeFn: MergeFn = (key, v1, v2) => { + mergedKeys.push(key); + return v1 + v2; + }; - expect(result.size).toBe(6); - expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 330], [4, 440], [5, 500], [6, 600]]); - result.checkValid(); + expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, v1, v2) => v1 + v2 + }); + expect(mergedKeys.sort((a, b) => a - b)).toEqual([3, 4]); }); - test('Merge with overlapping keys - exclude some keys via undefined', () => { + test('Merge with overlapping keys can delete entries', () => { const tree1 = new BTree([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); const tree2 = new BTree([[2, 200], [3, 300], [4, 400], [5, 500]], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => { - // Exclude key 3 by returning undefined + const mergeFn: MergeFn = (k, v1, v2) => { if (k === 3) return undefined; return v1 + v2; }; - const result = tree1.merge(tree2, mergeFunc); - - expect(result.size).toBe(4); // Keys 1, 2, 4, 5 (key 3 excluded) - expect(result.toArray()).toEqual([[1, 10], [2, 220], [4, 440], [5, 500]]); - result.checkValid(); + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); + 
expect(result.has(3)).toBe(false); }); test('Merge is called even when values are equal', () => { @@ -1682,16 +1750,15 @@ function testMerge(maxNodeSize: number) { const tree2 = new BTree([[2, 20], [3, 30]], compare, maxNodeSize); const mergeCallLog: Array<{k: number, v1: number, v2: number}> = []; - const mergeFunc = (k: number, v1: number, v2: number) => { + const mergeFn: MergeFn = (k, v1, v2) => { mergeCallLog.push({k, v1, v2}); return v1; }; - const result = tree1.merge(tree2, mergeFunc); - + expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, v1, v2) => v1 + }); expect(mergeCallLog).toEqual([{k: 2, v1: 20, v2: 20}]); - expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); - result.checkValid(); }); test('Merge does not mutate input trees', () => { @@ -1699,54 +1766,23 @@ function testMerge(maxNodeSize: number) { const entries2: [number, number][] = [[2, 200], [3, 300], [4, 400]]; const tree1 = new BTree(entries1, compare, maxNodeSize); const tree2 = new BTree(entries2, compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; - - const result = tree1.merge(tree2, mergeFunc); - - // Verify original trees are unchanged - expect(tree1.toArray()).toEqual(entries1); - expect(tree2.toArray()).toEqual(entries2); + const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; - // Verify result is correct - expect(result.toArray()).toEqual([[1, 10], [2, 220], [3, 330], [4, 400]]); - result.checkValid(); - }); - - test('Merge with disjoint ranges', () => { - // Tree with keys 1-100 and 201-300 - const entries1: [number, number][] = []; - for (let i = 1; i <= 100; i++) entries1.push([i, i]); - for (let i = 201; i <= 300; i++) entries1.push([i, i]); - - // Tree with keys 101-200 - const entries2: [number, number][] = []; - for (let i = 101; i <= 200; i++) entries2.push([i, i]); - - const tree1 = new BTree(entries1, compare, maxNodeSize); - const tree2 = new BTree(entries2, compare, maxNodeSize); - const 
mergeFunc = (k: number, v1: number, v2: number) => { - throw new Error('Should not be called - no overlaps'); - }; + const snapshot1 = tree1.toArray(); + const snapshot2 = tree2.toArray(); - const result = tree1.merge(tree2, mergeFunc); + expectMergeMatchesBaseline(tree1, tree2, mergeFn); - expect(result.size).toBe(300); - // Check first few, last few, and boundaries - expect(result.get(1)).toBe(1); - expect(result.get(100)).toBe(100); - expect(result.get(101)).toBe(101); - expect(result.get(200)).toBe(200); - expect(result.get(201)).toBe(201); - expect(result.get(300)).toBe(300); - result.checkValid(); + expect(tree1.toArray()).toEqual(snapshot1); + expect(tree2.toArray()).toEqual(snapshot2); + tree1.checkValid(); + tree2.checkValid(); }); test('Merge large trees with some overlaps', () => { - // Tree 1: keys 0-999 const entries1: [number, number][] = []; for (let i = 0; i < 1000; i++) entries1.push([i, i]); - // Tree 2: keys 500-1499 const entries2: [number, number][] = []; for (let i = 500; i < 1500; i++) entries2.push([i, i * 10]); @@ -1754,90 +1790,54 @@ function testMerge(maxNodeSize: number) { const tree2 = new BTree(entries2, compare, maxNodeSize); let mergeCount = 0; - const mergeFunc = (k: number, v1: number, v2: number) => { + const mergeFn: MergeFn = (k, v1, v2) => { mergeCount++; - return v1 + v2; // Sum the values + return v1 + v2; }; - const result = tree1.merge(tree2, mergeFunc); - - // Verify merge was called for overlapping keys (500-999) + expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, v1, v2) => v1 + v2 + }); expect(mergeCount).toBe(500); - - // Total unique keys: 1500 - expect(result.size).toBe(1500); - - // Check various ranges - expect(result.get(0)).toBe(0); // Only in tree1 - expect(result.get(499)).toBe(499); // Only in tree1 - expect(result.get(500)).toBe(500 + 5000); // In both: 500 + (500*10) - expect(result.get(999)).toBe(999 + 9990); // In both: 999 + (999*10) - 
expect(result.get(1000)).toBe(10000); // Only in tree2 - expect(result.get(1499)).toBe(14990); // Only in tree2 - - result.checkValid(); }); test('Merge with overlaps at boundaries', () => { - // Test edge case where overlaps occur at the boundaries of node ranges const tree1 = new BTree([], compare, maxNodeSize); const tree2 = new BTree([], compare, maxNodeSize); - // Fill tree1 with even numbers for (let i = 0; i < 100; i++) { tree1.set(i * 2, i * 2); } - // Fill tree2 with numbers in a different pattern for (let i = 50; i < 150; i++) { tree2.set(i, i * 10); } - let mergeCallCount = 0; - const mergeFunc = (k: number, v1: number, v2: number) => { - mergeCallCount++; - expect(k % 2).toBe(0); // Only even keys should overlap + const mergedKeys: number[] = []; + const mergeFn: MergeFn = (key, v1, v2) => { + mergedKeys.push(key); return v1 + v2; }; - const result = tree1.merge(tree2, mergeFunc); - - // Keys 100, 102, 104, ..., 198 overlap (50 keys) - expect(mergeCallCount).toBe(50); - - result.checkValid(); - }); - - test('Merge throws error when comparators differ', () => { - const tree1 = new BTree([[1, 10]], compare, maxNodeSize); - const tree2 = new BTree([[2, 20]], (a, b) => b - a, maxNodeSize); // Reverse comparator - const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; - - expect(() => tree1.merge(tree2, mergeFunc)).toThrow(); - }); - - test('Merge throws error when max node sizes differ', () => { - const otherFanout = maxNodeSize === 32 ? 
16 : 32; - const tree1 = new BTree([[1, 10]], compare, maxNodeSize); - const tree2 = new BTree([[2, 20]], compare, otherFanout); - const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: (_k, v1, v2) => v1 + v2 + }); - expect(() => tree1.merge(tree2, mergeFunc)).toThrow(); + const expectedMergedKeys = range(50, 150).filter(k => k % 2 === 0); + expect(mergedKeys.sort((a, b) => a - b)).toEqual(expectedMergedKeys); }); test('Merge result can be modified without affecting inputs', () => { const tree1 = new BTree([[1, 10], [2, 20]], compare, maxNodeSize); const tree2 = new BTree([[3, 30], [4, 40]], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => v1 + v2; + const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; - const result = tree1.merge(tree2, mergeFunc); + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); - // Modify result result.set(1, 100); result.set(5, 50); result.delete(2); - // Verify inputs unchanged expect(tree1.get(1)).toBe(10); expect(tree1.get(2)).toBe(20); expect(tree1.has(5)).toBe(false); @@ -1848,54 +1848,70 @@ function testMerge(maxNodeSize: number) { result.checkValid(); }); + test('Merge with disjoint ranges', () => { + const entries1: [number, number][] = []; + for (let i = 1; i <= 100; i++) entries1.push([i, i]); + for (let i = 201; i <= 300; i++) entries1.push([i, i]); + + const entries2: [number, number][] = []; + for (let i = 101; i <= 200; i++) entries2.push([i, i]); + + const tree1 = new BTree(entries1, compare, maxNodeSize); + const tree2 = new BTree(entries2, compare, maxNodeSize); + const mergeFn: MergeFn = () => { + throw new Error('Should not be called - no overlaps'); + }; + + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: mergeFn + }); + + expect(result.size).toBe(300); + expect(result.get(1)).toBe(1); + expect(result.get(100)).toBe(100); + 
expect(result.get(101)).toBe(101); + expect(result.get(200)).toBe(200); + expect(result.get(201)).toBe(201); + expect(result.get(300)).toBe(300); + }); + test('Merge with single element trees', () => { const tree1 = new BTree([[5, 50]], compare, maxNodeSize); const tree2 = new BTree([[5, 500]], compare, maxNodeSize); - const mergeFunc = (k: number, v1: number, v2: number) => Math.max(v1, v2); - - const result = tree1.merge(tree2, mergeFunc); + const mergeFn: MergeFn = (_k, v1, v2) => Math.max(v1, v2); - expect(result.size).toBe(1); - expect(result.get(5)).toBe(500); + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); + expect(result.toArray()).toEqual([[5, 500]]); }); test('Merge interleaved keys', () => { - // Tree1 has keys: 1, 3, 5, 7, 9, ... const tree1 = new BTree([], compare, maxNodeSize); - for (let i = 1; i <= 100; i += 2) { + for (let i = 1; i <= 100; i += 2) tree1.set(i, i); - } - // Tree2 has keys: 2, 4, 6, 8, 10, ... const tree2 = new BTree([], compare, maxNodeSize); - for (let i = 2; i <= 100; i += 2) { + for (let i = 2; i <= 100; i += 2) tree2.set(i, i); - } - const mergeFunc = (k: number, v1: number, v2: number) => { + const mergeFn: MergeFn = () => { throw new Error('Should not be called - no overlapping keys'); }; - const result = tree1.merge(tree2, mergeFunc); - + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: mergeFn + }); expect(result.size).toBe(100); - for (let i = 1; i <= 100; i++) { + for (let i = 1; i <= 100; i++) expect(result.get(i)).toBe(i); - } - result.checkValid(); }); test('Merge excluding all overlapping keys', () => { const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTree([[2, 200], [3, 300], [4, 400]], compare, maxNodeSize); - // Exclude all overlapping keys - const mergeFunc = (k: number, v1: number, v2: number) => undefined; - - const result = tree1.merge(tree2, mergeFunc); + const mergeFn: MergeFn = () => 
undefined; - // Only non-overlapping keys remain + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); expect(result.toArray()).toEqual([[1, 10], [4, 400]]); - result.checkValid(); }); test('Merge reuses appended subtree with minimum fanout', () => { @@ -1909,46 +1925,40 @@ function testMerge(maxNodeSize: number) { tree2.set(i, i * 2); } - const mergeFunc = (k: number, v1: number, v2: number) => { + const mergeFn: MergeFn = () => { throw new Error('Should not be called for disjoint ranges'); }; - const result = tree1.merge(tree2, mergeFunc); - - expect(result.size).toBe(tree1.size + tree2.size); - const resultRoot = result['_root'] as any; - const tree2Root = tree2['_root'] as any; - expect(sharesNode(resultRoot, tree2Root)).toBe(true); - result.checkValid(); + expectMergeMatchesBaseline(tree1, tree2, mergeFn, ({ result }) => { + const resultRoot = result['_root'] as any; + const tree2Root = tree2['_root'] as any; + expect(sharesNode(resultRoot, tree2Root)).toBe(true); + }); }); test('Merge with large disjoint ranges', () => { const tree1 = new BTree([], compare, maxNodeSize); const tree2 = new BTree([], compare, maxNodeSize); - for (let i = 0; i <= 10000; i++) { + for (let i = 0; i <= 10000; i++) tree1.set(i, i); - } - for (let i = 10001; i <= 20000; i++) { + for (let i = 10001; i <= 20000; i++) tree2.set(i, i); - } let mergeCalls = 0; - const mergeFunc = (k: number, v1: number, v2: number) => { + const mergeFn: MergeFn = (_k, v1, v2) => { mergeCalls++; return v1 + v2; }; - const result = tree1.merge(tree2, mergeFunc); + const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { + expectedMergeFn: mergeFn + }); expect(mergeCalls).toBe(0); expect(result.size).toBe(tree1.size + tree2.size); expect(result.get(0)).toBe(0); expect(result.get(20000)).toBe(20000); - const resultRoot = result['_root'] as any; - const tree2Root = tree2['_root'] as any; - expect(sharesNode(resultRoot, tree2Root)).toBe(true); - result.checkValid(); 
}); test('Merge trees with random overlap', () => { @@ -1959,16 +1969,15 @@ function testMerge(maxNodeSize: number) { const tree1 = new BTree(); const tree2 = new BTree(); - for (let k of keys1) { + for (let k of keys1) tree1.set(k, k); - } - for (let k of keys2) { + for (let k of keys2) tree2.set(k, k * 10); - } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const mergeResult = tree1.merge(tree2, preferLeft); - mergeResult.checkValid(); + const preferLeft: MergeFn = (_key, leftValue) => leftValue; + expectMergeMatchesBaseline(tree1, tree2, preferLeft, undefined, { + expectedMergeFn: preferLeft + }); }); test('Merge trees with ~10% overlap', () => { @@ -1979,34 +1988,47 @@ function testMerge(maxNodeSize: number) { const tree1 = new BTree([], compare, maxNodeSize); const tree2 = new BTree([], compare, maxNodeSize); - for (let i = 0; i < size; i++) { + for (let i = 0; i < size; i++) tree1.set(i, i); - } + for (let i = 0; i < size; i++) { const key = offset + i; tree2.set(key, key * 10); } - const preferLeft = (_k: number, v1: number, _v2: number) => v1; - const result = tree1.merge(tree2, preferLeft); + const preferLeft: MergeFn = (_key, leftValue) => leftValue; - expect(result.size).toBe(size + size - overlap); - result.checkValid(); + const { result } = expectMergeMatchesBaseline(tree1, tree2, preferLeft, undefined, { + expectedMergeFn: preferLeft + }); - for (let i = 0; i < offset; i++) { + expect(result.size).toBe(size + size - overlap); + for (let i = 0; i < offset; i++) expect(result.get(i)).toBe(i); - } - for (let i = offset; i < size; i++) { + for (let i = offset; i < size; i++) expect(result.get(i)).toBe(i); - } const upperBound = offset + size; - for (let i = size; i < upperBound; i++) { + for (let i = size; i < upperBound; i++) expect(result.get(i)).toBe(i * 10); - } + }); + + test('Merge throws error when comparators differ', () => { + const tree1 = new BTree([[1, 10]], compare, maxNodeSize); + const tree2 = new BTree([[2, 20]], (a, 
b) => b - a, maxNodeSize); + const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + + expect(() => tree1.merge(tree2, mergeFn)).toThrow(); + }); - expect(tree1.size).toBe(size); - expect(tree2.size).toBe(size); + test('Merge throws error when max node sizes differ', () => { + const otherFanout = maxNodeSize === 32 ? 16 : 32; + const tree1 = new BTree([[1, 10]], compare, maxNodeSize); + const tree2 = new BTree([[2, 20]], compare, otherFanout); + const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + + expect(() => tree1.merge(tree2, mergeFn)).toThrow(); }); + } function swap(keys: any[], i: number, j: number) { From 40f9448ffcf2ae7ef639e109c99208a68969755b Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 20:47:18 -0800 Subject: [PATCH 038/143] cleanup --- b+tree.js | 4 ++-- b+tree.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/b+tree.js b/b+tree.js index 82df8b0..7eb7140 100644 --- a/b+tree.js +++ b/b+tree.js @@ -858,7 +858,7 @@ var BTree = /** @class */ (function () { var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual) { check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); var start = startedEqual ? 
fromIndex + 1 : fromIndex; - pushLeafRange(leaf, start, Math.min(toIndex, leaf.keys.length)); + pushLeafRange(leaf, start, toIndex); }; var onExitLeaf = function (leaf, payload, startingIndex, startedEqual, cursorThis) { highestDisjoint = undefined; @@ -925,7 +925,7 @@ var BTree = /** @class */ (function () { cursorOther.leafPayload.disqualified = true; disqualifySpine(cursorThis, cursorThis.spine.length - 1); disqualifySpine(cursorOther, cursorOther.spine.length - 1); - pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); + pushLeafRange(leaf, 0, destIndex); } }; // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second diff --git a/b+tree.ts b/b+tree.ts index 4faa520..6e949e3 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1035,7 +1035,7 @@ export default class BTree implements ISortedMapF, ISortedMap ) => { check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); const start = startedEqual ? fromIndex + 1 : fromIndex; - pushLeafRange(leaf, start, Math.min(toIndex, leaf.keys.length)); + pushLeafRange(leaf, start, toIndex); }; const onExitLeaf = ( @@ -1128,7 +1128,7 @@ export default class BTree implements ISortedMapF, ISortedMap cursorOther.leafPayload.disqualified = true; disqualifySpine(cursorThis, cursorThis.spine.length - 1); disqualifySpine(cursorOther, cursorOther.spine.length - 1); - pushLeafRange(leaf, 0, Math.min(destIndex, leaf.keys.length)); + pushLeafRange(leaf, 0, destIndex); } }; From 4f8d9408ea9407459a7c2bb35893f27340a29f7a Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 21:04:48 -0800 Subject: [PATCH 039/143] optimizations --- b+tree.d.ts | 1 + b+tree.js | 20 +++++++------------- b+tree.ts | 23 ++++++++--------------- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 60cc8f5..eca0216 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -303,6 +303,7 @@ export default class BTree implements ISortedMapF, ISort * in 
the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. */ private static decompose; + private static heightOf; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). diff --git a/b+tree.js b/b+tree.js index 7eb7140..393862a 100644 --- a/b+tree.js +++ b/b+tree.js @@ -850,10 +850,8 @@ var BTree = /** @class */ (function () { // Cursor payload factory var makePayload = function () { return ({ disqualified: false }); }; var pushLeafRange = function (leaf, from, toExclusive) { - if (from < toExclusive) { - for (var i = from; i < toExclusive; ++i) - pending.push([leaf.keys[i], leaf.values[i]]); - } + for (var i = from; i < toExclusive; ++i) + pending.push([leaf.keys[i], leaf.values[i]]); }; var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual) { check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); @@ -1004,14 +1002,12 @@ var BTree = /** @class */ (function () { flushPendingEntries(); return { disjoint: disjoint, tallestIndex: tallestIndex }; }; + BTree.heightOf = function (spine, depth) { return spine.length - depth; }; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). */ BTree.moveTo = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { - // We should start before the target (or at it if inclusive) - var keyPos = cmp(BTree.getKey(cur), targetKey); - check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo requires alternating hop pattern"); // Fast path: destination within current leaf var leaf = cur.leaf; var i = leaf.indexOf(targetKey, -1, cmp); @@ -1036,8 +1032,6 @@ var BTree = /** @class */ (function () { break; } } - // Heights for callbacks: height = distance to leaf. Parent-of-leaf height = 1. 
- var heightOf = function (depth) { return spine.length - depth; }; // Exit leaf; we did walk out of it conceptually var startIndex = cur.leafIndex; cur.onExitLeaf(leaf, cur.leafPayload, startIndex, startedEqual, cur); @@ -1046,20 +1040,20 @@ var BTree = /** @class */ (function () { for (var depth = spine.length - 1; depth >= 0; depth--) { var entry_1 = spine[depth]; var sd = depth === 0 ? Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry_1.node, heightOf(depth), entry_1.payload, entry_1.childIndex, depth, sd, cur); + cur.onStepUp(entry_1.node, BTree.heightOf(spine, depth), entry_1.payload, entry_1.childIndex, depth, sd, cur); } return true; } // Step up through ancestors above the descentLevel for (var depth = spine.length - 1; depth > descentLevel; depth--) { var entry_2 = spine[depth]; - cur.onStepUp(entry_2.node, heightOf(depth), entry_2.payload, entry_2.childIndex, depth, NaN, cur); + cur.onStepUp(entry_2.node, BTree.heightOf(spine, depth), entry_2.payload, entry_2.childIndex, depth, NaN, cur); } var entry = spine[descentLevel]; - cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentLevel, descentIndex, cur); + cur.onStepUp(entry.node, BTree.heightOf(spine, descentLevel), entry.payload, entry.childIndex, descentLevel, descentIndex, cur); entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads - var height = heightOf(descentLevel) - 1; // calculate height before changing length + var height = BTree.heightOf(spine, descentLevel) - 1; // calculate height before changing length spine.length = descentLevel + 1; var node = spine[descentLevel].node.children[descentIndex]; while (!node.isLeaf) { diff --git a/b+tree.ts b/b+tree.ts index 6e949e3..4c775ef 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1020,10 +1020,8 @@ export default class BTree implements ISortedMapF, ISortedMap const makePayload = (): MergeCursorPayload => ({ disqualified: false }); const pushLeafRange = (leaf: BNode, from: 
number, toExclusive: number) => { - if (from < toExclusive) { - for (let i = from; i < toExclusive; ++i) - pending.push([leaf.keys[i], leaf.values[i]]); - } + for (let i = from; i < toExclusive; ++i) + pending.push([leaf.keys[i], leaf.values[i]]); }; const onMoveInLeaf = ( @@ -1208,6 +1206,8 @@ export default class BTree implements ISortedMapF, ISortedMap return { disjoint, tallestIndex }; } + private static heightOf(spine: MergeCursor['spine'], depth: number) { return spine.length - depth; } + /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). @@ -1220,10 +1220,6 @@ export default class BTree implements ISortedMapF, ISortedMap startedEqual: boolean, cmp: (a:K,b:K)=>number ): boolean { - // We should start before the target (or at it if inclusive) - const keyPos = cmp(BTree.getKey(cur), targetKey); - check(isInclusive && keyPos < 0 || !isInclusive && keyPos <= 0, "moveTo requires alternating hop pattern"); - // Fast path: destination within current leaf const leaf = cur.leaf; const i = leaf.indexOf(targetKey, -1, cmp); @@ -1251,9 +1247,6 @@ export default class BTree implements ISortedMapF, ISortedMap } } - // Heights for callbacks: height = distance to leaf. Parent-of-leaf height = 1. - const heightOf = (depth: number) => spine.length - depth; - // Exit leaf; we did walk out of it conceptually const startIndex = cur.leafIndex; cur.onExitLeaf(leaf, cur.leafPayload, startIndex, startedEqual, cur); @@ -1263,7 +1256,7 @@ export default class BTree implements ISortedMapF, ISortedMap for (let depth = spine.length - 1; depth >= 0; depth--) { const entry = spine[depth]; const sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry.node, heightOf(depth), entry.payload, entry.childIndex, depth, sd, cur); + cur.onStepUp(entry.node, BTree.heightOf(spine, depth), entry.payload, entry.childIndex, depth, sd, cur); } return true; } @@ -1271,15 +1264,15 @@ export default class BTree implements ISortedMapF, ISortedMap // Step up through ancestors above the descentLevel for (let depth = spine.length - 1; depth > descentLevel; depth--) { const entry = spine[depth]; - cur.onStepUp(entry.node, heightOf(depth), entry.payload, entry.childIndex, depth, NaN, cur); + cur.onStepUp(entry.node, BTree.heightOf(spine, depth), entry.payload, entry.childIndex, depth, NaN, cur); } const entry = spine[descentLevel]; - cur.onStepUp(entry.node, heightOf(descentLevel), entry.payload, entry.childIndex, descentLevel, descentIndex, cur); + cur.onStepUp(entry.node, BTree.heightOf(spine, descentLevel), entry.payload, entry.childIndex, descentLevel, descentIndex, cur); entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads - let height = heightOf(descentLevel) - 1; // calculate height before changing length + let height = BTree.heightOf(spine, descentLevel) - 1; // calculate height before changing length spine.length = descentLevel + 1; let node: BNode = spine[descentLevel].node.children[descentIndex]; From af1d384cea8f49d49dc710529ac73509190f5fd5 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 21:24:51 -0800 Subject: [PATCH 040/143] cleanup + perf --- b+tree.js | 12 +++++++++--- b+tree.ts | 12 +++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/b+tree.js b/b+tree.js index 393862a..47878f3 100644 --- a/b+tree.js +++ b/b+tree.js @@ -1023,10 +1023,16 @@ var BTree = /** @class */ (function () { var descentIndex = -1; for (var s = spine.length - 1; s >= 0; s--) { var parent = spine[s].node; - var indexOf = parent.indexOf(targetKey, 0, cmp); // insertion index or exact - var stepDownIndex = indexOf 
+ (isInclusive ? 0 : (indexOf < parent.keys.length && cmp(parent.keys[indexOf], targetKey) === 0 ? 1 : 0)); + var indexOf = parent.indexOf(targetKey, -1, cmp); + var stepDownIndex = void 0; + if (indexOf < 0) { + stepDownIndex = ~indexOf; + } + else { + stepDownIndex = isInclusive ? indexOf : indexOf + 1; + } // Note: when key not found, indexOf with failXor=0 already returns insertion index - if (stepDownIndex <= parent.keys.length - 1) { + if (stepDownIndex < parent.keys.length) { descentLevel = s; descentIndex = stepDownIndex; break; diff --git a/b+tree.ts b/b+tree.ts index 4c775ef..9727f5f 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1237,10 +1237,16 @@ export default class BTree implements ISortedMapF, ISortedMap for (let s = spine.length - 1; s >= 0; s--) { const parent = spine[s].node; - const indexOf = parent.indexOf(targetKey, 0, cmp); // insertion index or exact - const stepDownIndex = indexOf + (isInclusive ? 0 : (indexOf < parent.keys.length && cmp(parent.keys[indexOf], targetKey) === 0 ? 1 : 0)); + const indexOf = parent.indexOf(targetKey, -1, cmp); + let stepDownIndex: number; + if (indexOf < 0) { + stepDownIndex = ~indexOf; + } else { + stepDownIndex = isInclusive ? 
indexOf : indexOf + 1; + } + // Note: when key not found, indexOf with failXor=0 already returns insertion index - if (stepDownIndex <= parent.keys.length - 1) { + if (stepDownIndex < parent.keys.length) { descentLevel = s; descentIndex = stepDownIndex; break; From 87bb2b1265019ea2ff6a8bfe1f1404c2bb0db4a5 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 21:28:06 -0800 Subject: [PATCH 041/143] cleanup --- b+tree.js | 10 ---------- b+tree.ts | 12 ------------ 2 files changed, 22 deletions(-) diff --git a/b+tree.js b/b+tree.js index 47878f3..8036e32 100644 --- a/b+tree.js +++ b/b+tree.js @@ -802,11 +802,6 @@ var BTree = /** @class */ (function () { var keys = slice.map(function (p) { return p[0]; }); var vals = slice.map(function (p) { return p[1]; }); var leaf = new BNode(keys, vals); - if (disjoint.length > 0) { - if (areOverlapping(leaf.minKey(), leaf.maxKey(), disjoint[disjoint.length - 1][1].minKey(), disjoint[disjoint.length - 1][1].maxKey(), left._compare) - || cmp(leaf.minKey(), disjoint[disjoint.length - 1][1].maxKey()) <= 0) - throw new Error("Decompose produced overlapping leaves"); - } disjoint.push([0, leaf]); if (0 > tallestHeight) { tallestIndex = disjoint.length - 1; @@ -822,11 +817,6 @@ var BTree = /** @class */ (function () { var addSharedNodeToDisjointSet = function (node, height) { flushPendingEntries(); node.isShared = true; - if (disjoint.length > 0) { - if (areOverlapping(node.minKey(), node.maxKey(), disjoint[disjoint.length - 1][1].minKey(), disjoint[disjoint.length - 1][1].maxKey(), left._compare) - || cmp(node.minKey(), disjoint[disjoint.length - 1][1].maxKey()) <= 0) - throw new Error("Decompose produced overlapping leaves"); - } disjoint.push([height, node]); if (height > tallestHeight) { tallestIndex = disjoint.length - 1; diff --git a/b+tree.ts b/b+tree.ts index 9727f5f..edb6d9d 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -964,12 +964,6 @@ export default class BTree implements ISortedMapF, ISortedMap const keys = 
slice.map(p => p[0]); const vals = slice.map(p => p[1]); const leaf = new BNode(keys, vals); - if (disjoint.length > 0) - { - if (areOverlapping(leaf.minKey()!, leaf.maxKey(), disjoint[disjoint.length - 1][1].minKey()!, disjoint[disjoint.length - 1][1].maxKey(), left._compare) - || cmp(leaf.minKey()!, disjoint[disjoint.length - 1][1].maxKey()!) <= 0) - throw new Error("Decompose produced overlapping leaves"); - } disjoint.push([0, leaf]); if (0 > tallestHeight) { tallestIndex = disjoint.length - 1; @@ -987,12 +981,6 @@ export default class BTree implements ISortedMapF, ISortedMap const addSharedNodeToDisjointSet = (node: BNode, height: number) => { flushPendingEntries(); node.isShared = true; - if (disjoint.length > 0) - { - if (areOverlapping(node.minKey()!, node.maxKey(), disjoint[disjoint.length - 1][1].minKey()!, disjoint[disjoint.length - 1][1].maxKey(), left._compare) - || cmp(node.minKey()!, disjoint[disjoint.length - 1][1].maxKey()!) <= 0) - throw new Error("Decompose produced overlapping leaves"); - } disjoint.push([height, node]); if (height > tallestHeight) { tallestIndex = disjoint.length - 1; From 6c0748870601e80ae18f3d3e11fe293b0d1576a8 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 5 Nov 2025 21:56:13 -0800 Subject: [PATCH 042/143] optimization --- b+tree.js | 47 +++++++++++++++++++++++++++-------------------- b+tree.ts | 35 +++++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/b+tree.js b/b+tree.js index 8036e32..293c6c6 100644 --- a/b+tree.js +++ b/b+tree.js @@ -514,25 +514,28 @@ var BTree = /** @class */ (function () { // Walk both cursors while (true) { var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); - var trailing = curA, leading = curB; - if (order > 0) { - trailing = curB; - leading = curA; - } var areEqual = order === 0; if (areEqual) { - var key = BTree.getKey(leading); + var key = BTree.getKey(curA); var vA = curA.leaf.values[curA.leafIndex]; var vB = 
curB.leaf.values[curB.leafIndex]; intersection(key, vA, vB); - var outT = BTree.moveTo(trailing, leading, key, false, areEqual, cmp); - var outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); + var outT = BTree.moveTo(curB, curA, key, false, areEqual, cmp); + var outL = BTree.moveTo(curA, curB, key, false, areEqual, cmp); if (outT && outL) break; } else { - var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); - if (out) { + var leading = void 0, trailing = void 0; + if (order > 0) { + trailing = curB; + leading = curA; + } + else { + trailing = curA; + leading = curB; + } + if (BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp)) { // We've reached the end of one tree, so intersections are guaranteed to be done. break; } @@ -951,37 +954,41 @@ var BTree = /** @class */ (function () { // Walk both cursors in alternating hops while (true) { var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); - var trailing = curA, leading = curB; - if (order > 0) { - trailing = curB; - leading = curA; - } var areEqual = order === 0; if (areEqual) { - var key = BTree.getKey(leading); + var key = BTree.getKey(curA); var vA = curA.leaf.values[curA.leafIndex]; var vB = curB.leaf.values[curB.leafIndex]; var merged = mergeValues(key, vA, vB); if (merged !== undefined) pending.push([key, merged]); - var outT = BTree.moveTo(trailing, leading, key, false, areEqual, cmp); - var outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); + var outT = BTree.moveTo(curB, curA, key, false, areEqual, cmp); + var outL = BTree.moveTo(curA, curB, key, false, areEqual, cmp); if (outT || outL) { if (!outT || !outL) { // In these cases, we pass areEqual=false because a return value of "out of tree" means // the cursor did not move. This must be true because they started equal and one of them had more tree // to walk (one is !out), so they cannot be equal at this point. 
if (outT) { - BTree.moveTo(leading, trailing, maxKey, false, false, cmp); + BTree.moveTo(curA, curB, maxKey, false, false, cmp); } else { - BTree.moveTo(trailing, leading, maxKey, false, false, cmp); + BTree.moveTo(curB, curA, maxKey, false, false, cmp); } } break; } } else { + var trailing = void 0, leading = void 0; + if (order > 0) { + trailing = curB; + leading = curA; + } + else { + trailing = curA; + leading = curB; + } var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (out) { BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); diff --git a/b+tree.ts b/b+tree.ts index edb6d9d..b8c5bf6 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -596,22 +596,25 @@ export default class BTree implements ISortedMapF, ISortedMap // Walk both cursors while (true) { const order = cmp(BTree.getKey(curA), BTree.getKey(curB)); - let trailing = curA, leading = curB; - if (order > 0) { trailing = curB; leading = curA; } const areEqual = order === 0; if (areEqual) { - const key = BTree.getKey(leading); + const key = BTree.getKey(curA); const vA = curA.leaf.values[curA.leafIndex]; const vB = curB.leaf.values[curB.leafIndex]; intersection(key, vA, vB); - const outT = BTree.moveTo(trailing, leading, key, false, areEqual, cmp); - const outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); + const outT = BTree.moveTo(curB, curA, key, false, areEqual, cmp); + const outL = BTree.moveTo(curA, curB, key, false, areEqual, cmp); if (outT && outL) break; } else { - const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); - if (out) { + let leading: MergeCursor, trailing: MergeCursor; + if (order > 0) { + trailing = curB; leading = curA; + } else { + trailing = curA; leading = curB; + } + if (BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp)) { // We've reached the end of one tree, so intersections are guaranteed to be done. 
break; } @@ -1156,32 +1159,36 @@ export default class BTree implements ISortedMapF, ISortedMap // Walk both cursors in alternating hops while (true) { const order = cmp(BTree.getKey(curA), BTree.getKey(curB)); - let trailing = curA, leading = curB; - if (order > 0) { trailing = curB; leading = curA; } const areEqual = order === 0; if (areEqual) { - const key = BTree.getKey(leading); + const key = BTree.getKey(curA); const vA = curA.leaf.values[curA.leafIndex]; const vB = curB.leaf.values[curB.leafIndex]; const merged = mergeValues(key, vA, vB); if (merged !== undefined) pending.push([key, merged]); - const outT = BTree.moveTo(trailing, leading, key, false, areEqual, cmp); - const outL = BTree.moveTo(leading, trailing, key, false, areEqual, cmp); + const outT = BTree.moveTo(curB, curA, key, false, areEqual, cmp); + const outL = BTree.moveTo(curA, curB, key, false, areEqual, cmp); if (outT || outL) { if (!outT || !outL) { // In these cases, we pass areEqual=false because a return value of "out of tree" means // the cursor did not move. This must be true because they started equal and one of them had more tree // to walk (one is !out), so they cannot be equal at this point. 
if (outT) { - BTree.moveTo(leading, trailing, maxKey, false, false, cmp); + BTree.moveTo(curA, curB, maxKey, false, false, cmp); } else { - BTree.moveTo(trailing, leading, maxKey, false, false, cmp); + BTree.moveTo(curB, curA, maxKey, false, false, cmp); } } break; } } else { + let trailing: MergeCursor, leading: MergeCursor; + if (order > 0) { + trailing = curB; leading = curA; + } else { + trailing = curA; leading = curB; + } const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (out) { BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); From 5390068b3071c0c6dd9bb7b3706a340208a258ae Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 07:39:42 -0800 Subject: [PATCH 043/143] micro optimization --- b+tree.js | 145 +++++++++++++++++++++++++++++++++--------------------- b+tree.ts | 126 +++++++++++++++++++++++++++++------------------ 2 files changed, 167 insertions(+), 104 deletions(-) diff --git a/b+tree.js b/b+tree.js index 293c6c6..05fe512 100644 --- a/b+tree.js +++ b/b+tree.js @@ -790,30 +790,32 @@ var BTree = /** @class */ (function () { var pending = []; var tallestIndex = -1, tallestHeight = -1; var flushPendingEntries = function () { - // Flush pending overlapped entries into new leaves - if (pending.length > 0) { - var max = left._maxNodeSize; - var total = pending.length; - var remaining = total; - var leafCount = Math.ceil(total / max); - var offset = 0; - while (leafCount > 0) { - var newLeafSize = Math.ceil(remaining / leafCount); - var slice = pending.slice(offset, offset + newLeafSize); - offset += newLeafSize; - remaining -= newLeafSize; - var keys = slice.map(function (p) { return p[0]; }); - var vals = slice.map(function (p) { return p[1]; }); - var leaf = new BNode(keys, vals); - disjoint.push([0, leaf]); - if (0 > tallestHeight) { - tallestIndex = disjoint.length - 1; - tallestHeight = 0; - } - leafCount--; + var total = pending.length; + if (total === 0) + return; + var max = 
left._maxNodeSize; + var leafCount = Math.ceil(total / max); + var remaining = total; + var offset = 0; + while (leafCount > 0) { + var chunkSize = Math.ceil(remaining / leafCount); + var keys = new Array(chunkSize); + var vals = new Array(chunkSize); + for (var i = 0; i < chunkSize; ++i) { + var entry = pending[offset++]; + keys[i] = entry[0]; + vals[i] = entry[1]; + } + remaining -= chunkSize; + leafCount--; + var leaf = new BNode(keys, vals); + disjoint.push([0, leaf]); + if (tallestHeight < 0) { + tallestIndex = disjoint.length - 1; + tallestHeight = 0; } - pending.length = 0; } + pending.length = 0; }; // Have to do this as cast to convince TS it's ever assigned var highestDisjoint = undefined; @@ -833,23 +835,27 @@ var BTree = /** @class */ (function () { } }; var disqualifySpine = function (cursor, depthFrom) { + var spine = cursor.spine; for (var i = depthFrom; i >= 0; --i) { - var entry = cursor.spine[i]; - if (entry.payload.disqualified) + var payload = spine[i].payload; + if (payload.disqualified) break; - entry.payload.disqualified = true; + payload.disqualified = true; } }; // Cursor payload factory var makePayload = function () { return ({ disqualified: false }); }; var pushLeafRange = function (leaf, from, toExclusive) { + var keys = leaf.keys; + var values = leaf.values; for (var i = from; i < toExclusive; ++i) - pending.push([leaf.keys[i], leaf.values[i]]); + pending.push([keys[i], values[i]]); }; var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual) { check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); var start = startedEqual ? fromIndex + 1 : fromIndex; - pushLeafRange(leaf, start, toIndex); + if (start < toIndex) + pushLeafRange(leaf, start, toIndex); }; var onExitLeaf = function (leaf, payload, startingIndex, startedEqual, cursorThis) { highestDisjoint = undefined; @@ -863,11 +869,15 @@ var BTree = /** @class */ (function () { } else { var start = startedEqual ? 
startingIndex + 1 : startingIndex; - pushLeafRange(leaf, start, leaf.keys.length); + var leafSize = leaf.keys.length; + if (start < leafSize) + pushLeafRange(leaf, start, leafSize); } }; var onStepUp = function (parent, height, payload, fromIndex, spineIndex, stepDownIndex, cursorThis) { - if (Number.isNaN(stepDownIndex) /* still walking up */ + var children = parent.children; + var nextHeight = height - 1; + if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { if (!payload.disqualified) { highestDisjoint = { node: parent, height: height }; @@ -878,8 +888,9 @@ var BTree = /** @class */ (function () { } else { addHighestDisjoint(); - for (var i = fromIndex + 1; i < parent.children.length; ++i) - addSharedNodeToDisjointSet(parent.children[i], height - 1); + var len = children.length; + for (var i = fromIndex + 1; i < len; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); } } else { @@ -891,7 +902,7 @@ var BTree = /** @class */ (function () { } addHighestDisjoint(); for (var i = fromIndex + 1; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(parent.children[i], height - 1); + addSharedNodeToDisjointSet(children[i], nextHeight); } }; var onStepDown = function (node, height, spineIndex, stepDownIndex, cursorThis) { @@ -903,8 +914,10 @@ var BTree = /** @class */ (function () { // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range // of its children. 
disqualifySpine(cursorThis, spineIndex); + var children = node.children; + var nextHeight = height - 1; for (var i = 0; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(node.children[i], height - 1); + addSharedNodeToDisjointSet(children[i], nextHeight); } }; var onEnterLeaf = function (leaf, destIndex, cursorThis, cursorOther) { @@ -953,17 +966,18 @@ var BTree = /** @class */ (function () { initDisqualify(curB, curA); // Walk both cursors in alternating hops while (true) { - var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); + var keyA = curA.leaf.keys[curA.leafIndex]; + var keyB = curB.leaf.keys[curB.leafIndex]; + var order = cmp(keyA, keyB); var areEqual = order === 0; if (areEqual) { - var key = BTree.getKey(curA); var vA = curA.leaf.values[curA.leafIndex]; var vB = curB.leaf.values[curB.leafIndex]; - var merged = mergeValues(key, vA, vB); + var merged = mergeValues(keyA, vA, vB); if (merged !== undefined) - pending.push([key, merged]); - var outT = BTree.moveTo(curB, curA, key, false, areEqual, cmp); - var outL = BTree.moveTo(curA, curB, key, false, areEqual, cmp); + pending.push([keyA, merged]); + var outT = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp); + var outL = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp); if (outT || outL) { if (!outT || !outL) { // In these cases, we pass areEqual=false because a return value of "out of tree" means @@ -1005,20 +1019,29 @@ var BTree = /** @class */ (function () { * Returns true if end-of-tree was reached (cursor not structurally mutated). 
*/ BTree.moveTo = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { + var onMoveInLeaf = cur.onMoveInLeaf; + var onExitLeaf = cur.onExitLeaf; + var onStepUp = cur.onStepUp; + var onStepDown = cur.onStepDown; + var onEnterLeaf = cur.onEnterLeaf; + var makePayload = cur.makePayload; // Fast path: destination within current leaf var leaf = cur.leaf; + var leafPayload = cur.leafPayload; var i = leaf.indexOf(targetKey, -1, cmp); var destInLeaf = i < 0 ? ~i : (isInclusive ? i : i + 1); - if (destInLeaf < leaf.keys.length) { - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual); + var leafKeyCount = leaf.keys.length; + if (destInLeaf < leafKeyCount) { + onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); cur.leafIndex = destInLeaf; return false; } // Find first ancestor with a viable right step var spine = cur.spine; + var initialSpineLength = spine.length; var descentLevel = -1; var descentIndex = -1; - for (var s = spine.length - 1; s >= 0; s--) { + for (var s = initialSpineLength - 1; s >= 0; s--) { var parent = spine[s].node; var indexOf = parent.indexOf(targetKey, -1, cmp); var stepDownIndex = void 0; @@ -1037,46 +1060,54 @@ var BTree = /** @class */ (function () { } // Exit leaf; we did walk out of it conceptually var startIndex = cur.leafIndex; - cur.onExitLeaf(leaf, cur.leafPayload, startIndex, startedEqual, cur); + onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); if (descentLevel < 0) { // No descent point; step up all the way; last callback gets infinity - for (var depth = spine.length - 1; depth >= 0; depth--) { + for (var depth = initialSpineLength - 1; depth >= 0; depth--) { var entry_1 = spine[depth]; var sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry_1.node, BTree.heightOf(spine, depth), entry_1.payload, entry_1.childIndex, depth, sd, cur); + onStepUp(entry_1.node, initialSpineLength - depth, entry_1.payload, entry_1.childIndex, depth, sd, cur); } return true; } // Step up through ancestors above the descentLevel - for (var depth = spine.length - 1; depth > descentLevel; depth--) { + for (var depth = initialSpineLength - 1; depth > descentLevel; depth--) { var entry_2 = spine[depth]; - cur.onStepUp(entry_2.node, BTree.heightOf(spine, depth), entry_2.payload, entry_2.childIndex, depth, NaN, cur); + onStepUp(entry_2.node, initialSpineLength - depth, entry_2.payload, entry_2.childIndex, depth, Number.NaN, cur); } var entry = spine[descentLevel]; - cur.onStepUp(entry.node, BTree.heightOf(spine, descentLevel), entry.payload, entry.childIndex, descentLevel, descentIndex, cur); + onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads - var height = BTree.heightOf(spine, descentLevel) - 1; // calculate height before changing length + var height = initialSpineLength - descentLevel - 1; // calculate height before changing length spine.length = descentLevel + 1; var node = spine[descentLevel].node.children[descentIndex]; while (!node.isLeaf) { var ni = node; - var j = ni.indexOf(targetKey, 0, cmp); - var stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 
1 : 0)); - var payload = cur.makePayload(); + var keys = ni.keys; + var stepDownIndex = ni.indexOf(targetKey, 0, cmp); + if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) + stepDownIndex++; + var payload = makePayload(); + var spineIndex = spine.length; spine.push({ node: ni, childIndex: stepDownIndex, payload: payload }); - cur.onStepDown(ni, height, spine.length - 1, stepDownIndex, cur); + onStepDown(ni, height, spineIndex, stepDownIndex, cur); node = ni.children[stepDownIndex]; height -= 1; } // Enter destination leaf var idx = node.indexOf(targetKey, -1, cmp); - var destIndex = idx < 0 ? ~idx : (isInclusive ? idx : idx + 1); - check(destIndex >= 0 && destIndex < node.keys.length, "moveTo: destination out of bounds"); + var destIndex; + if (idx < 0) + destIndex = ~idx; + else + destIndex = isInclusive ? idx : idx + 1; + var nodeKeys = node.keys; + check(destIndex >= 0 && destIndex < nodeKeys.length, "moveTo: destination out of bounds"); cur.leaf = node; - cur.leafPayload = cur.makePayload(); + cur.leafPayload = makePayload(); cur.leafIndex = destIndex; - cur.onEnterLeaf(node, destIndex, cur, other); + onEnterLeaf(node, destIndex, cur, other); return false; }; /** diff --git a/b+tree.ts b/b+tree.ts index b8c5bf6..05ea082 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -952,30 +952,33 @@ export default class BTree implements ISortedMapF, ISortedMap let tallestIndex = -1, tallestHeight = -1; const flushPendingEntries = () => { - // Flush pending overlapped entries into new leaves - if (pending.length > 0) { - const max = left._maxNodeSize; const total = pending.length; - let remaining = total; + if (total === 0) + return; + + const max = left._maxNodeSize; let leafCount = Math.ceil(total / max); + let remaining = total; let offset = 0; while (leafCount > 0) { - const newLeafSize = Math.ceil(remaining / leafCount); - const slice = pending.slice(offset, offset + newLeafSize); - offset += newLeafSize; - remaining -= newLeafSize; - 
const keys = slice.map(p => p[0]); - const vals = slice.map(p => p[1]); + const chunkSize = Math.ceil(remaining / leafCount); + const keys = new Array(chunkSize); + const vals = new Array(chunkSize); + for (let i = 0; i < chunkSize; ++i) { + const entry = pending[offset++]; + keys[i] = entry[0]; + vals[i] = entry[1]; + } + remaining -= chunkSize; + leafCount--; const leaf = new BNode(keys, vals); disjoint.push([0, leaf]); - if (0 > tallestHeight) { + if (tallestHeight < 0) { tallestIndex = disjoint.length - 1; tallestHeight = 0; } - leafCount--; } pending.length = 0; - } }; // Have to do this as cast to convince TS it's ever assigned @@ -999,11 +1002,12 @@ export default class BTree implements ISortedMapF, ISortedMap }; const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { + const spine = cursor.spine; for (let i = depthFrom; i >= 0; --i) { - const entry = cursor.spine[i]; - if (entry.payload.disqualified) + const payload = spine[i].payload; + if (payload.disqualified) break; - entry.payload.disqualified = true; + payload.disqualified = true; } }; @@ -1011,8 +1015,10 @@ export default class BTree implements ISortedMapF, ISortedMap const makePayload = (): MergeCursorPayload => ({ disqualified: false }); const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { + const keys = leaf.keys; + const values = leaf.values; for (let i = from; i < toExclusive; ++i) - pending.push([leaf.keys[i], leaf.values[i]]); + pending.push([keys[i], values[i]]); }; const onMoveInLeaf = ( @@ -1024,6 +1030,7 @@ export default class BTree implements ISortedMapF, ISortedMap ) => { check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); const start = startedEqual ? fromIndex + 1 : fromIndex; + if (start < toIndex) pushLeafRange(leaf, start, toIndex); }; @@ -1044,7 +1051,9 @@ export default class BTree implements ISortedMapF, ISortedMap } } else { const start = startedEqual ? 
startingIndex + 1 : startingIndex; - pushLeafRange(leaf, start, leaf.keys.length); + const leafSize = leaf.keys.length; + if (start < leafSize) + pushLeafRange(leaf, start, leafSize); } }; @@ -1057,7 +1066,9 @@ export default class BTree implements ISortedMapF, ISortedMap stepDownIndex: number, cursorThis: MergeCursor ) => { - if (Number.isNaN(stepDownIndex) /* still walking up */ + const children = parent.children; + const nextHeight = height - 1; + if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { if (!payload.disqualified) { highestDisjoint = { node: parent, height }; @@ -1067,8 +1078,9 @@ export default class BTree implements ISortedMapF, ISortedMap } } else { addHighestDisjoint(); - for (let i = fromIndex + 1; i < parent.children.length; ++i) - addSharedNodeToDisjointSet(parent.children[i], height - 1); + const len = children.length; + for (let i = fromIndex + 1; i < len; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); } } else { // We have a valid step down index, so we need to disqualify the spine if needed. @@ -1079,7 +1091,7 @@ export default class BTree implements ISortedMapF, ISortedMap } addHighestDisjoint(); for (let i = fromIndex + 1; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(parent.children[i], height - 1); + addSharedNodeToDisjointSet(children[i], nextHeight); } }; @@ -1098,8 +1110,10 @@ export default class BTree implements ISortedMapF, ISortedMap // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range // of its children. 
disqualifySpine(cursorThis, spineIndex); + const children = node.children; + const nextHeight = height - 1; for (let i = 0; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(node.children[i], height - 1); + addSharedNodeToDisjointSet(children[i], nextHeight); } }; @@ -1158,17 +1172,18 @@ export default class BTree implements ISortedMapF, ISortedMap // Walk both cursors in alternating hops while (true) { - const order = cmp(BTree.getKey(curA), BTree.getKey(curB)); + const keyA = curA.leaf.keys[curA.leafIndex]; + const keyB = curB.leaf.keys[curB.leafIndex]; + const order = cmp(keyA, keyB); const areEqual = order === 0; if (areEqual) { - const key = BTree.getKey(curA); const vA = curA.leaf.values[curA.leafIndex]; const vB = curB.leaf.values[curB.leafIndex]; - const merged = mergeValues(key, vA, vB); - if (merged !== undefined) pending.push([key, merged]); - const outT = BTree.moveTo(curB, curA, key, false, areEqual, cmp); - const outL = BTree.moveTo(curA, curB, key, false, areEqual, cmp); + const merged = mergeValues(keyA, vA, vB); + if (merged !== undefined) pending.push([keyA, merged]); + const outT = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp); + const outL = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp); if (outT || outL) { if (!outT || !outL) { // In these cases, we pass areEqual=false because a return value of "out of tree" means @@ -1215,22 +1230,31 @@ export default class BTree implements ISortedMapF, ISortedMap startedEqual: boolean, cmp: (a:K,b:K)=>number ): boolean { + const onMoveInLeaf = cur.onMoveInLeaf; + const onExitLeaf = cur.onExitLeaf; + const onStepUp = cur.onStepUp; + const onStepDown = cur.onStepDown; + const onEnterLeaf = cur.onEnterLeaf; + const makePayload = cur.makePayload; // Fast path: destination within current leaf const leaf = cur.leaf; + const leafPayload = cur.leafPayload; const i = leaf.indexOf(targetKey, -1, cmp); const destInLeaf = i < 0 ? ~i : (isInclusive ? 
i : i + 1); - if (destInLeaf < leaf.keys.length) { - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, destInLeaf, startedEqual); + const leafKeyCount = leaf.keys.length; + if (destInLeaf < leafKeyCount) { + onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); cur.leafIndex = destInLeaf; return false; } // Find first ancestor with a viable right step const spine = cur.spine; + const initialSpineLength = spine.length; let descentLevel = -1; let descentIndex = -1; - for (let s = spine.length - 1; s >= 0; s--) { + for (let s = initialSpineLength - 1; s >= 0; s--) { const parent = spine[s].node; const indexOf = parent.indexOf(targetKey, -1, cmp); let stepDownIndex: number; @@ -1250,52 +1274,60 @@ export default class BTree implements ISortedMapF, ISortedMap // Exit leaf; we did walk out of it conceptually const startIndex = cur.leafIndex; - cur.onExitLeaf(leaf, cur.leafPayload, startIndex, startedEqual, cur); + onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); if (descentLevel < 0) { // No descent point; step up all the way; last callback gets infinity - for (let depth = spine.length - 1; depth >= 0; depth--) { + for (let depth = initialSpineLength - 1; depth >= 0; depth--) { const entry = spine[depth]; const sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; - cur.onStepUp(entry.node, BTree.heightOf(spine, depth), entry.payload, entry.childIndex, depth, sd, cur); + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, sd, cur); } return true; } // Step up through ancestors above the descentLevel - for (let depth = spine.length - 1; depth > descentLevel; depth--) { + for (let depth = initialSpineLength - 1; depth > descentLevel; depth--) { const entry = spine[depth]; - cur.onStepUp(entry.node, BTree.heightOf(spine, depth), entry.payload, entry.childIndex, depth, NaN, cur); + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, Number.NaN, cur); } const entry = spine[descentLevel]; - cur.onStepUp(entry.node, BTree.heightOf(spine, descentLevel), entry.payload, entry.childIndex, descentLevel, descentIndex, cur); + onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); entry.childIndex = descentIndex; // Descend, invoking onStepDown and creating payloads - let height = BTree.heightOf(spine, descentLevel) - 1; // calculate height before changing length + let height = initialSpineLength - descentLevel - 1; // calculate height before changing length spine.length = descentLevel + 1; let node: BNode = spine[descentLevel].node.children[descentIndex]; while (!node.isLeaf) { const ni = node as BNodeInternal; - const j = ni.indexOf(targetKey, 0, cmp); - const stepDownIndex = j + (isInclusive ? 0 : (j < ni.keys.length && cmp(ni.keys[j], targetKey) === 0 ? 
1 : 0)); - const payload = cur.makePayload(); + const keys = ni.keys; + let stepDownIndex = ni.indexOf(targetKey, 0, cmp); + if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) + stepDownIndex++; + const payload = makePayload(); + const spineIndex = spine.length; spine.push({ node: ni, childIndex: stepDownIndex, payload }); - cur.onStepDown(ni, height, spine.length - 1, stepDownIndex, cur); + onStepDown(ni, height, spineIndex, stepDownIndex, cur); node = ni.children[stepDownIndex]; height -= 1; } // Enter destination leaf const idx = node.indexOf(targetKey, -1, cmp); - const destIndex = idx < 0 ? ~idx : (isInclusive ? idx : idx + 1); - check(destIndex >= 0 && destIndex < node.keys.length, "moveTo: destination out of bounds"); + let destIndex: number; + if (idx < 0) + destIndex = ~idx; + else + destIndex = isInclusive ? idx : idx + 1; + const nodeKeys = node.keys; + check(destIndex >= 0 && destIndex < nodeKeys.length, "moveTo: destination out of bounds"); cur.leaf = node; - cur.leafPayload = cur.makePayload(); + cur.leafPayload = makePayload(); cur.leafIndex = destIndex; - cur.onEnterLeaf(node, destIndex, cur, other); + onEnterLeaf(node, destIndex, cur, other); return false; } From 225eda7c7cc43143a2c7e306fc8d1c6069facea6 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 07:41:08 -0800 Subject: [PATCH 044/143] optimization --- b+tree.d.ts | 1 - b+tree.js | 3 --- b+tree.ts | 4 ---- 3 files changed, 8 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index eca0216..60cc8f5 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -303,7 +303,6 @@ export default class BTree implements ISortedMapF, ISort * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. */ private static decompose; - private static heightOf; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. 
* Returns true if end-of-tree was reached (cursor not structurally mutated). diff --git a/b+tree.js b/b+tree.js index 05fe512..c036eb2 100644 --- a/b+tree.js +++ b/b+tree.js @@ -1013,7 +1013,6 @@ var BTree = /** @class */ (function () { flushPendingEntries(); return { disjoint: disjoint, tallestIndex: tallestIndex }; }; - BTree.heightOf = function (spine, depth) { return spine.length - depth; }; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). @@ -1102,8 +1101,6 @@ var BTree = /** @class */ (function () { destIndex = ~idx; else destIndex = isInclusive ? idx : idx + 1; - var nodeKeys = node.keys; - check(destIndex >= 0 && destIndex < nodeKeys.length, "moveTo: destination out of bounds"); cur.leaf = node; cur.leafPayload = makePayload(); cur.leafIndex = destIndex; diff --git a/b+tree.ts b/b+tree.ts index 05ea082..3f859e0 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1216,8 +1216,6 @@ export default class BTree implements ISortedMapF, ISortedMap return { disjoint, tallestIndex }; } - private static heightOf(spine: MergeCursor['spine'], depth: number) { return spine.length - depth; } - /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). @@ -1322,8 +1320,6 @@ export default class BTree implements ISortedMapF, ISortedMap destIndex = ~idx; else destIndex = isInclusive ? 
idx : idx + 1; - const nodeKeys = node.keys; - check(destIndex >= 0 && destIndex < nodeKeys.length, "moveTo: destination out of bounds"); cur.leaf = node; cur.leafPayload = makePayload(); cur.leafIndex = destIndex; From 0579f55a1d7ee7950e8fdcf4175bbf0c8c6c0d0a Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 08:28:11 -0800 Subject: [PATCH 045/143] cleanup --- b+tree.d.ts | 4 +++ b+tree.js | 53 ++++++++++++++++++++++-------------- b+tree.ts | 77 ++++++++++++++++++++++++++++++++--------------------- 3 files changed, 84 insertions(+), 50 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 60cc8f5..f0a8b86 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -303,6 +303,10 @@ export default class BTree implements ISortedMapF, ISort * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. */ private static decompose; + private static alternatingCount; + private static alternatingGetFirst; + private static alternatingGetSecond; + private static alternatingPush; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). 
diff --git a/b+tree.js b/b+tree.js index c036eb2..2feb6ac 100644 --- a/b+tree.js +++ b/b+tree.js @@ -565,14 +565,15 @@ var BTree = /** @class */ (function () { return this.clone(); // Decompose into disjoint subtrees and merged leaves var _a = BTree.decompose(this, other, merge), disjoint = _a.disjoint, tallestIndex = _a.tallestIndex; + var disjointEntryCount = BTree.alternatingCount(disjoint); // Start result at the tallest subtree from the disjoint set - var initialRoot = disjoint[tallestIndex][1]; + var initialRoot = BTree.alternatingGetSecond(disjoint, tallestIndex); var branchingFactor = this._maxNodeSize; var frontier = [initialRoot]; // Process all subtrees to the right of the tallest subtree - if (tallestIndex + 1 <= disjoint.length - 1) { + if (tallestIndex + 1 <= disjointEntryCount - 1) { BTree.updateFrontier(frontier, 0, BTree.getRightmostIndex); - BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjoint.length, 1, BTree.getRightmostIndex, BTree.getRightInsertionIndex, BTree.splitOffRightSide, BTree.updateRightMax); + BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjointEntryCount, 1, BTree.getRightmostIndex, BTree.getRightInsertionIndex, BTree.splitOffRightSide, BTree.updateRightMax); } // Process all subtrees to the left of the tallest subtree (reverse order) if (tallestIndex - 1 >= 0) { @@ -607,8 +608,8 @@ var BTree = /** @class */ (function () { // Iterate the assigned half of the disjoint set for (var i = start; i != end; i += step) { var currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf - var subtree = disjoint[i][1]; - var subtreeHeight = disjoint[i][0]; + var subtree = BTree.alternatingGetSecond(disjoint, i); + var subtreeHeight = BTree.alternatingGetFirst(disjoint, i); var insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation BTree.ensureNotShared(spine, 
isSharedFrontierDepth, insertionDepth, sideIndex); @@ -790,28 +791,27 @@ var BTree = /** @class */ (function () { var pending = []; var tallestIndex = -1, tallestHeight = -1; var flushPendingEntries = function () { - var total = pending.length; - if (total === 0) + var totalPairs = BTree.alternatingCount(pending); + if (totalPairs === 0) return; var max = left._maxNodeSize; - var leafCount = Math.ceil(total / max); - var remaining = total; - var offset = 0; + var leafCount = Math.ceil(totalPairs / max); + var remaining = totalPairs; + var pairIndex = 0; while (leafCount > 0) { var chunkSize = Math.ceil(remaining / leafCount); var keys = new Array(chunkSize); var vals = new Array(chunkSize); - for (var i = 0; i < chunkSize; ++i) { - var entry = pending[offset++]; - keys[i] = entry[0]; - vals[i] = entry[1]; + for (var i = 0; i < chunkSize; ++i, ++pairIndex) { + keys[i] = BTree.alternatingGetFirst(pending, pairIndex); + vals[i] = BTree.alternatingGetSecond(pending, pairIndex); } remaining -= chunkSize; leafCount--; var leaf = new BNode(keys, vals); - disjoint.push([0, leaf]); + BTree.alternatingPush(disjoint, 0, leaf); if (tallestHeight < 0) { - tallestIndex = disjoint.length - 1; + tallestIndex = BTree.alternatingCount(disjoint) - 1; tallestHeight = 0; } } @@ -822,9 +822,9 @@ var BTree = /** @class */ (function () { var addSharedNodeToDisjointSet = function (node, height) { flushPendingEntries(); node.isShared = true; - disjoint.push([height, node]); + BTree.alternatingPush(disjoint, height, node); if (height > tallestHeight) { - tallestIndex = disjoint.length - 1; + tallestIndex = BTree.alternatingCount(disjoint) - 1; tallestHeight = height; } }; @@ -849,7 +849,7 @@ var BTree = /** @class */ (function () { var keys = leaf.keys; var values = leaf.values; for (var i = from; i < toExclusive; ++i) - pending.push([keys[i], values[i]]); + BTree.alternatingPush(pending, keys[i], values[i]); }; var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual) { 
check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); @@ -975,7 +975,7 @@ var BTree = /** @class */ (function () { var vB = curB.leaf.values[curB.leafIndex]; var merged = mergeValues(keyA, vA, vB); if (merged !== undefined) - pending.push([keyA, merged]); + BTree.alternatingPush(pending, keyA, merged); var outT = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp); var outL = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp); if (outT || outL) { @@ -1013,6 +1013,19 @@ var BTree = /** @class */ (function () { flushPendingEntries(); return { disjoint: disjoint, tallestIndex: tallestIndex }; }; + BTree.alternatingCount = function (list) { + return list.length >> 1; + }; + BTree.alternatingGetFirst = function (list, index) { + return list[index << 1]; + }; + BTree.alternatingGetSecond = function (list, index) { + return list[(index << 1) + 1]; + }; + BTree.alternatingPush = function (list, first, second) { + // Micro benchmarks show this is the fastest way to do this + list.push(first, second); + }; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). 
diff --git a/b+tree.ts b/b+tree.ts index 3f859e0..d30c1a0 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -647,21 +647,22 @@ export default class BTree implements ISortedMapF, ISortedMap // Decompose into disjoint subtrees and merged leaves const { disjoint, tallestIndex } = BTree.decompose(this, other, merge); + const disjointEntryCount = BTree.alternatingCount(disjoint); // Start result at the tallest subtree from the disjoint set - const initialRoot = disjoint[tallestIndex][1]; + const initialRoot = BTree.alternatingGetSecond>(disjoint, tallestIndex); const branchingFactor = this._maxNodeSize; const frontier: BNode[] = [initialRoot]; // Process all subtrees to the right of the tallest subtree - if (tallestIndex + 1 <= disjoint.length - 1) { + if (tallestIndex + 1 <= disjointEntryCount - 1) { BTree.updateFrontier(frontier, 0, BTree.getRightmostIndex); BTree.processSide( branchingFactor, disjoint, frontier, tallestIndex + 1, - disjoint.length, 1, + disjointEntryCount, 1, BTree.getRightmostIndex, BTree.getRightInsertionIndex, BTree.splitOffRightSide, @@ -699,7 +700,7 @@ export default class BTree implements ISortedMapF, ISortedMap */ private static processSide( branchingFactor: number, - disjoint: DisjointEntry[], + disjoint: (number | BNode)[], spine: BNode[], start: number, end: number, @@ -728,8 +729,8 @@ export default class BTree implements ISortedMapF, ISortedMap // Iterate the assigned half of the disjoint set for (let i = start; i != end; i += step) { const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf - const subtree = disjoint[i][1]; - const subtreeHeight = disjoint[i][0]; + const subtree = BTree.alternatingGetSecond>(disjoint, i); + const subtreeHeight = BTree.alternatingGetFirst>(disjoint, i); const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation @@ -947,38 +948,37 @@ export default class BTree implements 
ISortedMapF, ISortedMap ): DecomposeResult { const cmp = left._compare; check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); - const disjoint: DisjointEntry[] = []; - const pending: [K,V][] = []; + const disjoint: (number | BNode)[] = []; + const pending: (K | V)[] = []; let tallestIndex = -1, tallestHeight = -1; const flushPendingEntries = () => { - const total = pending.length; - if (total === 0) + const totalPairs = BTree.alternatingCount(pending); + if (totalPairs === 0) return; const max = left._maxNodeSize; - let leafCount = Math.ceil(total / max); - let remaining = total; - let offset = 0; - while (leafCount > 0) { + let leafCount = Math.ceil(totalPairs / max); + let remaining = totalPairs; + let pairIndex = 0; + while (leafCount > 0) { const chunkSize = Math.ceil(remaining / leafCount); const keys = new Array(chunkSize); const vals = new Array(chunkSize); - for (let i = 0; i < chunkSize; ++i) { - const entry = pending[offset++]; - keys[i] = entry[0]; - vals[i] = entry[1]; + for (let i = 0; i < chunkSize; ++i, ++pairIndex) { + keys[i] = BTree.alternatingGetFirst(pending, pairIndex); + vals[i] = BTree.alternatingGetSecond(pending, pairIndex); } remaining -= chunkSize; leafCount--; - const leaf = new BNode(keys, vals); - disjoint.push([0, leaf]); + const leaf = new BNode(keys, vals); + BTree.alternatingPush>(disjoint, 0, leaf); if (tallestHeight < 0) { - tallestIndex = disjoint.length - 1; - tallestHeight = 0; - } + tallestIndex = BTree.alternatingCount(disjoint) - 1; + tallestHeight = 0; } - pending.length = 0; + } + pending.length = 0; }; // Have to do this as cast to convince TS it's ever assigned @@ -987,9 +987,9 @@ export default class BTree implements ISortedMapF, ISortedMap const addSharedNodeToDisjointSet = (node: BNode, height: number) => { flushPendingEntries(); node.isShared = true; - disjoint.push([height, node]); + BTree.alternatingPush>(disjoint, height, node); if (height > tallestHeight) { - tallestIndex 
= disjoint.length - 1; + tallestIndex = BTree.alternatingCount(disjoint) - 1; tallestHeight = height; } }; @@ -1018,7 +1018,7 @@ export default class BTree implements ISortedMapF, ISortedMap const keys = leaf.keys; const values = leaf.values; for (let i = from; i < toExclusive; ++i) - pending.push([keys[i], values[i]]); + BTree.alternatingPush(pending, keys[i], values[i]); }; const onMoveInLeaf = ( @@ -1181,7 +1181,8 @@ export default class BTree implements ISortedMapF, ISortedMap const vA = curA.leaf.values[curA.leafIndex]; const vB = curB.leaf.values[curB.leafIndex]; const merged = mergeValues(keyA, vA, vB); - if (merged !== undefined) pending.push([keyA, merged]); + if (merged !== undefined) + BTree.alternatingPush(pending, keyA, merged); const outT = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp); const outL = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp); if (outT || outL) { @@ -1216,6 +1217,23 @@ export default class BTree implements ISortedMapF, ISortedMap return { disjoint, tallestIndex }; } + private static alternatingCount(list: unknown[]): number { + return list.length >> 1; + } + + private static alternatingGetFirst(list: Array, index: number): TFirst { + return list[index << 1] as TFirst; + } + + private static alternatingGetSecond(list: Array, index: number): TSecond { + return list[(index << 1) + 1] as TSecond; + } + + private static alternatingPush(list: Array, first: TFirst, second: TSecond): void { + // Micro benchmarks show this is the fastest way to do this + list.push(first, second); + } + /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). 
@@ -2701,8 +2719,7 @@ interface MergeCursor { onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; } -type DisjointEntry = [height: number, node: BNode]; -type DecomposeResult = { disjoint: DisjointEntry[], tallestIndex: number }; +type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; /** * Determines whether two nodes are overlapping in key range. From 57e211d8ffcd246d5c3e4c5f3b6bd84d0bf8d29b Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 12:36:45 -0800 Subject: [PATCH 046/143] speed up intersect --- b+tree.d.ts | 1 + b+tree.js | 98 +++++++++++++++++++++++++++++++++++--------------- b+tree.ts | 101 +++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 144 insertions(+), 56 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index f0a8b86..cff900b 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -307,6 +307,7 @@ export default class BTree implements ISortedMapF, ISort private static alternatingGetFirst; private static alternatingGetSecond; private static alternatingPush; + private static moveOne; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). diff --git a/b+tree.js b/b+tree.js index 2feb6ac..7436388 100644 --- a/b+tree.js +++ b/b+tree.js @@ -509,36 +509,42 @@ var BTree = /** @class */ (function () { return; var makePayload = function () { return undefined; }; // Initialize cursors at minimum keys. 
- var curA = BTree.createCursor(this, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); - var curB = BTree.createCursor(other, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); - // Walk both cursors + var cursorA = BTree.createCursor(this, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); + var cursorB = BTree.createCursor(other, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); + var leading = cursorA; + var trailing = cursorB; + var order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); while (true) { - var order = cmp(BTree.getKey(curA), BTree.getKey(curB)); var areEqual = order === 0; if (areEqual) { - var key = BTree.getKey(curA); - var vA = curA.leaf.values[curA.leafIndex]; - var vB = curB.leaf.values[curB.leafIndex]; + var key = BTree.getKey(leading); + var vA = cursorA.leaf.values[cursorA.leafIndex]; + var vB = cursorB.leaf.values[cursorB.leafIndex]; intersection(key, vA, vB); - var outT = BTree.moveTo(curB, curA, key, false, areEqual, cmp); - var outL = BTree.moveTo(curA, curB, key, false, areEqual, cmp); + var outT = BTree.moveOne(trailing, leading, key, false, areEqual, cmp); + var outL = BTree.moveOne(leading, trailing, key, false, areEqual, cmp); if (outT && outL) break; + order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); } else { - var leading = void 0, trailing = void 0; - if (order > 0) { - trailing = curB; - leading = curA; - } - else { - trailing = curA; - leading = curB; + if (order < 0) { + var tmp = trailing; + trailing = leading; + leading = tmp; } - if (BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp)) { + // At this point, leading is guaranteed to be ahead of trailing. + var _a = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp), out = _a[0], nowEqual = _a[1]; + if (out) { // We've reached the end of one tree, so intersections are guaranteed to be done. 
break; } + else if (nowEqual) { + order = 0; + } + else { + order = -1; // trailing is ahead of leading + } } } }; @@ -976,8 +982,8 @@ var BTree = /** @class */ (function () { var merged = mergeValues(keyA, vA, vB); if (merged !== undefined) BTree.alternatingPush(pending, keyA, merged); - var outT = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp); - var outL = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp); + var outT = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp)[0]; + var outL = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp)[0]; if (outT || outL) { if (!outT || !outL) { // In these cases, we pass areEqual=false because a return value of "out of tree" means @@ -1003,7 +1009,7 @@ var BTree = /** @class */ (function () { trailing = curA; leading = curB; } - var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); + var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp)[0]; if (out) { BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); break; @@ -1026,6 +1032,17 @@ var BTree = /** @class */ (function () { // Micro benchmarks show this is the fastest way to do this list.push(first, second); }; + BTree.moveOne = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { + var leaf = cur.leaf; + var nextIndex = cur.leafIndex + 1; + if (nextIndex < leaf.keys.length) { + // Still within current leaf + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, startedEqual); + cur.leafIndex = nextIndex; + return false; + } + return BTree.moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp)[0]; + }; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). @@ -1041,12 +1058,27 @@ var BTree = /** @class */ (function () { var leaf = cur.leaf; var leafPayload = cur.leafPayload; var i = leaf.indexOf(targetKey, -1, cmp); - var destInLeaf = i < 0 ? 
~i : (isInclusive ? i : i + 1); + var destInLeaf; + var areEqual; + if (i < 0) { + destInLeaf = ~i; + areEqual = false; + } + else { + if (isInclusive) { + destInLeaf = i; + areEqual = true; + } + else { + destInLeaf = i + 1; + areEqual = false; + } + } var leafKeyCount = leaf.keys.length; if (destInLeaf < leafKeyCount) { onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); cur.leafIndex = destInLeaf; - return false; + return [false, areEqual]; } // Find first ancestor with a viable right step var spine = cur.spine; @@ -1080,7 +1112,7 @@ var BTree = /** @class */ (function () { var sd = depth === 0 ? Number.POSITIVE_INFINITY : Number.NaN; onStepUp(entry_1.node, initialSpineLength - depth, entry_1.payload, entry_1.childIndex, depth, sd, cur); } - return true; + return [true, false]; } // Step up through ancestors above the descentLevel for (var depth = initialSpineLength - 1; depth > descentLevel; depth--) { @@ -1110,15 +1142,25 @@ var BTree = /** @class */ (function () { // Enter destination leaf var idx = node.indexOf(targetKey, -1, cmp); var destIndex; - if (idx < 0) + if (idx < 0) { destIndex = ~idx; - else - destIndex = isInclusive ? idx : idx + 1; + areEqual = false; + } + else { + if (isInclusive) { + destIndex = idx; + areEqual = true; + } + else { + destIndex = idx + 1; + areEqual = false; + } + } cur.leaf = node; cur.leafPayload = makePayload(); cur.leafIndex = destIndex; onEnterLeaf(node, destIndex, cur, other); - return false; + return [false, areEqual]; }; /** * Create a cursor pointing to the leftmost key of the supplied tree. diff --git a/b+tree.ts b/b+tree.ts index d30c1a0..de062ff 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -590,33 +590,38 @@ export default class BTree implements ISortedMapF, ISortedMap const makePayload = (): undefined => undefined; // Initialize cursors at minimum keys. 
- const curA = BTree.createCursor(this, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); - const curB = BTree.createCursor(other, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); - - // Walk both cursors + let cursorA = BTree.createCursor(this, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); + let cursorB = BTree.createCursor(other, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); + let leading = cursorA; + let trailing = cursorB; + let order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); + while (true) { - const order = cmp(BTree.getKey(curA), BTree.getKey(curB)); const areEqual = order === 0; - if (areEqual) { - const key = BTree.getKey(curA); - const vA = curA.leaf.values[curA.leafIndex]; - const vB = curB.leaf.values[curB.leafIndex]; + const key = BTree.getKey(leading); + const vA = cursorA.leaf.values[cursorA.leafIndex]; + const vB = cursorB.leaf.values[cursorB.leafIndex]; intersection(key, vA, vB); - const outT = BTree.moveTo(curB, curA, key, false, areEqual, cmp); - const outL = BTree.moveTo(curA, curB, key, false, areEqual, cmp); + const outT = BTree.moveOne(trailing, leading, key, false, areEqual, cmp); + const outL = BTree.moveOne(leading, trailing, key, false, areEqual, cmp); if (outT && outL) break; + order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); } else { - let leading: MergeCursor, trailing: MergeCursor; - if (order > 0) { - trailing = curB; leading = curA; - } else { - trailing = curA; leading = curB; + if (order < 0) { + const tmp = trailing; + trailing = leading; leading = tmp; } - if (BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp)) { + // At this point, leading is guaranteed to be ahead of trailing. + const [out, nowEqual] = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp) + if (out) { // We've reached the end of one tree, so intersections are guaranteed to be done. 
break; + } else if (nowEqual) { + order = 0; + } else { + order = -1; // trailing is ahead of leading } } } @@ -1183,8 +1188,8 @@ export default class BTree implements ISortedMapF, ISortedMap const merged = mergeValues(keyA, vA, vB); if (merged !== undefined) BTree.alternatingPush(pending, keyA, merged); - const outT = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp); - const outL = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp); + const [outT] = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp); + const [outL] = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp); if (outT || outL) { if (!outT || !outL) { // In these cases, we pass areEqual=false because a return value of "out of tree" means @@ -1205,7 +1210,7 @@ export default class BTree implements ISortedMapF, ISortedMap } else { trailing = curA; leading = curB; } - const out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); + const [out] = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (out) { BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); break; @@ -1234,6 +1239,25 @@ export default class BTree implements ISortedMapF, ISortedMap list.push(first, second); } + private static moveOne( + cur: MergeCursor, + other: MergeCursor, + targetKey: K, + isInclusive: boolean, + startedEqual: boolean, + cmp: (a:K,b:K)=>number + ): boolean { + const leaf = cur.leaf; + const nextIndex = cur.leafIndex + 1; + if (nextIndex < leaf.keys.length) { + // Still within current leaf + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, startedEqual); + cur.leafIndex = nextIndex; + return false; + } + return BTree.moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp)[0]; + } + /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns true if end-of-tree was reached (cursor not structurally mutated). 
@@ -1245,7 +1269,7 @@ export default class BTree implements ISortedMapF, ISortedMap isInclusive: boolean, startedEqual: boolean, cmp: (a:K,b:K)=>number - ): boolean { + ): [outOfTree: boolean, areEqual: boolean] { const onMoveInLeaf = cur.onMoveInLeaf; const onExitLeaf = cur.onExitLeaf; const onStepUp = cur.onStepUp; @@ -1256,12 +1280,25 @@ export default class BTree implements ISortedMapF, ISortedMap const leaf = cur.leaf; const leafPayload = cur.leafPayload; const i = leaf.indexOf(targetKey, -1, cmp); - const destInLeaf = i < 0 ? ~i : (isInclusive ? i : i + 1); + let destInLeaf: number; + let areEqual: boolean; + if (i < 0) { + destInLeaf = ~i; + areEqual = false; + } else { + if (isInclusive) { + destInLeaf = i; + areEqual = true; + } else { + destInLeaf = i + 1; + areEqual = false; + } + } const leafKeyCount = leaf.keys.length; if (destInLeaf < leafKeyCount) { onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); cur.leafIndex = destInLeaf; - return false; + return [false, areEqual]; } // Find first ancestor with a viable right step @@ -1299,7 +1336,7 @@ export default class BTree implements ISortedMapF, ISortedMap const sd = depth === 0 ? Number.POSITIVE_INFINITY : Number.NaN; onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, sd, cur); } - return true; + return [true, false]; } // Step up through ancestors above the descentLevel @@ -1334,15 +1371,23 @@ export default class BTree implements ISortedMapF, ISortedMap // Enter destination leaf const idx = node.indexOf(targetKey, -1, cmp); let destIndex: number; - if (idx < 0) + if (idx < 0) { destIndex = ~idx; - else - destIndex = isInclusive ? 
idx : idx + 1; + areEqual = false; + } else { + if (isInclusive) { + destIndex = idx; + areEqual = true; + } else { + destIndex = idx + 1; + areEqual = false; + } + } cur.leaf = node; cur.leafPayload = makePayload(); cur.leafIndex = destIndex; onEnterLeaf(node, destIndex, cur, other); - return false; + return [false, areEqual]; } /** From a63a804394ad9f58f7b5199de6e03ac8f2204826 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 12:53:00 -0800 Subject: [PATCH 047/143] perf of decompose --- b+tree.js | 48 +++++++++++++++++++++++++++--------------------- b+tree.ts | 45 +++++++++++++++++++++++++++------------------ 2 files changed, 54 insertions(+), 39 deletions(-) diff --git a/b+tree.js b/b+tree.js index 7436388..22d975a 100644 --- a/b+tree.js +++ b/b+tree.js @@ -970,50 +970,56 @@ var BTree = /** @class */ (function () { }; initDisqualify(curA, curB); initDisqualify(curB, curA); + var leading = curA; + var trailing = curB; + var order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); // Walk both cursors in alternating hops while (true) { - var keyA = curA.leaf.keys[curA.leafIndex]; - var keyB = curB.leaf.keys[curB.leafIndex]; - var order = cmp(keyA, keyB); var areEqual = order === 0; if (areEqual) { + var key = BTree.getKey(leading); var vA = curA.leaf.values[curA.leafIndex]; var vB = curB.leaf.values[curB.leafIndex]; - var merged = mergeValues(keyA, vA, vB); + // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value + // to pending because they respect the areEqual flag during their moves. 
+ var merged = mergeValues(key, vA, vB); if (merged !== undefined) - BTree.alternatingPush(pending, keyA, merged); - var outT = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp)[0]; - var outL = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp)[0]; - if (outT || outL) { - if (!outT || !outL) { + BTree.alternatingPush(pending, key, merged); + var outTrailing = BTree.moveOne(trailing, leading, key, false, areEqual, cmp); + var outLeading = BTree.moveOne(leading, trailing, key, false, areEqual, cmp); + if (outTrailing || outLeading) { + if (!outTrailing || !outLeading) { // In these cases, we pass areEqual=false because a return value of "out of tree" means // the cursor did not move. This must be true because they started equal and one of them had more tree // to walk (one is !out), so they cannot be equal at this point. - if (outT) { - BTree.moveTo(curA, curB, maxKey, false, false, cmp); + if (outTrailing) { + BTree.moveTo(leading, trailing, maxKey, false, false, cmp); } else { - BTree.moveTo(curB, curA, maxKey, false, false, cmp); + BTree.moveTo(trailing, leading, maxKey, false, false, cmp); } } break; } + order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); } else { - var trailing = void 0, leading = void 0; - if (order > 0) { - trailing = curB; - leading = curA; - } - else { - trailing = curA; - leading = curB; + if (order < 0) { + var tmp = trailing; + trailing = leading; + leading = tmp; } - var out = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp)[0]; + var _a = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp), out = _a[0], nowEqual = _a[1]; if (out) { BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); break; } + else if (nowEqual) { + order = 0; + } + else { + order = -1; + } } } flushPendingEntries(); diff --git a/b+tree.ts b/b+tree.ts index de062ff..0811ddb 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1172,48 +1172,57 @@ export default class BTree implements ISortedMapF, 
ISortedMap entry.payload.disqualified = true; } }; + initDisqualify(curA, curB); initDisqualify(curB, curA); + let leading = curA; + let trailing = curB; + let order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); + // Walk both cursors in alternating hops while (true) { - const keyA = curA.leaf.keys[curA.leafIndex]; - const keyB = curB.leaf.keys[curB.leafIndex]; - const order = cmp(keyA, keyB); const areEqual = order === 0; if (areEqual) { + const key = BTree.getKey(leading); const vA = curA.leaf.values[curA.leafIndex]; const vB = curB.leaf.values[curB.leafIndex]; - const merged = mergeValues(keyA, vA, vB); + // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value + // to pending because they respect the areEqual flag during their moves. + const merged = mergeValues(key, vA, vB); if (merged !== undefined) - BTree.alternatingPush(pending, keyA, merged); - const [outT] = BTree.moveTo(curB, curA, keyA, false, areEqual, cmp); - const [outL] = BTree.moveTo(curA, curB, keyA, false, areEqual, cmp); - if (outT || outL) { - if (!outT || !outL) { + BTree.alternatingPush(pending, key, merged); + const outTrailing = BTree.moveOne(trailing, leading, key, false, areEqual, cmp); + const outLeading = BTree.moveOne(leading, trailing, key, false, areEqual, cmp); + if (outTrailing || outLeading) { + if (!outTrailing || !outLeading) { // In these cases, we pass areEqual=false because a return value of "out of tree" means // the cursor did not move. This must be true because they started equal and one of them had more tree // to walk (one is !out), so they cannot be equal at this point. 
- if (outT) { - BTree.moveTo(curA, curB, maxKey, false, false, cmp); + if (outTrailing) { + BTree.moveTo(leading, trailing, maxKey, false, false, cmp); } else { - BTree.moveTo(curB, curA, maxKey, false, false, cmp); + BTree.moveTo(trailing, leading, maxKey, false, false, cmp); } } break; } + order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); } else { - let trailing: MergeCursor, leading: MergeCursor; - if (order > 0) { - trailing = curB; leading = curA; - } else { - trailing = curA; leading = curB; + if (order < 0) { + const tmp = trailing; + trailing = leading; + leading = tmp; } - const [out] = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); + const [out, nowEqual] = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); if (out) { BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); break; + } else if (nowEqual) { + order = 0; + } else { + order = -1; } } } From da716d371f809096ec8f14018fbd310702775e20 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 13:20:25 -0800 Subject: [PATCH 048/143] improve fuzz tests for merge --- b+tree.test.ts | 131 +++++++++++++++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 52 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index a0ca0cb..7a262d3 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -2037,13 +2037,36 @@ function swap(keys: any[], i: number, j: number) { keys[j] = tmp; } -function makeArray(size: number, randomOrder: boolean, spacing = 10) { - var keys: number[] = [], i, n; - for (i = 0, n = 0; i < size; i++, n += 1 + randInt(spacing)) - keys[i] = n; - if (randomOrder) - for (i = 0; i < size; i++) - swap(keys, i, randInt(size)); +function makeArray(size: number, randomOrder: boolean, spacing = 10, collisionChance = 0, rng?: MersenneTwister) { + const randomizer = rng ?? 
rand; + const useGlobalRand = rng === undefined; + const randomFloat = () => { + if (typeof randomizer.random === 'function') + return randomizer.random(); + return Math.random(); + }; + const randomIntWithMax = (max: number) => { + if (max <= 0) + return 0; + if (useGlobalRand) + return randInt(max); + return Math.floor(randomFloat() * max); + }; + + const keys: number[] = []; + let current = 0; + for (let i = 0; i < size; i++) { + if (i > 0 && collisionChance > 0 && randomFloat() < collisionChance) { + keys[i] = keys[i - 1]; + } else { + current += 1 + randomIntWithMax(spacing); + keys[i] = current; + } + } + if (randomOrder) { + for (let i = 0; i < size; i++) + swap(keys, i, randomIntWithMax(size)); + } return keys; } @@ -2055,7 +2078,8 @@ describe('BTree merge fuzz tests', () => { const FUZZ_SETTINGS = { branchingFactors: [4, 5, 32], ooms: [0, 1, 2, 3], - fractionsPerOOM: [0.0001, 0.01, 0.1, 0.25, 0.5] + fractionsPerOOM: [0.0001, 0.01, 0.1, 0.25, 0.5], + collisionChances: [0, 0.01, 0.1, 0.5] } as const; const RANDOM_EDITS_PER_TEST = 20; const TIMEOUT_MS = 30_000; @@ -2064,6 +2088,10 @@ describe('BTree merge fuzz tests', () => { if (fraction < 0 || fraction > 1) throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); }); + FUZZ_SETTINGS.collisionChances.forEach(chance => { + if (chance < 0 || chance > 1) + throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); + }); jest.setTimeout(TIMEOUT_MS); @@ -2071,58 +2099,57 @@ describe('BTree merge fuzz tests', () => { for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { describe(`branching factor ${maxNodeSize}`, () => { - for (const oom of FUZZ_SETTINGS.ooms) { - const size = 5 * Math.pow(10, oom); - for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { - const fractionB = 1 - fractionA; - - test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}`, () => { - const treeA = new BTree([], compare, maxNodeSize); - const 
treeB = new BTree([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1); - const sorted = Array.from(new Set(keys)).sort(compare); - - const aArray: [number, number][] = []; - const bArray: [number, number][] = []; - for (const value of keys) { - if (rng.random() < fractionA) { - aArray.push([value, value]); - treeA.set(value, value); - } else { - bArray.push([value, value]); - treeB.set(value, value); + for (const collisionChance of FUZZ_SETTINGS.collisionChances) { + for (const oom of FUZZ_SETTINGS.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { + const fractionB = 1 - fractionA; + const collisionLabel = collisionChance.toFixed(2); + + test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { + const treeA = new BTree([], compare, maxNodeSize); + const treeB = new BTree([], compare, maxNodeSize); + + const keys = makeArray(size, true, 1, collisionChance, rng); + const sorted = Array.from(new Set(keys)).sort(compare); + + for (const value of keys) { + if (rng.random() < fractionA) { + treeA.set(value, value); + } else { + treeB.set(value, value); + } } - } - aArray.sort((a, b) => compare(a[0], b[0])); - bArray.sort((a, b) => compare(a[0], b[0])); + const aArray = treeA.toArray(); + const bArray = treeB.toArray(); - const merged = treeA.merge(treeB, mergeFn); - merged.checkValid(); + const merged = treeA.merge(treeB, mergeFn); + merged.checkValid(); - expect(merged.toArray()).toEqual(sorted.map(k => [k, k])); + expect(merged.toArray()).toEqual(sorted.map(k => [k, k])); - // Merge should not have mutated inputs - expect(treeA.toArray()).toEqual(aArray); - expect(treeB.toArray()).toEqual(bArray); + // Merge should not have mutated inputs + expect(treeA.toArray()).toEqual(aArray); + expect(treeB.toArray()).toEqual(bArray); - for (let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { - const key = 1 + randomInt(rng, size); - 
const action = rng.random(); - if (action < 0.33) { - merged.set(key, key); - } else if (action < 0.66) { - merged.set(key, -key); - } else { - merged.delete(key); + for (let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { + const key = 1 + randomInt(rng, size); + const action = rng.random(); + if (action < 0.33) { + merged.set(key, key); + } else if (action < 0.66) { + merged.set(key, -key); + } else { + merged.delete(key); + } } - }; - // Check for shared mutability issues - expect(treeA.toArray()).toEqual(aArray); - expect(treeB.toArray()).toEqual(bArray); - }); + // Check for shared mutability issues + expect(treeA.toArray()).toEqual(aArray); + expect(treeB.toArray()).toEqual(bArray); + }); + } } } }); From eaf703bf90ea2f3d4660bde5148248f843b6793d Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 13:23:14 -0800 Subject: [PATCH 049/143] reduce fuzz tests --- b+tree.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index 7a262d3..37bcacb 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -2077,9 +2077,9 @@ describe('BTree merge fuzz tests', () => { const mergeFn = (_k: number, left: number, _right: number) => left; const FUZZ_SETTINGS = { branchingFactors: [4, 5, 32], - ooms: [0, 1, 2, 3], - fractionsPerOOM: [0.0001, 0.01, 0.1, 0.25, 0.5], - collisionChances: [0, 0.01, 0.1, 0.5] + ooms: [0, 1, 2], // [0, 1, 2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], // [0.0001, 0.01, 0.1, 0.25, 0.5], + collisionChances: [0.1, 0.5], // [0, 0.01, 0.1, 0.5] } as const; const RANDOM_EDITS_PER_TEST = 20; const TIMEOUT_MS = 30_000; From e190e2199e7448bf04b1234a1206bd3ccc072256 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 13:28:43 -0800 Subject: [PATCH 050/143] test improvement --- b+tree.test.ts | 144 +++++++++++++++++++++++++------------------------ 1 file changed, 73 insertions(+), 71 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index 37bcacb..ab4f9c0 100644 --- 
a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1250,91 +1250,93 @@ function testIntersect(maxNodeSize: number) { describe('BTree intersect fuzz tests', () => { const compare = (a: number, b: number) => a - b; - const branchingFactors = [4, 8, 16, 32]; - const seeds = [0x1234ABCD, 0x9ABCDEFF]; const FUZZ_SETTINGS = { - scenarioBudget: 2, - iterationsPerScenario: 3, - maxInsertSize: 200, - keyRange: 5_000, - valueRange: 1_000, - timeoutMs: 8_000 + branchingFactors: [4, 5, 32], + ooms: [2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], + collisionChances: [0.05, 0.1, 0.3], + timeoutMs: 30_000 } as const; - test('randomized intersects across branching factors', () => { - jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { + if (fraction < 0 || fraction > 1) + throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); + }); + FUZZ_SETTINGS.collisionChances.forEach(chance => { + if (chance < 0 || chance > 1) + throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); + }); - const scenarioConfigs: Array<{ seedBase: number, maxNodeSize: number }> = []; - for (const seedBase of seeds) - for (const maxNodeSize of branchingFactors) - scenarioConfigs.push({ seedBase, maxNodeSize }); + jest.setTimeout(FUZZ_SETTINGS.timeoutMs); - const scenariosToRun = Math.min(FUZZ_SETTINGS.scenarioBudget, scenarioConfigs.length); - const selectedScenarios = scenarioConfigs.slice(0, scenariosToRun); + const rng = new MersenneTwister(0xC0FFEE); - for (const { seedBase, maxNodeSize } of selectedScenarios) { - const baseSeed = (seedBase ^ (maxNodeSize * 0x9E3779B1)) >>> 0; - const fuzzRand = new MersenneTwister(baseSeed); - const nextInt = (limit: number) => limit <= 0 ? 
0 : Math.floor(fuzzRand.random() * limit); + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { + describe(`branching factor ${maxNodeSize}`, () => { + for (const collisionChance of FUZZ_SETTINGS.collisionChances) { + for (const oom of FUZZ_SETTINGS.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { + const fractionB = 1 - fractionA; + const collisionLabel = collisionChance.toFixed(2); - for (let iteration = 0; iteration < FUZZ_SETTINGS.iterationsPerScenario; iteration++) { - const treeA = new BTree([], compare, maxNodeSize); - const treeB = new BTree([], compare, maxNodeSize); - const mapA = new Map(); - const mapB = new Map(); + test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { + const treeA = new BTree([], compare, maxNodeSize); + const treeB = new BTree([], compare, maxNodeSize); - const sizeA = nextInt(FUZZ_SETTINGS.maxInsertSize); - const sizeB = nextInt(FUZZ_SETTINGS.maxInsertSize); + const keys = makeArray(size, true, 1, collisionChance, rng); - for (let i = 0; i < sizeA; i++) { - const key = nextInt(FUZZ_SETTINGS.keyRange); - const value = nextInt(FUZZ_SETTINGS.valueRange); - treeA.set(key, value); - mapA.set(key, value); - } + for (const value of keys) { + const assignToA = rng.random() < fractionA; + const assignToB = rng.random() < fractionB; + + if (!assignToA && !assignToB) { + if (rng.random() < 0.5) + treeA.set(value, value); + else + treeB.set(value, value); + continue; + } - for (let i = 0; i < sizeB; i++) { - const key = nextInt(FUZZ_SETTINGS.keyRange); - const value = nextInt(FUZZ_SETTINGS.valueRange); - treeB.set(key, value); - mapB.set(key, value); - } + if (assignToA) + treeA.set(value, value); + if (assignToB) + treeB.set(value, value); + } - const expected: Array<{ key: number, leftValue: number, rightValue: number }> = []; - mapA.forEach((leftValue, key) => { - const rightValue = mapB.get(key); - if 
(rightValue !== undefined) { - expected.push({ key, leftValue, rightValue }); - } - }); - expected.sort((a, b) => a.key - b.key); + const aArray = treeA.toArray(); + const bArray = treeB.toArray(); + const bMap = new Map(bArray); + const expected: Array<[number, number, number]> = []; + for (const [key, leftValue] of aArray) { + const rightValue = bMap.get(key); + if (rightValue !== undefined) + expected.push([key, leftValue, rightValue]); + } - const actual: Array<{ key: number, leftValue: number, rightValue: number }> = []; - treeA.intersect(treeB, (key, leftValue, rightValue) => { - actual.push({ key, leftValue, rightValue }); - }); - expect(actual).toEqual(expected); + const actual: Array<[number, number, number]> = []; + treeA.intersect(treeB, (key, leftValue, rightValue) => { + actual.push([key, leftValue, rightValue]); + }); + expect(actual).toEqual(expected); - const swapped: Array<{ key: number, leftValue: number, rightValue: number }> = []; - treeB.intersect(treeA, (key, leftValue, rightValue) => { - swapped.push({ key, leftValue, rightValue }); - }); - const swapExpected = expected.map(({ key, leftValue, rightValue }) => ({ - key, - leftValue: rightValue, - rightValue: leftValue - })); - expect(swapped).toEqual(swapExpected); - - const sortedA = Array.from(mapA.entries()).sort((a, b) => a[0] - b[0]); - const sortedB = Array.from(mapB.entries()).sort((a, b) => a[0] - b[0]); - expect(treeA.toArray()).toEqual(sortedA); - expect(treeB.toArray()).toEqual(sortedB); - treeA.checkValid(); - treeB.checkValid(); + const swappedActual: Array<[number, number, number]> = []; + treeB.intersect(treeA, (key, leftValue, rightValue) => { + swappedActual.push([key, leftValue, rightValue]); + }); + const swappedExpected = expected.map(([key, leftValue, rightValue]) => [key, rightValue, leftValue]); + expect(swappedActual).toEqual(swappedExpected); + + expect(treeA.toArray()).toEqual(aArray); + expect(treeB.toArray()).toEqual(bArray); + treeA.checkValid(); + 
treeB.checkValid(); + }); + } + } } - } - }); + }); + } }); describe('BTree merge tests with fanout 32', testMerge.bind(null, 32)); From 633f687ece61608c518bab2642de5980cf313f5a Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 13:35:28 -0800 Subject: [PATCH 051/143] docs --- b+tree.d.ts | 10 ++++++++-- b+tree.js | 47 +++++++++++++++++++++++++++------------------ b+tree.ts | 55 ++++++++++++++++++++++++++++++----------------------- 3 files changed, 67 insertions(+), 45 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index cff900b..5344165 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -307,11 +307,17 @@ export default class BTree implements ISortedMapF, ISort private static alternatingGetFirst; private static alternatingGetSecond; private static alternatingPush; - private static moveOne; /** - * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Walks the cursor forward by one key. + * Should only be called to advance cursors that started equal. * Returns true if end-of-tree was reached (cursor not structurally mutated). */ + private static moveForwardOne; + /** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). + * Also returns a boolean indicating if the target key was landed on exactly. + */ private static moveTo; /** * Create a cursor pointing to the leftmost key of the supplied tree. 
diff --git a/b+tree.js b/b+tree.js index 22d975a..5fe9d9e 100644 --- a/b+tree.js +++ b/b+tree.js @@ -500,7 +500,6 @@ var BTree = /** @class */ (function () { */ BTree.prototype.intersect = function (other, intersection) { var cmp = this._compare; - // Ensure both trees share the same comparator reference if (cmp !== other._compare) throw new Error("Cannot merge BTrees with different comparators."); if (this._maxNodeSize !== other._maxNodeSize) @@ -508,12 +507,14 @@ var BTree = /** @class */ (function () { if (other.size === 0 || this.size === 0) return; var makePayload = function () { return undefined; }; - // Initialize cursors at minimum keys. var cursorA = BTree.createCursor(this, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); var cursorB = BTree.createCursor(other, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); var leading = cursorA; var trailing = cursorB; var order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); + // The intersect walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. + // However, the only thing we care about is when the two cursors are equal (equality is intersection). + // When they are not equal we just advance the trailing cursor. 
while (true) { var areEqual = order === 0; if (areEqual) { @@ -521,8 +522,8 @@ var BTree = /** @class */ (function () { var vA = cursorA.leaf.values[cursorA.leafIndex]; var vB = cursorB.leaf.values[cursorB.leafIndex]; intersection(key, vA, vB); - var outT = BTree.moveOne(trailing, leading, key, false, areEqual, cmp); - var outL = BTree.moveOne(leading, trailing, key, false, areEqual, cmp); + var outT = BTree.moveForwardOne(trailing, leading, key, cmp); + var outL = BTree.moveForwardOne(leading, trailing, key, cmp); if (outT && outL) break; order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); @@ -985,8 +986,8 @@ var BTree = /** @class */ (function () { var merged = mergeValues(key, vA, vB); if (merged !== undefined) BTree.alternatingPush(pending, key, merged); - var outTrailing = BTree.moveOne(trailing, leading, key, false, areEqual, cmp); - var outLeading = BTree.moveOne(leading, trailing, key, false, areEqual, cmp); + var outTrailing = BTree.moveForwardOne(trailing, leading, key, cmp); + var outLeading = BTree.moveForwardOne(leading, trailing, key, cmp); if (outTrailing || outLeading) { if (!outTrailing || !outLeading) { // In these cases, we pass areEqual=false because a return value of "out of tree" means @@ -1038,20 +1039,28 @@ var BTree = /** @class */ (function () { // Micro benchmarks show this is the fastest way to do this list.push(first, second); }; - BTree.moveOne = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { + /** + * Walks the cursor forward by one key. + * Should only be called to advance cursors that started equal. + * Returns true if end-of-tree was reached (cursor not structurally mutated). 
+ */ + BTree.moveForwardOne = function (cur, other, currentKey, cmp) { var leaf = cur.leaf; var nextIndex = cur.leafIndex + 1; if (nextIndex < leaf.keys.length) { // Still within current leaf - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, startedEqual); + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, true); cur.leafIndex = nextIndex; return false; } - return BTree.moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp)[0]; + // If our optimizaed step within leaf failed, use full moveTo logic + // Pass isInclusive=false to ensure we walk forward to the key exactly after the current + return BTree.moveTo(cur, other, currentKey, false, true, cmp)[0]; }; /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. - * Returns true if end-of-tree was reached (cursor not structurally mutated). + * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). + * Also returns a boolean indicating if the target key was landed on exactly. 
*/ BTree.moveTo = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { var onMoveInLeaf = cur.onMoveInLeaf; @@ -1065,26 +1074,26 @@ var BTree = /** @class */ (function () { var leafPayload = cur.leafPayload; var i = leaf.indexOf(targetKey, -1, cmp); var destInLeaf; - var areEqual; + var targetExactlyReached; if (i < 0) { destInLeaf = ~i; - areEqual = false; + targetExactlyReached = false; } else { if (isInclusive) { destInLeaf = i; - areEqual = true; + targetExactlyReached = true; } else { destInLeaf = i + 1; - areEqual = false; + targetExactlyReached = false; } } var leafKeyCount = leaf.keys.length; if (destInLeaf < leafKeyCount) { onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); cur.leafIndex = destInLeaf; - return [false, areEqual]; + return [false, targetExactlyReached]; } // Find first ancestor with a viable right step var spine = cur.spine; @@ -1150,23 +1159,23 @@ var BTree = /** @class */ (function () { var destIndex; if (idx < 0) { destIndex = ~idx; - areEqual = false; + targetExactlyReached = false; } else { if (isInclusive) { destIndex = idx; - areEqual = true; + targetExactlyReached = true; } else { destIndex = idx + 1; - areEqual = false; + targetExactlyReached = false; } } cur.leaf = node; cur.leafPayload = makePayload(); cur.leafIndex = destIndex; onEnterLeaf(node, destIndex, cur, other); - return [false, areEqual]; + return [false, targetExactlyReached]; }; /** * Create a cursor pointing to the leftmost key of the supplied tree. 
diff --git a/b+tree.ts b/b+tree.ts index 0811ddb..242967c 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -578,7 +578,6 @@ export default class BTree implements ISortedMapF, ISortedMap */ intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { const cmp = this._compare; - // Ensure both trees share the same comparator reference if (cmp !== other._compare) throw new Error("Cannot merge BTrees with different comparators."); if (this._maxNodeSize !== other._maxNodeSize) @@ -588,14 +587,15 @@ export default class BTree implements ISortedMapF, ISortedMap return; const makePayload = (): undefined => undefined; - - // Initialize cursors at minimum keys. let cursorA = BTree.createCursor(this, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); let cursorB = BTree.createCursor(other, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); let leading = cursorA; let trailing = cursorB; let order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); + // The intersect walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. + // However, the only thing we care about is when the two cursors are equal (equality is intersection). + // When they are not equal we just advance the trailing cursor. 
while (true) { const areEqual = order === 0; if (areEqual) { @@ -603,8 +603,8 @@ export default class BTree implements ISortedMapF, ISortedMap const vA = cursorA.leaf.values[cursorA.leafIndex]; const vB = cursorB.leaf.values[cursorB.leafIndex]; intersection(key, vA, vB); - const outT = BTree.moveOne(trailing, leading, key, false, areEqual, cmp); - const outL = BTree.moveOne(leading, trailing, key, false, areEqual, cmp); + const outT = BTree.moveForwardOne(trailing, leading, key, cmp); + const outL = BTree.moveForwardOne(leading, trailing, key, cmp); if (outT && outL) break; order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); @@ -1193,8 +1193,8 @@ export default class BTree implements ISortedMapF, ISortedMap const merged = mergeValues(key, vA, vB); if (merged !== undefined) BTree.alternatingPush(pending, key, merged); - const outTrailing = BTree.moveOne(trailing, leading, key, false, areEqual, cmp); - const outLeading = BTree.moveOne(leading, trailing, key, false, areEqual, cmp); + const outTrailing = BTree.moveForwardOne(trailing, leading, key, cmp); + const outLeading = BTree.moveForwardOne(leading, trailing, key, cmp); if (outTrailing || outLeading) { if (!outTrailing || !outLeading) { // In these cases, we pass areEqual=false because a return value of "out of tree" means @@ -1248,28 +1248,35 @@ export default class BTree implements ISortedMapF, ISortedMap list.push(first, second); } - private static moveOne( + /** + * Walks the cursor forward by one key. + * Should only be called to advance cursors that started equal. + * Returns true if end-of-tree was reached (cursor not structurally mutated). 
+ */ + private static moveForwardOne( cur: MergeCursor, other: MergeCursor, - targetKey: K, - isInclusive: boolean, - startedEqual: boolean, + currentKey: K, cmp: (a:K,b:K)=>number ): boolean { const leaf = cur.leaf; const nextIndex = cur.leafIndex + 1; if (nextIndex < leaf.keys.length) { // Still within current leaf - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, startedEqual); + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, true); cur.leafIndex = nextIndex; return false; } - return BTree.moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp)[0]; + + // If our optimizaed step within leaf failed, use full moveTo logic + // Pass isInclusive=false to ensure we walk forward to the key exactly after the current + return BTree.moveTo(cur, other, currentKey, false, true, cmp)[0]; } /** * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. - * Returns true if end-of-tree was reached (cursor not structurally mutated). + * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). + * Also returns a boolean indicating if the target key was landed on exactly. 
*/ private static moveTo( cur: MergeCursor, @@ -1278,7 +1285,7 @@ export default class BTree implements ISortedMapF, ISortedMap isInclusive: boolean, startedEqual: boolean, cmp: (a:K,b:K)=>number - ): [outOfTree: boolean, areEqual: boolean] { + ): [outOfTree: boolean, targetExactlyReached: boolean] { const onMoveInLeaf = cur.onMoveInLeaf; const onExitLeaf = cur.onExitLeaf; const onStepUp = cur.onStepUp; @@ -1290,24 +1297,24 @@ export default class BTree implements ISortedMapF, ISortedMap const leafPayload = cur.leafPayload; const i = leaf.indexOf(targetKey, -1, cmp); let destInLeaf: number; - let areEqual: boolean; + let targetExactlyReached: boolean; if (i < 0) { destInLeaf = ~i; - areEqual = false; + targetExactlyReached = false; } else { if (isInclusive) { destInLeaf = i; - areEqual = true; + targetExactlyReached = true; } else { destInLeaf = i + 1; - areEqual = false; + targetExactlyReached = false; } } const leafKeyCount = leaf.keys.length; if (destInLeaf < leafKeyCount) { onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); cur.leafIndex = destInLeaf; - return [false, areEqual]; + return [false, targetExactlyReached]; } // Find first ancestor with a viable right step @@ -1382,21 +1389,21 @@ export default class BTree implements ISortedMapF, ISortedMap let destIndex: number; if (idx < 0) { destIndex = ~idx; - areEqual = false; + targetExactlyReached = false; } else { if (isInclusive) { destIndex = idx; - areEqual = true; + targetExactlyReached = true; } else { destIndex = idx + 1; - areEqual = false; + targetExactlyReached = false; } } cur.leaf = node; cur.leafPayload = makePayload(); cur.leafIndex = destIndex; onEnterLeaf(node, destIndex, cur, other); - return [false, areEqual]; + return [false, targetExactlyReached]; } /** From ed46afa908baa356b2138d35eb545b7a0bf65a1f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 15:01:08 -0800 Subject: [PATCH 052/143] docs and cleanup --- b+tree.d.ts | 16 ++++++++++++++-- 
b+tree.js | 20 ++++++++++++++++---- b+tree.test.ts | 4 ++-- b+tree.ts | 20 ++++++++++++++++---- 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 5344165..86600c9 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -258,7 +258,13 @@ export default class BTree implements ISortedMapF, ISort * Neither tree is modified. * @param other The other tree to intersect with this one. * @param intersection Called for keys that appear in both trees. - * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void; /** @@ -268,7 +274,13 @@ export default class BTree implements ISortedMapF, ISort * @param merge Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. * @returns A new BTree that contains the merged key/value pairs. - * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. + * @description Complexity is bounded O(N + M) for both time and allocations. 
+ * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. */ merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree; /** diff --git a/b+tree.js b/b+tree.js index 5fe9d9e..72b4c79 100644 --- a/b+tree.js +++ b/b+tree.js @@ -496,14 +496,20 @@ var BTree = /** @class */ (function () { * Neither tree is modified. * @param other The other tree to intersect with this one. * @param intersection Called for keys that appear in both trees. - * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ BTree.prototype.intersect = function (other, intersection) { var cmp = this._compare; if (cmp !== other._compare) - throw new Error("Cannot merge BTrees with different comparators."); + throw new Error("Cannot intersect BTrees with different comparators."); if (this._maxNodeSize !== other._maxNodeSize) - throw new Error("Cannot merge BTrees with different max node sizes."); + throw new Error("Cannot intersect BTrees with different max node sizes."); if (other.size === 0 || this.size === 0) return; var makePayload = function () { return undefined; }; @@ -556,7 +562,13 @@ var BTree = /** @class */ (function () { * @param merge Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. * @returns A new BTree that contains the merged key/value pairs. - * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. 
*/ BTree.prototype.merge = function (other, merge) { if (this._compare !== other._compare) diff --git a/b+tree.test.ts b/b+tree.test.ts index ab4f9c0..bc68f66 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1238,13 +1238,13 @@ function testIntersect(maxNodeSize: number) { const compareB = (a: number, b: number) => a - b; const tree1 = new BTree([[1, 1]], compareA, maxNodeSize); const tree2 = new BTree([[1, 1]], compareB, maxNodeSize); - expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot merge BTrees with different comparators."); + expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot intersect BTrees with different comparators."); }); test('Intersect throws for max node size mismatch', () => { const tree1 = new BTree([[1, 1]], compare, maxNodeSize); const tree2 = new BTree([[1, 1]], compare, maxNodeSize + 1); - expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot merge BTrees with different max node sizes."); + expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot intersect BTrees with different max node sizes."); }); } diff --git a/b+tree.ts b/b+tree.ts index 242967c..57d6df8 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -574,14 +574,20 @@ export default class BTree implements ISortedMapF, ISortedMap * Neither tree is modified. * @param other The other tree to intersect with this one. * @param intersection Called for keys that appear in both trees. - * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
+ * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { const cmp = this._compare; if (cmp !== other._compare) - throw new Error("Cannot merge BTrees with different comparators."); + throw new Error("Cannot intersect BTrees with different comparators."); if (this._maxNodeSize !== other._maxNodeSize) - throw new Error("Cannot merge BTrees with different max node sizes."); + throw new Error("Cannot intersect BTrees with different max node sizes."); if (other.size === 0 || this.size === 0) return; @@ -634,7 +640,13 @@ export default class BTree implements ISortedMapF, ISortedMap * @param merge Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. * @returns A new BTree that contains the merged key/value pairs. - * @description Complexity: O(N + M), but often much faster in practice due to skipping any non-intersecting subtrees. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. 
*/ merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { if (this._compare !== other._compare) From a4856590f46f553a17fc8fd981c60f33814dba5f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 19:36:08 -0800 Subject: [PATCH 053/143] docs --- b+tree.d.ts | 27 +++++++++-- b+tree.js | 125 ++++++++++++++++++++++++++++++++++++------------ b+tree.ts | 135 ++++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 222 insertions(+), 65 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 86600c9..41b344c 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -288,20 +288,34 @@ export default class BTree implements ISortedMapF, ISort * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. */ private static processSide; + /** + * Append a subtree at a given depth on the chosen side; cascade splits upward if needed. + * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. + * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. + * Returns a new root if the root was split, otherwise undefined. + */ private static appendAndCascade; + /** + * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. + * Short-circuits if first shared node is deeper than depthTo (the insertion depth). + */ private static ensureNotShared; /** - * Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) + * Propogates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ private static updateSizeAndMax; /** - * Update a spine (frontier) from a specific depth down, inclusive + * Update a spine (frontier) from a specific depth down, inclusive. + * Extends the frontier array if it is not already as long as the frontier. 
*/ private static updateFrontier; /** - * Find the first ancestor (starting at insertionDepth) with capacity + * Find the first ancestor (starting at insertionDepth) with capacity. */ private static findCascadeEndDepth; + /** + * Inserts the child without updating cached size counts. + */ private static insertNoCount; private static getLeftmostIndex; private static getRightmostIndex; @@ -311,8 +325,13 @@ export default class BTree implements ISortedMapF, ISort private static updateRightMax; private static noop; /** - * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward + * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to + * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to + * the first key at or after the trailing cursor's previous position. + * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. 
*/ private static decompose; private static alternatingCount; diff --git a/b+tree.js b/b+tree.js index 72b4c79..5c83b0b 100644 --- a/b+tree.js +++ b/b+tree.js @@ -573,29 +573,36 @@ var BTree = /** @class */ (function () { BTree.prototype.merge = function (other, merge) { if (this._compare !== other._compare) throw new Error("Cannot merge BTrees with different comparators."); - if (this._maxNodeSize !== other._maxNodeSize) + var branchingFactor = this._maxNodeSize; + if (branchingFactor !== other._maxNodeSize) throw new Error("Cannot merge BTrees with different max node sizes."); - // Early outs for empty trees (cheap clone of the non-empty tree) - var sizeThis = this._root.size(); - var sizeOther = other._root.size(); - if (sizeThis === 0) + if (this._root.size() === 0) return other.clone(); - if (sizeOther === 0) + if (other._root.size() === 0) return this.clone(); - // Decompose into disjoint subtrees and merged leaves + // Decompose both trees into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are the result of merging intersecting leaves. var _a = BTree.decompose(this, other, merge), disjoint = _a.disjoint, tallestIndex = _a.tallestIndex; var disjointEntryCount = BTree.alternatingCount(disjoint); - // Start result at the tallest subtree from the disjoint set + // Now we have a set of disjoint subtrees and we need to merge them into a single tree. + // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees + // to the "right" and "left" of it in sorted order, we append them onto the appropriate side + // of the current tree, splitting nodes as necessary to maintain balance. + // A "side" is referred to as a frontier, as it is a linked list of nodes from the root down to + // the leaf level on that side of the tree. Each appended subtree is appended to the node at the + // same height as itself on the frontier. 
Each tree is guaranteed to be at most as tall as the + // current frontier because we start from the tallest subtree and work outward. var initialRoot = BTree.alternatingGetSecond(disjoint, tallestIndex); - var branchingFactor = this._maxNodeSize; var frontier = [initialRoot]; // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjointEntryCount - 1) { BTree.updateFrontier(frontier, 0, BTree.getRightmostIndex); BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjointEntryCount, 1, BTree.getRightmostIndex, BTree.getRightInsertionIndex, BTree.splitOffRightSide, BTree.updateRightMax); } - // Process all subtrees to the left of the tallest subtree (reverse order) + // Process all subtrees to the left of the current tree if (tallestIndex - 1 >= 0) { + // Note we need to update the frontier here because the right-side processing may have grown the tree taller. BTree.updateFrontier(frontier, 0, BTree.getLeftmostIndex); BTree.processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, BTree.getLeftmostIndex, BTree.getLeftmostIndex, BTree.splitOffLeftSide, BTree.noop // left side appending doesn't update max keys ); @@ -610,6 +617,10 @@ var BTree = /** @class */ (function () { * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. */ BTree.processSide = function (branchingFactor, disjoint, spine, start, end, step, sideIndex, sideInsertionIndex, splitOffSide, updateMax) { + // Determine the depth of the first shared node on the frontier. + // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning + // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the + // frontier for each insertion as that would fundamentally change our asymptotics. 
var isSharedFrontierDepth = 0; var cur = spine[0]; // Find the first shared node on the frontier @@ -624,7 +635,6 @@ var BTree = /** @class */ (function () { // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. var unflushedSizes = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array - // Iterate the assigned half of the disjoint set for (var i = start; i != end; i += step) { var currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf var subtree = BTree.alternatingGetSecond(disjoint, i); @@ -661,10 +671,21 @@ var BTree = /** @class */ (function () { BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); }; ; - // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. + /** + * Append a subtree at a given depth on the chosen side; cascade splits upward if needed. + * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. + * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. + * Returns a new root if the root was split, otherwise undefined. + */ BTree.appendAndCascade = function (spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide) { + // We must take care to avoid accidental propogation upward of the size of the inserted subtree. + // To do this, we first split nodes upward from the insertion point until we find a node with capacity + // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, + // inserting at the end ensures no accidental propagation. 
+ // Depth is -1 if the subtree is the same height as the current tree if (insertionDepth >= 0) { var carry = undefined; + // Determine initially where to insert after any splits var insertTarget = spine[insertionDepth]; if (insertTarget.keys.length >= branchingFactor) { insertTarget = carry = splitOffSide(insertTarget); @@ -676,10 +697,16 @@ var BTree = /** @class */ (function () { // Refresh last key since child was split parent.keys[idx] = parent.children[idx].maxKey(); if (parent.keys.length < branchingFactor) { + // We have reached the end of the cascade BTree.insertNoCount(parent, sideInsertionIndex(parent), carry); carry = undefined; } else { + // Splitting the parent here requires care to avoid incorrectly double counting sizes + // Example: a node is at max capacity 4, with children each of size 4 for 16 total. + // We split the node into two nodes of 2 children each, but this does *not* modify the size + // of its parent. Therefore when we insert the carry into the torn-off node, we must not + // increase its size or we will double-count the size of the carry subtree. var tornOff = splitOffSide(parent); BTree.insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); carry = tornOff; @@ -688,10 +715,12 @@ var BTree = /** @class */ (function () { } var newRoot = undefined; if (carry !== undefined) { + // Expansion reached the root, need a new root to hold carry var oldRoot = spine[0]; newRoot = new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); BTree.insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); } + // Finally, insert the subtree at the insertion point BTree.insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); return newRoot; } @@ -704,7 +733,10 @@ var BTree = /** @class */ (function () { } }; ; - // Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is mutable + /** + * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. 
+ * Short-circuits if first shared node is deeper than depthTo (the insertion depth). + */ BTree.ensureNotShared = function (spine, isSharedFrontierDepth, depthToInclusive, sideIndex) { if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this @@ -724,7 +756,7 @@ var BTree = /** @class */ (function () { }; ; /** - * Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) + * Propogates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ BTree.updateSizeAndMax = function (spine, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive, updateMax) { // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because @@ -741,20 +773,22 @@ var BTree = /** @class */ (function () { } var node = spine[depth]; node._size += sizeAtLevel; + // No-op if left side, as max keys in parents are unchanged by appending to the beginning of a node updateMax(node, maxKey); } }; ; /** - * Update a spine (frontier) from a specific depth down, inclusive + * Update a spine (frontier) from a specific depth down, inclusive. + * Extends the frontier array if it is not already as long as the frontier. */ BTree.updateFrontier = function (frontier, depthLastValid, sideIndex) { check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); var startingAncestor = frontier[depthLastValid]; if (startingAncestor.isLeaf) return; - var an = startingAncestor; - var cur = an.children[sideIndex(an)]; + var internal = startingAncestor; + var cur = internal.children[sideIndex(internal)]; var depth = depthLastValid + 1; while (!cur.isLeaf) { var ni = cur; @@ -766,7 +800,7 @@ var BTree = /** @class */ (function () { }; ; /** - * Find the first ancestor (starting at insertionDepth) with capacity + * Find the first ancestor (starting at insertionDepth) with capacity. 
*/ BTree.findCascadeEndDepth = function (spine, insertionDepth, branchingFactor) { for (var depth = insertionDepth; depth >= 0; depth--) { @@ -776,10 +810,14 @@ var BTree = /** @class */ (function () { return -1; // no capacity, will need a new root }; ; + /** + * Inserts the child without updating cached size counts. + */ BTree.insertNoCount = function (parent, index, child) { parent.children.splice(index, 0, child); parent.keys.splice(index, 0, child.maxKey()); }; + // ---- Side-specific delegates for merging subtrees into a frontier ---- BTree.getLeftmostIndex = function () { return 0; }; @@ -800,19 +838,39 @@ var BTree = /** @class */ (function () { }; BTree.noop = function () { }; /** - * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward + * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to + * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to + * the first key at or after the trailing cursor's previous position. + * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. */ BTree.decompose = function (left, right, mergeValues) { var cmp = left._compare; check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); + // Holds the disjoint nodes that result from decomposition. 
+ // Alternating entries of (height, node) to avoid creating small tuples var disjoint = []; + // During the decomposition, leaves that are not disjoint are decomposed into individual entries + // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused + // disjoint subtree is added to the disjoint set. + // Note that there are unavoidable cases in which this will generate underfilled leaves. + // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. + // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be ruesed entirely, + // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] var pending = []; var tallestIndex = -1, tallestHeight = -1; + // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. + // This is done because we cannot know immediately whether we can add the node to the disjoint set + // because its ancestor may also be disjoint and should be reused instead. + var highestDisjoint = undefined; var flushPendingEntries = function () { var totalPairs = BTree.alternatingCount(pending); if (totalPairs === 0) return; + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. 
var max = left._maxNodeSize; var leafCount = Math.ceil(totalPairs / max); var remaining = totalPairs; @@ -821,9 +879,10 @@ var BTree = /** @class */ (function () { var chunkSize = Math.ceil(remaining / leafCount); var keys = new Array(chunkSize); var vals = new Array(chunkSize); - for (var i = 0; i < chunkSize; ++i, ++pairIndex) { + for (var i = 0; i < chunkSize; i++) { keys[i] = BTree.alternatingGetFirst(pending, pairIndex); vals[i] = BTree.alternatingGetSecond(pending, pairIndex); + pairIndex++; } remaining -= chunkSize; leafCount--; @@ -836,8 +895,6 @@ var BTree = /** @class */ (function () { } pending.length = 0; }; - // Have to do this as cast to convince TS it's ever assigned - var highestDisjoint = undefined; var addSharedNodeToDisjointSet = function (node, height) { flushPendingEntries(); node.isShared = true; @@ -853,10 +910,14 @@ var BTree = /** @class */ (function () { highestDisjoint = undefined; } }; + // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) var disqualifySpine = function (cursor, depthFrom) { var spine = cursor.spine; for (var i = depthFrom; i >= 0; --i) { var payload = spine[i].payload; + // Safe to early out because we always disqualify all ancestors of a disqualified node + // That is correct because every ancestor of a non-disjoint node is also non-disjoint + // because it must enclose the non-disjoint range. if (payload.disqualified) break; payload.disqualified = true; @@ -1035,9 +1096,14 @@ var BTree = /** @class */ (function () { } } } + // Ensure any trailing non-disjoint entries are added flushPendingEntries(); return { disjoint: disjoint, tallestIndex: tallestIndex }; }; + // ------- Alternating list helpers ------- + // These helpers manage a list that alternates between two types of entries. + // Storing data this way avoids small tuple allocations and shows major improvements + // in GC time in benchmarks. 
BTree.alternatingCount = function (list) { return list.length >> 1; }; @@ -1075,12 +1141,8 @@ var BTree = /** @class */ (function () { * Also returns a boolean indicating if the target key was landed on exactly. */ BTree.moveTo = function (cur, other, targetKey, isInclusive, startedEqual, cmp) { + // Cache callbacks for perf var onMoveInLeaf = cur.onMoveInLeaf; - var onExitLeaf = cur.onExitLeaf; - var onStepUp = cur.onStepUp; - var onStepDown = cur.onStepDown; - var onEnterLeaf = cur.onEnterLeaf; - var makePayload = cur.makePayload; // Fast path: destination within current leaf var leaf = cur.leaf; var leafPayload = cur.leafPayload; @@ -1129,9 +1191,10 @@ var BTree = /** @class */ (function () { break; } } - // Exit leaf; we did walk out of it conceptually + // Exit leaf; even if no spine, we did walk out of it conceptually var startIndex = cur.leafIndex; - onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); + cur.onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); + var onStepUp = cur.onStepUp; if (descentLevel < 0) { // No descent point; step up all the way; last callback gets infinity for (var depth = initialSpineLength - 1; depth >= 0; depth--) { @@ -1149,6 +1212,8 @@ var BTree = /** @class */ (function () { var entry = spine[descentLevel]; onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); entry.childIndex = descentIndex; + var onStepDown = cur.onStepDown; + var makePayload = cur.makePayload; // Descend, invoking onStepDown and creating payloads var height = initialSpineLength - descentLevel - 1; // calculate height before changing length spine.length = descentLevel + 1; @@ -1186,7 +1251,7 @@ var BTree = /** @class */ (function () { cur.leaf = node; cur.leafPayload = makePayload(); cur.leafIndex = destIndex; - onEnterLeaf(node, destIndex, cur, other); + cur.onEnterLeaf(node, destIndex, cur, other); return [false, targetExactlyReached]; }; /** diff --git a/b+tree.ts 
b/b+tree.ts index 57d6df8..10f3605 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -651,24 +651,31 @@ export default class BTree implements ISortedMapF, ISortedMap merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { if (this._compare !== other._compare) throw new Error("Cannot merge BTrees with different comparators."); - if (this._maxNodeSize !== other._maxNodeSize) + + const branchingFactor = this._maxNodeSize; + if (branchingFactor !== other._maxNodeSize) throw new Error("Cannot merge BTrees with different max node sizes."); - // Early outs for empty trees (cheap clone of the non-empty tree) - const sizeThis = this._root.size(); - const sizeOther = other._root.size(); - if (sizeThis === 0) + if (this._root.size() === 0) return other.clone(); - if (sizeOther === 0) + if (other._root.size() === 0) return this.clone(); - // Decompose into disjoint subtrees and merged leaves + // Decompose both trees into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are the result of merging intersecting leaves. const { disjoint, tallestIndex } = BTree.decompose(this, other, merge); const disjointEntryCount = BTree.alternatingCount(disjoint); - // Start result at the tallest subtree from the disjoint set + // Now we have a set of disjoint subtrees and we need to merge them into a single tree. + // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees + // to the "right" and "left" of it in sorted order, we append them onto the appropriate side + // of the current tree, splitting nodes as necessary to maintain balance. + // A "side" is referred to as a frontier, as it is a linked list of nodes from the root down to + // the leaf level on that side of the tree. Each appended subtree is appended to the node at the + // same height as itself on the frontier. 
Each tree is guaranteed to be at most as tall as the + // current frontier because we start from the tallest subtree and work outward. const initialRoot = BTree.alternatingGetSecond>(disjoint, tallestIndex); - const branchingFactor = this._maxNodeSize; const frontier: BNode[] = [initialRoot]; // Process all subtrees to the right of the tallest subtree @@ -687,8 +694,9 @@ export default class BTree implements ISortedMapF, ISortedMap ); } - // Process all subtrees to the left of the tallest subtree (reverse order) + // Process all subtrees to the left of the current tree if (tallestIndex - 1 >= 0) { + // Note we need to update the frontier here because the right-side processing may have grown the tree taller. BTree.updateFrontier(frontier, 0, BTree.getLeftmostIndex); BTree.processSide( branchingFactor, @@ -727,6 +735,10 @@ export default class BTree implements ISortedMapF, ISortedMap splitOffSide: (node: BNodeInternal) => BNodeInternal, updateMax: (node: BNodeInternal, maxBelow: K) => void ): void { + // Determine the depth of the first shared node on the frontier. + // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning + // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the + // frontier for each insertion as that would fundamentally change our asymptotics. let isSharedFrontierDepth = 0; let cur = spine[0]; // Find the first shared node on the frontier @@ -743,7 +755,6 @@ export default class BTree implements ISortedMapF, ISortedMap // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. 
const unflushedSizes: number[] = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array - // Iterate the assigned half of the disjoint set for (let i = start; i != end; i += step) { const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf const subtree = BTree.alternatingGetSecond>(disjoint, i); @@ -785,7 +796,12 @@ export default class BTree implements ISortedMapF, ISortedMap BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); }; - // Append a subtree at a given depth on the chosen side; cascade splits upward if needed. + /** + * Append a subtree at a given depth on the chosen side; cascade splits upward if needed. + * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. + * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. + * Returns a new root if the root was split, otherwise undefined. + */ private static appendAndCascade( spine: BNode[], insertionDepth: number, @@ -795,8 +811,15 @@ export default class BTree implements ISortedMapF, ISortedMap sideInsertionIndex: (node: BNodeInternal) => number, splitOffSide: (node: BNodeInternal) => BNodeInternal ): BNodeInternal | undefined { + // We must take care to avoid accidental propogation upward of the size of the inserted subtree. + // To do this, we first split nodes upward from the insertion point until we find a node with capacity + // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, + // inserting at the end ensures no accidental propagation. 
+ + // Depth is -1 if the subtree is the same height as the current tree if (insertionDepth >= 0) { let carry: BNode | undefined = undefined; + // Determine initially where to insert after any splits let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; if (insertTarget.keys.length >= branchingFactor) { insertTarget = carry = splitOffSide(insertTarget); @@ -809,9 +832,15 @@ export default class BTree implements ISortedMapF, ISortedMap // Refresh last key since child was split parent.keys[idx] = parent.children[idx].maxKey(); if (parent.keys.length < branchingFactor) { + // We have reached the end of the cascade BTree.insertNoCount(parent, sideInsertionIndex(parent), carry); carry = undefined; } else { + // Splitting the parent here requires care to avoid incorrectly double counting sizes + // Example: a node is at max capacity 4, with children each of size 4 for 16 total. + // We split the node into two nodes of 2 children each, but this does *not* modify the size + // of its parent. Therefore when we insert the carry into the torn-off node, we must not + // increase its size or we will double-count the size of the carry subtree. 
const tornOff = splitOffSide(parent); BTree.insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); carry = tornOff; @@ -821,10 +850,13 @@ export default class BTree implements ISortedMapF, ISortedMap let newRoot: BNodeInternal | undefined = undefined; if (carry !== undefined) { + // Expansion reached the root, need a new root to hold carry const oldRoot = spine[0] as BNodeInternal; newRoot = new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); BTree.insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); } + + // Finally, insert the subtree at the insertion point BTree.insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); return newRoot; } else { @@ -836,7 +868,10 @@ export default class BTree implements ISortedMapF, ISortedMap } }; - // Clone along the spine from isSharedFrontierDepth..depthTo inclusive so path is mutable + /** + * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. + * Short-circuits if first shared node is deeper than depthTo (the insertion depth). + */ private static ensureNotShared( spine: BNode[], isSharedFrontierDepth: number, @@ -862,7 +897,7 @@ export default class BTree implements ISortedMapF, ISortedMap }; /** - * Refresh sizes on the spine for nodes in (isSharedFrontierDepth, depthTo) + * Propogates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ private static updateSizeAndMax( spine: BNode[], @@ -884,20 +919,22 @@ export default class BTree implements ISortedMapF, ISortedMap } const node = spine[depth] as BNodeInternal; node._size += sizeAtLevel; + // No-op if left side, as max keys in parents are unchanged by appending to the beginning of a node updateMax(node, maxKey); } }; /** - * Update a spine (frontier) from a specific depth down, inclusive + * Update a spine (frontier) from a specific depth down, inclusive. + * Extends the frontier array if it is not already as long as the frontier. 
*/ private static updateFrontier(frontier: BNode[], depthLastValid: number, sideIndex: (node: BNodeInternal) => number): void { check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); const startingAncestor = frontier[depthLastValid]; if (startingAncestor.isLeaf) return; - const an = startingAncestor as BNodeInternal; - let cur: BNode = an.children[sideIndex(an)]; + const internal = startingAncestor as BNodeInternal; + let cur: BNode = internal.children[sideIndex(internal)]; let depth = depthLastValid + 1; while (!cur.isLeaf) { const ni = cur as BNodeInternal; @@ -909,7 +946,7 @@ export default class BTree implements ISortedMapF, ISortedMap }; /** - * Find the first ancestor (starting at insertionDepth) with capacity + * Find the first ancestor (starting at insertionDepth) with capacity. */ private static findCascadeEndDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { for (let depth = insertionDepth; depth >= 0; depth--) { @@ -919,6 +956,9 @@ export default class BTree implements ISortedMapF, ISortedMap return -1; // no capacity, will need a new root }; + /** + * Inserts the child without updating cached size counts. + */ private static insertNoCount( parent: BNodeInternal, index: number, @@ -928,6 +968,8 @@ export default class BTree implements ISortedMapF, ISortedMap parent.keys.splice(index, 0, child.maxKey()); } + // ---- Side-specific delegates for merging subtrees into a frontier ---- + private static getLeftmostIndex(): number { return 0; } @@ -955,8 +997,13 @@ export default class BTree implements ISortedMapF, ISortedMap private static noop(): void {} /** - * Decomposes two BTrees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. 
Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward + * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to + * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to + * the first key at or after the trailing cursor's previous position. + * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. */ private static decompose( left: BTree, @@ -965,15 +1012,33 @@ export default class BTree implements ISortedMapF, ISortedMap ): DecomposeResult { const cmp = left._compare; check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); + // Holds the disjoint nodes that result from decomposition. + // Alternating entries of (height, node) to avoid creating small tuples const disjoint: (number | BNode)[] = []; + // During the decomposition, leaves that are not disjoint are decomposed into individual entries + // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused + // disjoint subtree is added to the disjoint set. + // Note that there are unavoidable cases in which this will generate underfilled leaves. + // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. + // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be ruesed entirely, + // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] const pending: (K | V)[] = []; let tallestIndex = -1, tallestHeight = -1; + // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. 
+ // This is done because we cannot know immediately whether we can add the node to the disjoint set + // because its ancestor may also be disjoint and should be reused instead. + let highestDisjoint: { node: BNode, height: number } | undefined + // Have to do this as cast to convince TS it's ever assigned + = undefined as { node: BNode, height: number } | undefined; + const flushPendingEntries = () => { const totalPairs = BTree.alternatingCount(pending); if (totalPairs === 0) return; + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. const max = left._maxNodeSize; let leafCount = Math.ceil(totalPairs / max); let remaining = totalPairs; @@ -982,9 +1047,10 @@ export default class BTree implements ISortedMapF, ISortedMap const chunkSize = Math.ceil(remaining / leafCount); const keys = new Array(chunkSize); const vals = new Array(chunkSize); - for (let i = 0; i < chunkSize; ++i, ++pairIndex) { + for (let i = 0; i < chunkSize; i++) { keys[i] = BTree.alternatingGetFirst(pending, pairIndex); vals[i] = BTree.alternatingGetSecond(pending, pairIndex); + pairIndex++; } remaining -= chunkSize; leafCount--; @@ -998,9 +1064,6 @@ export default class BTree implements ISortedMapF, ISortedMap pending.length = 0; }; - // Have to do this as cast to convince TS it's ever assigned - let highestDisjoint: { node: BNode, height: number } | undefined = undefined as { node: BNode, height: number } | undefined; - const addSharedNodeToDisjointSet = (node: BNode, height: number) => { flushPendingEntries(); node.isShared = true; @@ -1018,10 +1081,14 @@ export default class BTree implements ISortedMapF, ISortedMap } }; + // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { const spine = cursor.spine; for (let i = depthFrom; i >= 0; --i) { const payload = spine[i].payload; + // Safe 
to early out because we always disqualify all ancestors of a disqualified node + // That is correct because every ancestor of a non-disjoint node is also non-disjoint + // because it must enclose the non-disjoint range. if (payload.disqualified) break; payload.disqualified = true; @@ -1239,10 +1306,16 @@ export default class BTree implements ISortedMapF, ISortedMap } } + // Ensure any trailing non-disjoint entries are added flushPendingEntries(); return { disjoint, tallestIndex }; } + // ------- Alternating list helpers ------- + // These helpers manage a list that alternates between two types of entries. + // Storing data this way avoids small tuple allocations and shows major improvements + // in GC time in benchmarks. + private static alternatingCount(list: unknown[]): number { return list.length >> 1; } @@ -1298,12 +1371,8 @@ export default class BTree implements ISortedMapF, ISortedMap startedEqual: boolean, cmp: (a:K,b:K)=>number ): [outOfTree: boolean, targetExactlyReached: boolean] { + // Cache callbacks for perf const onMoveInLeaf = cur.onMoveInLeaf; - const onExitLeaf = cur.onExitLeaf; - const onStepUp = cur.onStepUp; - const onStepDown = cur.onStepDown; - const onEnterLeaf = cur.onEnterLeaf; - const makePayload = cur.makePayload; // Fast path: destination within current leaf const leaf = cur.leaf; const leafPayload = cur.leafPayload; @@ -1353,10 +1422,11 @@ export default class BTree implements ISortedMapF, ISortedMap } } - // Exit leaf; we did walk out of it conceptually + // Exit leaf; even if no spine, we did walk out of it conceptually const startIndex = cur.leafIndex; - onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); + cur.onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); + const onStepUp = cur.onStepUp; if (descentLevel < 0) { // No descent point; step up all the way; last callback gets infinity for (let depth = initialSpineLength - 1; depth >= 0; depth--) { @@ -1377,6 +1447,9 @@ export default class BTree implements 
ISortedMapF, ISortedMap onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); entry.childIndex = descentIndex; + const onStepDown = cur.onStepDown; + const makePayload = cur.makePayload; + // Descend, invoking onStepDown and creating payloads let height = initialSpineLength - descentLevel - 1; // calculate height before changing length spine.length = descentLevel + 1; @@ -1414,7 +1487,7 @@ export default class BTree implements ISortedMapF, ISortedMap cur.leaf = node; cur.leafPayload = makePayload(); cur.leafIndex = destIndex; - onEnterLeaf(node, destIndex, cur, other); + cur.onEnterLeaf(node, destIndex, cur, other); return [false, targetExactlyReached]; } From d652d04fe62d8e28d1af9c61d3c1bda0a2de8b35 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 19:36:19 -0800 Subject: [PATCH 054/143] cleanup --- benchmarks.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmarks.ts b/benchmarks.ts index c3ced4d..3afd1b2 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -1,5 +1,5 @@ #!/usr/bin/env ts-node -import BTree, {IMap} from '.'; +import BTree from '.'; import SortedArray from './sorted-array'; // Note: The `bintrees` package also includes a `BinTree` type which turned // out to be an unbalanced binary tree. 
It is faster than `RBTree` for @@ -77,8 +77,6 @@ function intersectBySorting( ) { const left = tree1.toArray(); const right = tree2.toArray(); - left.sort((a, b) => a[0] - b[0]); - right.sort((a, b) => a[0] - b[0]); let i = 0; let j = 0; const leftLen = left.length; From 477b58442a0492558a2280a0cb2b1f14c9d27fd1 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 19:56:25 -0800 Subject: [PATCH 055/143] cleanup --- b+tree.d.ts | 2 +- b+tree.js | 8 ++++---- b+tree.ts | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index 41b344c..fd18968 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -301,7 +301,7 @@ export default class BTree implements ISortedMapF, ISort */ private static ensureNotShared; /** - * Propogates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) + * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ private static updateSizeAndMax; /** diff --git a/b+tree.js b/b+tree.js index 5c83b0b..71b26bf 100644 --- a/b+tree.js +++ b/b+tree.js @@ -678,7 +678,7 @@ var BTree = /** @class */ (function () { * Returns a new root if the root was split, otherwise undefined. */ BTree.appendAndCascade = function (spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide) { - // We must take care to avoid accidental propogation upward of the size of the inserted subtree. + // We must take care to avoid accidental propagation upward of the size of the inserted subtree. // To do this, we first split nodes upward from the insertion point until we find a node with capacity // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, // inserting at the end ensures no accidental propagation. 
@@ -756,7 +756,7 @@ var BTree = /** @class */ (function () { }; ; /** - * Propogates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) + * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ BTree.updateSizeAndMax = function (spine, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive, updateMax) { // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because @@ -857,7 +857,7 @@ var BTree = /** @class */ (function () { // disjoint subtree is added to the disjoint set. // Note that there are unavoidable cases in which this will generate underfilled leaves. // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. - // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be ruesed entirely, + // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] var pending = []; var tallestIndex = -1, tallestHeight = -1; @@ -1131,7 +1131,7 @@ var BTree = /** @class */ (function () { cur.leafIndex = nextIndex; return false; } - // If our optimizaed step within leaf failed, use full moveTo logic + // If our optimized step within leaf failed, use full moveTo logic // Pass isInclusive=false to ensure we walk forward to the key exactly after the current return BTree.moveTo(cur, other, currentKey, false, true, cmp)[0]; }; diff --git a/b+tree.ts b/b+tree.ts index 10f3605..2c745bc 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -811,7 +811,7 @@ export default class BTree implements ISortedMapF, ISortedMap sideInsertionIndex: (node: BNodeInternal) => number, splitOffSide: (node: BNodeInternal) => BNodeInternal ): BNodeInternal | undefined { - // We must take care to avoid accidental propogation upward of the size of the inserted subtree. 
+ // We must take care to avoid accidental propagation upward of the size of the inserted subtree. // To do this, we first split nodes upward from the insertion point until we find a node with capacity // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, // inserting at the end ensures no accidental propagation. @@ -897,7 +897,7 @@ export default class BTree implements ISortedMapF, ISortedMap }; /** - * Propogates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) + * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ private static updateSizeAndMax( spine: BNode[], @@ -1020,7 +1020,7 @@ export default class BTree implements ISortedMapF, ISortedMap // disjoint subtree is added to the disjoint set. // Note that there are unavoidable cases in which this will generate underfilled leaves. // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. - // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be ruesed entirely, + // In the other tree, there is a leaf that contains [2, 3, 4, 5]. 
This leaf can be reused entirely, // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] const pending: (K | V)[] = []; let tallestIndex = -1, tallestHeight = -1; @@ -1353,7 +1353,7 @@ export default class BTree implements ISortedMapF, ISortedMap return false; } - // If our optimizaed step within leaf failed, use full moveTo logic + // If our optimized step within leaf failed, use full moveTo logic // Pass isInclusive=false to ensure we walk forward to the key exactly after the current return BTree.moveTo(cur, other, currentKey, false, true, cmp)[0]; } From 08aaee729400547806c56a120d63220fc2d58bc4 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 6 Nov 2025 20:23:40 -0800 Subject: [PATCH 056/143] readme update --- readme.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/readme.md b/readme.md index 841ce6c..2ef5081 100644 --- a/readme.md +++ b/readme.md @@ -81,6 +81,8 @@ Features - Includes a lattice of interfaces for TypeScript users (see below) - Supports diffing computation between two trees that is highly optimized for the case in which a majority of nodes are shared (such as when persistent methods are used). +- Supports fast merge & intersection operations with asymptotic speedups when large disjoint ranges of keys are present. + The merge operation generates a new tree that shares nodes with the original trees when possible. 
### Additional operations supported on this B+ tree ### @@ -100,6 +102,8 @@ Features - Freeze to prevent modifications: `t.freeze()` (you can also `t.unfreeze()`) - Fast clone: `t.clone()` - Compute a diff between two trees (quickly skipping shared subtrees): `t.diffAgainst(otherTree, ...)` +- Efficiently merge two trees, sharing nodes where possible: `t.merge(other, ...)` +- Efficiently intersect two trees: `t.intersect(other, ...)` - For more information, **see [full documentation](https://github.com/qwertie/btree-typescript/blob/master/b%2Btree.ts) in the source code.** **Note:** Confusingly, the ES6 `Map.forEach(c)` method calls `c(value,key)` instead of `c(key,value)`, in contrast to other methods such as `set()` and `entries()` which put the key first. I can only assume that they reversed the order on the hypothesis that users would usually want to examine values and ignore keys. BTree's `forEach()` therefore works the same way, but there is a second method `.forEachPair((key,value)=>{...})` which sends you the key first and the value second; this method is slightly faster because it is the "native" for-each method for this class. @@ -122,6 +126,7 @@ The "scanning" methods (`forEach, forRange, editRange, deleteRange`) will normal - Get a new tree with one pair removed: `t.without(key)` - Get a new tree with specific pairs removed: `t.withoutKeys(keys)` - Get a new tree with a range of keys removed: `t.withoutRange(low, high, includeHi)` +- Get a new tree that is the result of a merge: `t.merge(other, mergeFn)` **Things to keep in mind:** I ran a test which suggested `t.with` is three times slower than `t.set`. These methods do not return a frozen tree even if the original tree was frozen (for performance reasons, e.g. frozen trees use slightly more memory.) 
From c2e4adfab8870eaeccbc5a9861ec6c7a7a302355 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Mon, 10 Nov 2025 13:04:19 -0800 Subject: [PATCH 057/143] prep for merge --- b+tree.ts | 976 +----------------------------------------------- decompose.ts | 676 +++++++++++++++++++++++++++++++++ intersect.ts | 63 ++++ merge.ts | 33 ++ parallelWalk.ts | 219 +++++++++++ 5 files changed, 1011 insertions(+), 956 deletions(-) create mode 100644 decompose.ts create mode 100644 intersect.ts create mode 100644 merge.ts create mode 100644 parallelWalk.ts diff --git a/b+tree.ts b/b+tree.ts index 2c745bc..6165d0c 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -583,54 +583,7 @@ export default class BTree implements ISortedMapF, ISortedMap * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { - const cmp = this._compare; - if (cmp !== other._compare) - throw new Error("Cannot intersect BTrees with different comparators."); - if (this._maxNodeSize !== other._maxNodeSize) - throw new Error("Cannot intersect BTrees with different max node sizes."); - - if (other.size === 0 || this.size === 0) - return; - - const makePayload = (): undefined => undefined; - let cursorA = BTree.createCursor(this, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); - let cursorB = BTree.createCursor(other, makePayload, BTree.noop, BTree.noop, BTree.noop, BTree.noop, BTree.noop); - let leading = cursorA; - let trailing = cursorB; - let order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); - - // The intersect walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. - // However, the only thing we care about is when the two cursors are equal (equality is intersection). - // When they are not equal we just advance the trailing cursor. 
- while (true) { - const areEqual = order === 0; - if (areEqual) { - const key = BTree.getKey(leading); - const vA = cursorA.leaf.values[cursorA.leafIndex]; - const vB = cursorB.leaf.values[cursorB.leafIndex]; - intersection(key, vA, vB); - const outT = BTree.moveForwardOne(trailing, leading, key, cmp); - const outL = BTree.moveForwardOne(leading, trailing, key, cmp); - if (outT && outL) - break; - order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); - } else { - if (order < 0) { - const tmp = trailing; - trailing = leading; leading = tmp; - } - // At this point, leading is guaranteed to be ahead of trailing. - const [out, nowEqual] = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp) - if (out) { - // We've reached the end of one tree, so intersections are guaranteed to be done. - break; - } else if (nowEqual) { - order = 0; - } else { - order = -1; // trailing is ahead of leading - } - } - } + } /** @@ -649,878 +602,7 @@ export default class BTree implements ISortedMapF, ISortedMap * and inserting the contents of `other` into the clone. */ merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { - if (this._compare !== other._compare) - throw new Error("Cannot merge BTrees with different comparators."); - - const branchingFactor = this._maxNodeSize; - if (branchingFactor !== other._maxNodeSize) - throw new Error("Cannot merge BTrees with different max node sizes."); - - if (this._root.size() === 0) - return other.clone(); - if (other._root.size() === 0) - return this.clone(); - - // Decompose both trees into disjoint subtrees leaves. - // As many of these as possible will be reused from the original trees, and the remaining - // will be leaves that are the result of merging intersecting leaves. 
- const { disjoint, tallestIndex } = BTree.decompose(this, other, merge); - const disjointEntryCount = BTree.alternatingCount(disjoint); - - // Now we have a set of disjoint subtrees and we need to merge them into a single tree. - // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees - // to the "right" and "left" of it in sorted order, we append them onto the appropriate side - // of the current tree, splitting nodes as necessary to maintain balance. - // A "side" is referred to as a frontier, as it is a linked list of nodes from the root down to - // the leaf level on that side of the tree. Each appended subtree is appended to the node at the - // same height as itself on the frontier. Each tree is guaranteed to be at most as tall as the - // current frontier because we start from the tallest subtree and work outward. - const initialRoot = BTree.alternatingGetSecond>(disjoint, tallestIndex); - const frontier: BNode[] = [initialRoot]; - - // Process all subtrees to the right of the tallest subtree - if (tallestIndex + 1 <= disjointEntryCount - 1) { - BTree.updateFrontier(frontier, 0, BTree.getRightmostIndex); - BTree.processSide( - branchingFactor, - disjoint, - frontier, - tallestIndex + 1, - disjointEntryCount, 1, - BTree.getRightmostIndex, - BTree.getRightInsertionIndex, - BTree.splitOffRightSide, - BTree.updateRightMax - ); - } - - // Process all subtrees to the left of the current tree - if (tallestIndex - 1 >= 0) { - // Note we need to update the frontier here because the right-side processing may have grown the tree taller. 
- BTree.updateFrontier(frontier, 0, BTree.getLeftmostIndex); - BTree.processSide( - branchingFactor, - disjoint, - frontier, - tallestIndex - 1, - -1, - -1, - BTree.getLeftmostIndex, - BTree.getLeftmostIndex, - BTree.splitOffLeftSide, - BTree.noop // left side appending doesn't update max keys - ); - } - - const merged = new BTree(undefined, this._compare, this._maxNodeSize); - merged._root = frontier[0]; - - // Return the resulting tree - return merged; - } - - /** - * Processes one side (left or right) of the disjoint subtree set during a merge operation. - * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. - */ - private static processSide( - branchingFactor: number, - disjoint: (number | BNode)[], - spine: BNode[], - start: number, - end: number, - step: number, - sideIndex: (node: BNodeInternal) => number, - sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal, - updateMax: (node: BNodeInternal, maxBelow: K) => void - ): void { - // Determine the depth of the first shared node on the frontier. - // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning - // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the - // frontier for each insertion as that would fundamentally change our asymptotics. - let isSharedFrontierDepth = 0; - let cur = spine[0]; - // Find the first shared node on the frontier - while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { - isSharedFrontierDepth++; - cur = (cur as BNodeInternal).children[sideIndex(cur as BNodeInternal)]; - } - - // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. - // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. - // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. 
- // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. - // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. - // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. - const unflushedSizes: number[] = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array - - for (let i = start; i != end; i += step) { - const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf - const subtree = BTree.alternatingGetSecond>(disjoint, i); - const subtreeHeight = BTree.alternatingGetFirst>(disjoint, i); - const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' - - // Ensure path is unshared before mutation - BTree.ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); - - // Calculate expansion depth (first ancestor with capacity) - const expansionDepth = Math.max(0, BTree.findCascadeEndDepth(spine, insertionDepth, branchingFactor)); - - // Update sizes on spine above the shared ancestor before we expand - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); - - // Append and cascade splits upward - const newRoot = BTree.appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); - if (newRoot) { - // Set the spine root to the highest up new node; the rest of the spine is updated below - spine[0] = newRoot; - unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); - unflushedSizes.push(0); // new root level - isSharedFrontierDepth = insertionDepth + 2; - unflushedSizes[insertionDepth + 1] += subtree.size(); - } else { - isSharedFrontierDepth = insertionDepth + 1; - unflushedSizes[insertionDepth] += subtree.size(); - } - - // Finally, update the frontier 
from the highest new node downward - // Note that this is often the point where the new subtree is attached, - // but in the case of cascaded splits it may be higher up. - BTree.updateFrontier(spine, expansionDepth, sideIndex); - check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); - check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); - } - - // Finally, propagate any remaining unflushed sizes upward and update max keys - BTree.updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); - }; - - /** - * Append a subtree at a given depth on the chosen side; cascade splits upward if needed. - * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. - * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. - * Returns a new root if the root was split, otherwise undefined. - */ - private static appendAndCascade( - spine: BNode[], - insertionDepth: number, - branchingFactor: number, - subtree: BNode, - sideIndex: (node: BNodeInternal) => number, - sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal - ): BNodeInternal | undefined { - // We must take care to avoid accidental propagation upward of the size of the inserted subtree. - // To do this, we first split nodes upward from the insertion point until we find a node with capacity - // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, - // inserting at the end ensures no accidental propagation. 
- - // Depth is -1 if the subtree is the same height as the current tree - if (insertionDepth >= 0) { - let carry: BNode | undefined = undefined; - // Determine initially where to insert after any splits - let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; - if (insertTarget.keys.length >= branchingFactor) { - insertTarget = carry = splitOffSide(insertTarget); - } - - let d = insertionDepth - 1; - while (carry && d >= 0) { - const parent = spine[d] as BNodeInternal; - const idx = sideIndex(parent); - // Refresh last key since child was split - parent.keys[idx] = parent.children[idx].maxKey(); - if (parent.keys.length < branchingFactor) { - // We have reached the end of the cascade - BTree.insertNoCount(parent, sideInsertionIndex(parent), carry); - carry = undefined; - } else { - // Splitting the parent here requires care to avoid incorrectly double counting sizes - // Example: a node is at max capacity 4, with children each of size 4 for 16 total. - // We split the node into two nodes of 2 children each, but this does *not* modify the size - // of its parent. Therefore when we insert the carry into the torn-off node, we must not - // increase its size or we will double-count the size of the carry subtree. 
- const tornOff = splitOffSide(parent); - BTree.insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); - carry = tornOff; - } - d--; - } - - let newRoot: BNodeInternal | undefined = undefined; - if (carry !== undefined) { - // Expansion reached the root, need a new root to hold carry - const oldRoot = spine[0] as BNodeInternal; - newRoot = new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); - BTree.insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); - } - - // Finally, insert the subtree at the insertion point - BTree.insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); - return newRoot; - } else { - // Insertion of subtree with equal height to current tree - const oldRoot = spine[0] as BNodeInternal; - const newRoot = new BNodeInternal([oldRoot], oldRoot.size()); - BTree.insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); - return newRoot; - } - }; - - /** - * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. - * Short-circuits if first shared node is deeper than depthTo (the insertion depth). 
- */ - private static ensureNotShared( - spine: BNode[], - isSharedFrontierDepth: number, - depthToInclusive: number, - sideIndex: (node: BNodeInternal) => number) { - if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) - return; // nothing to clone when root is a leaf; equal-height case will handle this - - // Clone root if needed first (depth 0) - if (isSharedFrontierDepth === 0) { - const root = spine[0]; - spine[0] = root.clone() as BNodeInternal; - } - - // Clone downward along the frontier to 'depthToInclusive' - for (let depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { - const parent = spine[depth - 1] as BNodeInternal; - const childIndex = sideIndex(parent); - const clone = parent.children[childIndex].clone(); - parent.children[childIndex] = clone; - spine[depth] = clone as BNodeInternal; - } - }; - - /** - * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) - */ - private static updateSizeAndMax( - spine: BNode[], - unflushedSizes: number[], - isSharedFrontierDepth: number, - depthUpToInclusive: number, - updateMax: (node: BNodeInternal, maxBelow: K) => void) { - // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because - // the insertion point is inside a shared node which will always have correct sizes - const maxKey = spine[isSharedFrontierDepth].maxKey(); - const startDepth = isSharedFrontierDepth - 1; - for (let depth = startDepth; depth >= depthUpToInclusive; depth--) { - const sizeAtLevel = unflushedSizes[depth]; - unflushedSizes[depth] = 0; // we are propagating it now - if (depth > 0) { - // propagate size upward, will be added lazily, either when a subtree is appended at or above that level or - // at the end of processing the entire side - unflushedSizes[depth - 1] += sizeAtLevel; - } - const node = spine[depth] as BNodeInternal; - node._size += sizeAtLevel; - // No-op if left side, as max keys in parents are 
unchanged by appending to the beginning of a node - updateMax(node, maxKey); - } - }; - - /** - * Update a spine (frontier) from a specific depth down, inclusive. - * Extends the frontier array if it is not already as long as the frontier. - */ - private static updateFrontier(frontier: BNode[], depthLastValid: number, sideIndex: (node: BNodeInternal) => number): void { - check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); - const startingAncestor = frontier[depthLastValid]; - if (startingAncestor.isLeaf) - return; - const internal = startingAncestor as BNodeInternal; - let cur: BNode = internal.children[sideIndex(internal)]; - let depth = depthLastValid + 1; - while (!cur.isLeaf) { - const ni = cur as BNodeInternal; - frontier[depth] = ni; - cur = ni.children[sideIndex(ni)]; - depth++; - } - frontier[depth] = cur; - }; - - /** - * Find the first ancestor (starting at insertionDepth) with capacity. - */ - private static findCascadeEndDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { - for (let depth = insertionDepth; depth >= 0; depth--) { - if (spine[depth].keys.length < branchingFactor) - return depth; - } - return -1; // no capacity, will need a new root - }; - - /** - * Inserts the child without updating cached size counts. 
- */ - private static insertNoCount( - parent: BNodeInternal, - index: number, - child: BNode - ): void { - parent.children.splice(index, 0, child); - parent.keys.splice(index, 0, child.maxKey()); - } - - // ---- Side-specific delegates for merging subtrees into a frontier ---- - - private static getLeftmostIndex(): number { - return 0; - } - - private static getRightmostIndex(node: BNodeInternal): number { - return node.children.length - 1; - } - - private static getRightInsertionIndex(node: BNodeInternal): number { - return node.children.length; - } - - private static splitOffRightSide(node: BNodeInternal): BNodeInternal { - return node.splitOffRightSide(); - } - - private static splitOffLeftSide(node: BNodeInternal): BNodeInternal { - return node.splitOffLeftSide(); - } - - private static updateRightMax(node: BNodeInternal, maxBelow: K): void { - node.keys[node.keys.length - 1] = maxBelow; - } - - private static noop(): void {} - - /** - * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes - * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. - * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward - * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to - * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to - * the first key at or after the trailing cursor's previous position. - * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. 
- */ - private static decompose( - left: BTree, - right: BTree, - mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined - ): DecomposeResult { - const cmp = left._compare; - check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); - // Holds the disjoint nodes that result from decomposition. - // Alternating entries of (height, node) to avoid creating small tuples - const disjoint: (number | BNode)[] = []; - // During the decomposition, leaves that are not disjoint are decomposed into individual entries - // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused - // disjoint subtree is added to the disjoint set. - // Note that there are unavoidable cases in which this will generate underfilled leaves. - // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. - // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, - // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] - const pending: (K | V)[] = []; - let tallestIndex = -1, tallestHeight = -1; - - // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. - // This is done because we cannot know immediately whether we can add the node to the disjoint set - // because its ancestor may also be disjoint and should be reused instead. - let highestDisjoint: { node: BNode, height: number } | undefined - // Have to do this as cast to convince TS it's ever assigned - = undefined as { node: BNode, height: number } | undefined; - - const flushPendingEntries = () => { - const totalPairs = BTree.alternatingCount(pending); - if (totalPairs === 0) - return; - - // This method creates as many evenly filled leaves as possible from - // the pending entries. All will be > 50% full if we are creating more than one leaf. 
- const max = left._maxNodeSize; - let leafCount = Math.ceil(totalPairs / max); - let remaining = totalPairs; - let pairIndex = 0; - while (leafCount > 0) { - const chunkSize = Math.ceil(remaining / leafCount); - const keys = new Array(chunkSize); - const vals = new Array(chunkSize); - for (let i = 0; i < chunkSize; i++) { - keys[i] = BTree.alternatingGetFirst(pending, pairIndex); - vals[i] = BTree.alternatingGetSecond(pending, pairIndex); - pairIndex++; - } - remaining -= chunkSize; - leafCount--; - const leaf = new BNode(keys, vals); - BTree.alternatingPush>(disjoint, 0, leaf); - if (tallestHeight < 0) { - tallestIndex = BTree.alternatingCount(disjoint) - 1; - tallestHeight = 0; - } - } - pending.length = 0; - }; - - const addSharedNodeToDisjointSet = (node: BNode, height: number) => { - flushPendingEntries(); - node.isShared = true; - BTree.alternatingPush>(disjoint, height, node); - if (height > tallestHeight) { - tallestIndex = BTree.alternatingCount(disjoint) - 1; - tallestHeight = height; - } - }; - - const addHighestDisjoint = () => { - if (highestDisjoint !== undefined) { - addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); - highestDisjoint = undefined; - } - }; - - // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) - const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { - const spine = cursor.spine; - for (let i = depthFrom; i >= 0; --i) { - const payload = spine[i].payload; - // Safe to early out because we always disqualify all ancestors of a disqualified node - // That is correct because every ancestor of a non-disjoint node is also non-disjoint - // because it must enclose the non-disjoint range. 
- if (payload.disqualified) - break; - payload.disqualified = true; - } - }; - - // Cursor payload factory - const makePayload = (): MergeCursorPayload => ({ disqualified: false }); - - const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { - const keys = leaf.keys; - const values = leaf.values; - for (let i = from; i < toExclusive; ++i) - BTree.alternatingPush(pending, keys[i], values[i]); - }; - - const onMoveInLeaf = ( - leaf: BNode, - payload: MergeCursorPayload, - fromIndex: number, - toIndex: number, - startedEqual: boolean - ) => { - check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); - const start = startedEqual ? fromIndex + 1 : fromIndex; - if (start < toIndex) - pushLeafRange(leaf, start, toIndex); - }; - - const onExitLeaf = ( - leaf: BNode, - payload: MergeCursorPayload, - startingIndex: number, - startedEqual: boolean, - cursorThis: MergeCursor, - ) => { - highestDisjoint = undefined; - if (!payload.disqualified) { - highestDisjoint = { node: leaf, height: 0 }; - if (cursorThis.spine.length === 0) { - // if we are exiting a leaf and there are no internal nodes, we will reach the end of the tree. - // In this case we need to add the leaf now because step up will not be called. - addHighestDisjoint(); - } - } else { - const start = startedEqual ? 
startingIndex + 1 : startingIndex; - const leafSize = leaf.keys.length; - if (start < leafSize) - pushLeafRange(leaf, start, leafSize); - } - }; - - const onStepUp = ( - parent: BNodeInternal, - height: number, - payload: MergeCursorPayload, - fromIndex: number, - spineIndex: number, - stepDownIndex: number, - cursorThis: MergeCursor - ) => { - const children = parent.children; - const nextHeight = height - 1; - if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ - || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { - if (!payload.disqualified) { - highestDisjoint = { node: parent, height }; - if (stepDownIndex === Number.POSITIVE_INFINITY) { - // We have finished our walk, and we won't be stepping down, so add the root - addHighestDisjoint(); - } - } else { - addHighestDisjoint(); - const len = children.length; - for (let i = fromIndex + 1; i < len; ++i) - addSharedNodeToDisjointSet(children[i], nextHeight); - } - } else { - // We have a valid step down index, so we need to disqualify the spine if needed. - // This is identical to the step down logic, but we must also perform it here because - // in the case of stepping down into a leaf, the step down callback is never called. - if (stepDownIndex > 0) { - disqualifySpine(cursorThis, spineIndex); - } - addHighestDisjoint(); - for (let i = fromIndex + 1; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(children[i], nextHeight); - } - }; - - const onStepDown = ( - node: BNodeInternal, - height: number, - spineIndex: number, - stepDownIndex: number, - cursorThis: MergeCursor - ) => { - if (stepDownIndex > 0) { - // When we step down into a node, we know that we have walked from a key that is less than our target. - // Because of this, if we are not stepping down into the first child, we know that all children before - // the stepDownIndex must overlap with the other tree because they must be before our target key. 
Since - // the child we are stepping into has a key greater than our target key, this node must overlap. - // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range - // of its children. - disqualifySpine(cursorThis, spineIndex); - const children = node.children; - const nextHeight = height - 1; - for (let i = 0; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(children[i], nextHeight); - } - }; - - const onEnterLeaf = ( - leaf: BNode, - destIndex: number, - cursorThis: MergeCursor, - cursorOther: MergeCursor - ) => { - if (destIndex > 0 - || areOverlapping(leaf.minKey()!, leaf.maxKey(), BTree.getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { - // Similar logic to the step-down case, except in this case we also know the leaf in the other - // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. - cursorThis.leafPayload.disqualified = true; - cursorOther.leafPayload.disqualified = true; - disqualifySpine(cursorThis, cursorThis.spine.length - 1); - disqualifySpine(cursorOther, cursorOther.spine.length - 1); - pushLeafRange(leaf, 0, destIndex); - } - }; - - // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second - const maxKeyLeft = left._root.maxKey() as K; - const maxKeyRight = right._root.maxKey() as K; - const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; - - // Initialize cursors at minimum keys. 
- const curA = BTree.createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); - const curB = BTree.createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); - - // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful - // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) - // the key of cursorB to the first key >= cursorB. Call this transition a "crossover point." All interior nodes that - // overlap cause a crossover point, and all crossover points are guaranteed to be walked using this method. Thus, - // all overlapping interior nodes will be found if they are checked for on step-down. - // The one exception mentioned above is when they start at the same key. In this case, they are both advanced forward and then - // their new ordering determines how they walk from there. - // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). - // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. - // Initialize disqualification w.r.t. opposite leaf. 
- const initDisqualify = (cur: MergeCursor, other: MergeCursor) => { - const minKey = BTree.getKey(cur); - const otherMin = BTree.getKey(other); - const otherMax = other.leaf.maxKey(); - if (areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) - cur.leafPayload.disqualified = true; - for (let i = 0; i < cur.spine.length; ++i) { - const entry = cur.spine[i]; - // Since we are on the left side of the tree, we can use the leaf min key for every spine node - if (areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) - entry.payload.disqualified = true; - } - }; - - initDisqualify(curA, curB); - initDisqualify(curB, curA); - - let leading = curA; - let trailing = curB; - let order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); - - // Walk both cursors in alternating hops - while (true) { - const areEqual = order === 0; - - if (areEqual) { - const key = BTree.getKey(leading); - const vA = curA.leaf.values[curA.leafIndex]; - const vB = curB.leaf.values[curB.leafIndex]; - // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value - // to pending because they respect the areEqual flag during their moves. - const merged = mergeValues(key, vA, vB); - if (merged !== undefined) - BTree.alternatingPush(pending, key, merged); - const outTrailing = BTree.moveForwardOne(trailing, leading, key, cmp); - const outLeading = BTree.moveForwardOne(leading, trailing, key, cmp); - if (outTrailing || outLeading) { - if (!outTrailing || !outLeading) { - // In these cases, we pass areEqual=false because a return value of "out of tree" means - // the cursor did not move. This must be true because they started equal and one of them had more tree - // to walk (one is !out), so they cannot be equal at this point. 
- if (outTrailing) { - BTree.moveTo(leading, trailing, maxKey, false, false, cmp); - } else { - BTree.moveTo(trailing, leading, maxKey, false, false, cmp); - } - } - break; - } - order = cmp(BTree.getKey(leading), BTree.getKey(trailing)); - } else { - if (order < 0) { - const tmp = trailing; - trailing = leading; - leading = tmp; - } - const [out, nowEqual] = BTree.moveTo(trailing, leading, BTree.getKey(leading), true, areEqual, cmp); - if (out) { - BTree.moveTo(leading, trailing, maxKey, false, areEqual, cmp); - break; - } else if (nowEqual) { - order = 0; - } else { - order = -1; - } - } - } - - // Ensure any trailing non-disjoint entries are added - flushPendingEntries(); - return { disjoint, tallestIndex }; - } - - // ------- Alternating list helpers ------- - // These helpers manage a list that alternates between two types of entries. - // Storing data this way avoids small tuple allocations and shows major improvements - // in GC time in benchmarks. - - private static alternatingCount(list: unknown[]): number { - return list.length >> 1; - } - - private static alternatingGetFirst(list: Array, index: number): TFirst { - return list[index << 1] as TFirst; - } - - private static alternatingGetSecond(list: Array, index: number): TSecond { - return list[(index << 1) + 1] as TSecond; - } - - private static alternatingPush(list: Array, first: TFirst, second: TSecond): void { - // Micro benchmarks show this is the fastest way to do this - list.push(first, second); - } - - /** - * Walks the cursor forward by one key. - * Should only be called to advance cursors that started equal. - * Returns true if end-of-tree was reached (cursor not structurally mutated). 
- */ - private static moveForwardOne( - cur: MergeCursor, - other: MergeCursor, - currentKey: K, - cmp: (a:K,b:K)=>number - ): boolean { - const leaf = cur.leaf; - const nextIndex = cur.leafIndex + 1; - if (nextIndex < leaf.keys.length) { - // Still within current leaf - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, true); - cur.leafIndex = nextIndex; - return false; - } - - // If our optimized step within leaf failed, use full moveTo logic - // Pass isInclusive=false to ensure we walk forward to the key exactly after the current - return BTree.moveTo(cur, other, currentKey, false, true, cmp)[0]; - } - - /** - * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. - * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). - * Also returns a boolean indicating if the target key was landed on exactly. - */ - private static moveTo( - cur: MergeCursor, - other: MergeCursor, - targetKey: K, - isInclusive: boolean, - startedEqual: boolean, - cmp: (a:K,b:K)=>number - ): [outOfTree: boolean, targetExactlyReached: boolean] { - // Cache callbacks for perf - const onMoveInLeaf = cur.onMoveInLeaf; - // Fast path: destination within current leaf - const leaf = cur.leaf; - const leafPayload = cur.leafPayload; - const i = leaf.indexOf(targetKey, -1, cmp); - let destInLeaf: number; - let targetExactlyReached: boolean; - if (i < 0) { - destInLeaf = ~i; - targetExactlyReached = false; - } else { - if (isInclusive) { - destInLeaf = i; - targetExactlyReached = true; - } else { - destInLeaf = i + 1; - targetExactlyReached = false; - } - } - const leafKeyCount = leaf.keys.length; - if (destInLeaf < leafKeyCount) { - onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); - cur.leafIndex = destInLeaf; - return [false, targetExactlyReached]; - } - - // Find first ancestor with a viable right step - const spine = cur.spine; - const initialSpineLength = spine.length; - let descentLevel 
= -1; - let descentIndex = -1; - - for (let s = initialSpineLength - 1; s >= 0; s--) { - const parent = spine[s].node; - const indexOf = parent.indexOf(targetKey, -1, cmp); - let stepDownIndex: number; - if (indexOf < 0) { - stepDownIndex = ~indexOf; - } else { - stepDownIndex = isInclusive ? indexOf : indexOf + 1; - } - - // Note: when key not found, indexOf with failXor=0 already returns insertion index - if (stepDownIndex < parent.keys.length) { - descentLevel = s; - descentIndex = stepDownIndex; - break; - } - } - - // Exit leaf; even if no spine, we did walk out of it conceptually - const startIndex = cur.leafIndex; - cur.onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); - - const onStepUp = cur.onStepUp; - if (descentLevel < 0) { - // No descent point; step up all the way; last callback gets infinity - for (let depth = initialSpineLength - 1; depth >= 0; depth--) { - const entry = spine[depth]; - const sd = depth === 0 ? Number.POSITIVE_INFINITY : Number.NaN; - onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, sd, cur); - } - return [true, false]; - } - - // Step up through ancestors above the descentLevel - for (let depth = initialSpineLength - 1; depth > descentLevel; depth--) { - const entry = spine[depth]; - onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, Number.NaN, cur); - } - - const entry = spine[descentLevel]; - onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); - entry.childIndex = descentIndex; - - const onStepDown = cur.onStepDown; - const makePayload = cur.makePayload; - - // Descend, invoking onStepDown and creating payloads - let height = initialSpineLength - descentLevel - 1; // calculate height before changing length - spine.length = descentLevel + 1; - let node: BNode = spine[descentLevel].node.children[descentIndex]; - - while (!node.isLeaf) { - const ni = node as BNodeInternal; 
- const keys = ni.keys; - let stepDownIndex = ni.indexOf(targetKey, 0, cmp); - if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) - stepDownIndex++; - const payload = makePayload(); - const spineIndex = spine.length; - spine.push({ node: ni, childIndex: stepDownIndex, payload }); - onStepDown(ni, height, spineIndex, stepDownIndex, cur); - node = ni.children[stepDownIndex]; - height -= 1; - } - // Enter destination leaf - const idx = node.indexOf(targetKey, -1, cmp); - let destIndex: number; - if (idx < 0) { - destIndex = ~idx; - targetExactlyReached = false; - } else { - if (isInclusive) { - destIndex = idx; - targetExactlyReached = true; - } else { - destIndex = idx + 1; - targetExactlyReached = false; - } - } - cur.leaf = node; - cur.leafPayload = makePayload(); - cur.leafIndex = destIndex; - cur.onEnterLeaf(node, destIndex, cur, other); - return [false, targetExactlyReached]; - } - - /** - * Create a cursor pointing to the leftmost key of the supplied tree. 
- */ - private static createCursor( - tree: BTree, - makePayload: MergeCursor["makePayload"], - onEnterLeaf: MergeCursor["onEnterLeaf"], - onMoveInLeaf: MergeCursor["onMoveInLeaf"], - onExitLeaf: MergeCursor["onExitLeaf"], - onStepUp: MergeCursor["onStepUp"], - onStepDown: MergeCursor["onStepDown"], - ): MergeCursor { - const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; - let n: BNode = tree._root; - while (!n.isLeaf) { - const ni = n as BNodeInternal; - const payload = makePayload(); - spine.push({ node: ni, childIndex: 0, payload }); - n = ni.children[0]; - } - const leafPayload = makePayload(); - const cur: MergeCursor = { - tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, - onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown - }; - return cur; - } - - private static getKey(c: MergeCursor): K { - return c.leaf.keys[c.leafIndex]; } /** @@ -2849,30 +1931,32 @@ class BNodeInternal extends BNode { */ type DiffCursor = { height: number, internalSpine: BNode[][], levelIndices: number[], leaf: BNode | undefined, currentKey: K }; -type MergeCursorPayload = { disqualified: boolean }; - -interface MergeCursor { - tree: BTree; - leaf: BNode; - leafIndex: number; - spine: Array<{ node: BNodeInternal, childIndex: number, payload: TPayload }>; - leafPayload: TPayload; - makePayload: () => TPayload; - onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; - onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; - onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; - onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; - onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: 
MergeCursor) => void; -} +// Optimization: this array of `undefined`s is used instead of a normal +// array of values in nodes where `undefined` is the only value. +// Its length is extended to max node size on first use; since it can +// be shared between trees with different maximums, its length can only +// increase, never decrease. Its type should be undefined[] but strangely +// TypeScript won't allow the comparison V[] === undefined[]. To prevent +// users from making this array too large, BTree has a maximum node size. +// +// FAQ: undefVals[i] is already undefined, so why increase the array size? +// Reading outside the bounds of an array is relatively slow because it +// has the side effect of scanning the prototype chain. +var undefVals: any[] = []; -type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; +function sumChildSizes(children: BNode[]): number { + var total = 0; + for (var i = 0; i < children.length; i++) + total += children[i].size(); + return total; +} /** * Determines whether two nodes are overlapping in key range. * Takes the leftmost known key of each node to avoid a log(n) min calculation. * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. */ -function areOverlapping( +export function areOverlapping( aMin: K, aMax: K, bMin: K, @@ -2908,26 +1992,6 @@ function areOverlapping( return aMinBMin <= 0 && aMaxBMax >= 0; } -// Optimization: this array of `undefined`s is used instead of a normal -// array of values in nodes where `undefined` is the only value. -// Its length is extended to max node size on first use; since it can -// be shared between trees with different maximums, its length can only -// increase, never decrease. Its type should be undefined[] but strangely -// TypeScript won't allow the comparison V[] === undefined[]. To prevent -// users from making this array too large, BTree has a maximum node size. 
-// -// FAQ: undefVals[i] is already undefined, so why increase the array size? -// Reading outside the bounds of an array is relatively slow because it -// has the side effect of scanning the prototype chain. -var undefVals: any[] = []; - -function sumChildSizes(children: BNode[]): number { - var total = 0; - for (var i = 0; i < children.length; i++) - total += children[i].size(); - return total; -} - const Delete = {delete: true}, DeleteRange = () => Delete; const Break = {break: true}; const EmptyLeaf = (function() { diff --git a/decompose.ts b/decompose.ts new file mode 100644 index 0000000..da42ff0 --- /dev/null +++ b/decompose.ts @@ -0,0 +1,676 @@ +import BTree, { areOverlapping } from "./b+tree"; +import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne } from "./parallelWalk"; + +export type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; + +/** + * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward + * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to + * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to + * the first key at or after the trailing cursor's previous position. + * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. 
+ */ + export function decompose( + left: BTree, + right: BTree, + mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined + ): DecomposeResult { + const cmp = left._compare; + check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); + // Holds the disjoint nodes that result from decomposition. + // Alternating entries of (height, node) to avoid creating small tuples + const disjoint: (number | BNode)[] = []; + // During the decomposition, leaves that are not disjoint are decomposed into individual entries + // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused + // disjoint subtree is added to the disjoint set. + // Note that there are unavoidable cases in which this will generate underfilled leaves. + // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. + // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, + // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] + const pending: (K | V)[] = []; + let tallestIndex = -1, tallestHeight = -1; + + // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. + // This is done because we cannot know immediately whether we can add the node to the disjoint set + // because its ancestor may also be disjoint and should be reused instead. + let highestDisjoint: { node: BNode, height: number } | undefined + // Have to do this as cast to convince TS it's ever assigned + = undefined as { node: BNode, height: number } | undefined; + + const flushPendingEntries = () => { + const totalPairs = alternatingCount(pending); + if (totalPairs === 0) + return; + + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. 
+ const max = left._maxNodeSize; + let leafCount = Math.ceil(totalPairs / max); + let remaining = totalPairs; + let pairIndex = 0; + while (leafCount > 0) { + const chunkSize = Math.ceil(remaining / leafCount); + const keys = new Array(chunkSize); + const vals = new Array(chunkSize); + for (let i = 0; i < chunkSize; i++) { + keys[i] = alternatingGetFirst(pending, pairIndex); + vals[i] = alternatingGetSecond(pending, pairIndex); + pairIndex++; + } + remaining -= chunkSize; + leafCount--; + const leaf = new BNode(keys, vals); + alternatingPush>(disjoint, 0, leaf); + if (tallestHeight < 0) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = 0; + } + } + pending.length = 0; + }; + + const addSharedNodeToDisjointSet = (node: BNode, height: number) => { + flushPendingEntries(); + node.isShared = true; + alternatingPush>(disjoint, height, node); + if (height > tallestHeight) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = height; + } + }; + + const addHighestDisjoint = () => { + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + }; + + // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) + const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { + const spine = cursor.spine; + for (let i = depthFrom; i >= 0; --i) { + const payload = spine[i].payload; + // Safe to early out because we always disqualify all ancestors of a disqualified node + // That is correct because every ancestor of a non-disjoint node is also non-disjoint + // because it must enclose the non-disjoint range. 
+ if (payload.disqualified) + break; + payload.disqualified = true; + } + }; + + // Cursor payload factory + const makePayload = (): MergeCursorPayload => ({ disqualified: false }); + + const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { + const keys = leaf.keys; + const values = leaf.values; + for (let i = from; i < toExclusive; ++i) + alternatingPush(pending, keys[i], values[i]); + }; + + const onMoveInLeaf = ( + leaf: BNode, + payload: MergeCursorPayload, + fromIndex: number, + toIndex: number, + startedEqual: boolean + ) => { + check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); + const start = startedEqual ? fromIndex + 1 : fromIndex; + if (start < toIndex) + pushLeafRange(leaf, start, toIndex); + }; + + const onExitLeaf = ( + leaf: BNode, + payload: MergeCursorPayload, + startingIndex: number, + startedEqual: boolean, + cursorThis: MergeCursor, + ) => { + highestDisjoint = undefined; + if (!payload.disqualified) { + highestDisjoint = { node: leaf, height: 0 }; + if (cursorThis.spine.length === 0) { + // if we are exiting a leaf and there are no internal nodes, we will reach the end of the tree. + // In this case we need to add the leaf now because step up will not be called. + addHighestDisjoint(); + } + } else { + const start = startedEqual ? 
startingIndex + 1 : startingIndex; + const leafSize = leaf.keys.length; + if (start < leafSize) + pushLeafRange(leaf, start, leafSize); + } + }; + + const onStepUp = ( + parent: BNodeInternal, + height: number, + payload: MergeCursorPayload, + fromIndex: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: MergeCursor + ) => { + const children = parent.children; + const nextHeight = height - 1; + if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ + || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { + if (!payload.disqualified) { + highestDisjoint = { node: parent, height }; + if (stepDownIndex === Number.POSITIVE_INFINITY) { + // We have finished our walk, and we won't be stepping down, so add the root + addHighestDisjoint(); + } + } else { + addHighestDisjoint(); + const len = children.length; + for (let i = fromIndex + 1; i < len; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + } else { + // We have a valid step down index, so we need to disqualify the spine if needed. + // This is identical to the step down logic, but we must also perform it here because + // in the case of stepping down into a leaf, the step down callback is never called. + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + addHighestDisjoint(); + for (let i = fromIndex + 1; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + }; + + const onStepDown = ( + node: BNodeInternal, + height: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: MergeCursor + ) => { + if (stepDownIndex > 0) { + // When we step down into a node, we know that we have walked from a key that is less than our target. + // Because of this, if we are not stepping down into the first child, we know that all children before + // the stepDownIndex must overlap with the other tree because they must be before our target key. 
Since + // the child we are stepping into has a key greater than our target key, this node must overlap. + // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range + // of its children. + disqualifySpine(cursorThis, spineIndex); + const children = node.children; + const nextHeight = height - 1; + for (let i = 0; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + }; + + const onEnterLeaf = ( + leaf: BNode, + destIndex: number, + cursorThis: MergeCursor, + cursorOther: MergeCursor + ) => { + if (destIndex > 0 + || areOverlapping(leaf.minKey()!, leaf.maxKey(), getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + // Similar logic to the step-down case, except in this case we also know the leaf in the other + // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. + cursorThis.leafPayload.disqualified = true; + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorThis, cursorThis.spine.length - 1); + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + pushLeafRange(leaf, 0, destIndex); + } + }; + + // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second + const maxKeyLeft = left._root.maxKey() as K; + const maxKeyRight = right._root.maxKey() as K; + const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + + // Initialize cursors at minimum keys. + const curA = createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + const curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + + // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful + // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) + // the key of cursorB to the first key >= cursorB. 
Call this transition a "crossover point." All interior nodes that + // overlap cause a crossover point, and all crossover points are guaranteed to be walked using this method. Thus, + // all overlapping interior nodes will be found if they are checked for on step-down. + // The one exception mentioned above is when they start at the same key. In this case, they are both advanced forward and then + // their new ordering determines how they walk from there. + // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). + // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. + // Initialize disqualification w.r.t. opposite leaf. + const initDisqualify = (cur: MergeCursor, other: MergeCursor) => { + const minKey = getKey(cur); + const otherMin = getKey(other); + const otherMax = other.leaf.maxKey(); + if (areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) + cur.leafPayload.disqualified = true; + for (let i = 0; i < cur.spine.length; ++i) { + const entry = cur.spine[i]; + // Since we are on the left side of the tree, we can use the leaf min key for every spine node + if (areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) + entry.payload.disqualified = true; + } + }; + + initDisqualify(curA, curB); + initDisqualify(curB, curA); + + let leading = curA; + let trailing = curB; + let order = cmp(getKey(leading), getKey(trailing)); + + // Walk both cursors in alternating hops + while (true) { + const areEqual = order === 0; + + if (areEqual) { + const key = getKey(leading); + const vA = curA.leaf.values[curA.leafIndex]; + const vB = curB.leaf.values[curB.leafIndex]; + // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value + // to pending because they respect the areEqual flag during their moves. 
+ const merged = mergeValues(key, vA, vB); + if (merged !== undefined) + alternatingPush(pending, key, merged); + const outTrailing = moveForwardOne(trailing, leading, key, cmp); + const outLeading = moveForwardOne(leading, trailing, key, cmp); + if (outTrailing || outLeading) { + if (!outTrailing || !outLeading) { + // In these cases, we pass areEqual=false because a return value of "out of tree" means + // the cursor did not move. This must be true because they started equal and one of them had more tree + // to walk (one is !out), so they cannot be equal at this point. + if (outTrailing) { + moveTo(leading, trailing, maxKey, false, false, cmp); + } else { + moveTo(trailing, leading, maxKey, false, false, cmp); + } + } + break; + } + order = cmp(getKey(leading), getKey(trailing)); + } else { + if (order < 0) { + const tmp = trailing; + trailing = leading; + leading = tmp; + } + const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp); + if (out) { + moveTo(leading, trailing, maxKey, false, areEqual, cmp); + break; + } else if (nowEqual) { + order = 0; + } else { + order = -1; + } + } + } + + // Ensure any trailing non-disjoint entries are added + flushPendingEntries(); + return { disjoint, tallestIndex }; + } + + export function buildFromDecomposition( + branchingFactor: number, + decomposed: DecomposeResult + ): BTree { + + const { disjoint, tallestIndex } = decomposed; + const disjointEntryCount = alternatingCount(disjoint); + + // Now we have a set of disjoint subtrees and we need to merge them into a single tree. + // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees + // to the "right" and "left" of it in sorted order, we append them onto the appropriate side + // of the current tree, splitting nodes as necessary to maintain balance. + // A "side" is referred to as a frontier, as it is a linked list of nodes from the root down to + // the leaf level on that side of the tree. 
Each appended subtree is appended to the node at the + // same height as itself on the frontier. Each tree is guaranteed to be at most as tall as the + // current frontier because we start from the tallest subtree and work outward. + const initialRoot = alternatingGetSecond>(disjoint, tallestIndex); + const frontier: BNode[] = [initialRoot]; + + // Process all subtrees to the right of the tallest subtree + if (tallestIndex + 1 <= disjointEntryCount - 1) { + updateFrontier(frontier, 0, getRightmostIndex); + processSide( + branchingFactor, + disjoint, + frontier, + tallestIndex + 1, + disjointEntryCount, 1, + getRightmostIndex, + getRightInsertionIndex, + splitOffRightSide, + updateRightMax + ); + } + + // Process all subtrees to the left of the current tree + if (tallestIndex - 1 >= 0) { + // Note we need to update the frontier here because the right-side processing may have grown the tree taller. + updateFrontier(frontier, 0, getLeftmostIndex); + processSide( + branchingFactor, + disjoint, + frontier, + tallestIndex - 1, + -1, + -1, + getLeftmostIndex, + getLeftmostIndex, + splitOffLeftSide, + noop // left side appending doesn't update max keys + ); + } + + const merged = new BTree(undefined, this._compare, this._maxNodeSize); + merged._root = frontier[0]; + + // Return the resulting tree + return merged; + } + + /** + * Processes one side (left or right) of the disjoint subtree set during a merge operation. + * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. + */ + function processSide( + branchingFactor: number, + disjoint: (number | BNode)[], + spine: BNode[], + start: number, + end: number, + step: number, + sideIndex: (node: BNodeInternal) => number, + sideInsertionIndex: (node: BNodeInternal) => number, + splitOffSide: (node: BNodeInternal) => BNodeInternal, + updateMax: (node: BNodeInternal, maxBelow: K) => void + ): void { + // Determine the depth of the first shared node on the frontier. 
+ // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning + // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the + // frontier for each insertion as that would fundamentally change our asymptotics. + let isSharedFrontierDepth = 0; + let cur = spine[0]; + // Find the first shared node on the frontier + while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { + isSharedFrontierDepth++; + cur = (cur as BNodeInternal).children[sideIndex(cur as BNodeInternal)]; + } + + // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. + // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. + // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. + // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. + // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. + // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. 
+    const unflushedSizes: number[] = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array
+
+    for (let i = start; i != end; i += step) {
+      const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf
+      const subtree = alternatingGetSecond<number, BNode<K, V>>(disjoint, i);
+      const subtreeHeight = alternatingGetFirst<number, BNode<K, V>>(disjoint, i);
+      const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight'
+
+      // Ensure path is unshared before mutation
+      ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex);
+
+      // Calculate expansion depth (first ancestor with capacity)
+      const expansionDepth = Math.max(0, findCascadeEndDepth(spine, insertionDepth, branchingFactor));
+
+      // Update sizes on spine above the shared ancestor before we expand
+      updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax);
+
+      // Append and cascade splits upward
+      const newRoot = appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide);
+      if (newRoot) {
+        // Set the spine root to the highest up new node; the rest of the spine is updated below
+        spine[0] = newRoot;
+        unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split."));
+        unflushedSizes.push(0); // new root level
+        isSharedFrontierDepth = insertionDepth + 2;
+        unflushedSizes[insertionDepth + 1] += subtree.size();
+      } else {
+        isSharedFrontierDepth = insertionDepth + 1;
+        unflushedSizes[insertionDepth] += subtree.size();
+      }
+
+      // Finally, update the frontier from the highest new node downward
+      // Note that this is often the point where the new subtree is attached,
+      // but in the case of cascaded splits it may be higher up.
+      updateFrontier(spine, expansionDepth, sideIndex);
+      check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared.");
+      check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split.");
+    }
+
+    // Finally, propagate any remaining unflushed sizes upward and update max keys
+    updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax);
+  };
+
+  /**
+   * Append a subtree at a given depth on the chosen side; cascade splits upward if needed.
+   * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions.
+   * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point.
+   * Returns a new root if the root was split, otherwise undefined.
+   */
+  function appendAndCascade<K, V>(
+    spine: BNode<K, V>[],
+    insertionDepth: number,
+    branchingFactor: number,
+    subtree: BNode<K, V>,
+    sideIndex: (node: BNodeInternal<K, V>) => number,
+    sideInsertionIndex: (node: BNodeInternal<K, V>) => number,
+    splitOffSide: (node: BNodeInternal<K, V>) => BNodeInternal<K, V>
+  ): BNodeInternal<K, V> | undefined {
+    // We must take care to avoid accidental propagation upward of the size of the inserted subtree.
+    // To do this, we first split nodes upward from the insertion point until we find a node with capacity
+    // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point,
+    // inserting at the end ensures no accidental propagation.
+ + // Depth is -1 if the subtree is the same height as the current tree + if (insertionDepth >= 0) { + let carry: BNode | undefined = undefined; + // Determine initially where to insert after any splits + let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; + if (insertTarget.keys.length >= branchingFactor) { + insertTarget = carry = splitOffSide(insertTarget); + } + + let d = insertionDepth - 1; + while (carry && d >= 0) { + const parent = spine[d] as BNodeInternal; + const idx = sideIndex(parent); + // Refresh last key since child was split + parent.keys[idx] = parent.children[idx].maxKey(); + if (parent.keys.length < branchingFactor) { + // We have reached the end of the cascade + insertNoCount(parent, sideInsertionIndex(parent), carry); + carry = undefined; + } else { + // Splitting the parent here requires care to avoid incorrectly double counting sizes + // Example: a node is at max capacity 4, with children each of size 4 for 16 total. + // We split the node into two nodes of 2 children each, but this does *not* modify the size + // of its parent. 
Therefore when we insert the carry into the torn-off node, we must not
+          // increase its size or we will double-count the size of the carry subtree.
+          const tornOff = splitOffSide(parent);
+          insertNoCount(tornOff, sideInsertionIndex(tornOff), carry);
+          carry = tornOff;
+        }
+        d--;
+      }
+
+      let newRoot: BNodeInternal<K, V> | undefined = undefined;
+      if (carry !== undefined) {
+        // Expansion reached the root, need a new root to hold carry
+        const oldRoot = spine[0] as BNodeInternal<K, V>;
+        newRoot = new BNodeInternal<K, V>([oldRoot], oldRoot.size() + carry.size());
+        insertNoCount(newRoot, sideInsertionIndex(newRoot), carry);
+      }
+
+      // Finally, insert the subtree at the insertion point
+      insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree);
+      return newRoot;
+    } else {
+      // Insertion of subtree with equal height to current tree
+      const oldRoot = spine[0] as BNodeInternal<K, V>;
+      const newRoot = new BNodeInternal<K, V>([oldRoot], oldRoot.size());
+      insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree);
+      return newRoot;
+    }
+  };
+
+  /**
+   * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate.
+   * Short-circuits if first shared node is deeper than depthTo (the insertion depth).
+ */ + function ensureNotShared( + spine: BNode[], + isSharedFrontierDepth: number, + depthToInclusive: number, + sideIndex: (node: BNodeInternal) => number) { + if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) + return; // nothing to clone when root is a leaf; equal-height case will handle this + + // Clone root if needed first (depth 0) + if (isSharedFrontierDepth === 0) { + const root = spine[0]; + spine[0] = root.clone() as BNodeInternal; + } + + // Clone downward along the frontier to 'depthToInclusive' + for (let depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { + const parent = spine[depth - 1] as BNodeInternal; + const childIndex = sideIndex(parent); + const clone = parent.children[childIndex].clone(); + parent.children[childIndex] = clone; + spine[depth] = clone as BNodeInternal; + } + }; + + /** + * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) + */ + function updateSizeAndMax( + spine: BNode[], + unflushedSizes: number[], + isSharedFrontierDepth: number, + depthUpToInclusive: number, + updateMax: (node: BNodeInternal, maxBelow: K) => void) { + // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because + // the insertion point is inside a shared node which will always have correct sizes + const maxKey = spine[isSharedFrontierDepth].maxKey(); + const startDepth = isSharedFrontierDepth - 1; + for (let depth = startDepth; depth >= depthUpToInclusive; depth--) { + const sizeAtLevel = unflushedSizes[depth]; + unflushedSizes[depth] = 0; // we are propagating it now + if (depth > 0) { + // propagate size upward, will be added lazily, either when a subtree is appended at or above that level or + // at the end of processing the entire side + unflushedSizes[depth - 1] += sizeAtLevel; + } + const node = spine[depth] as BNodeInternal; + node._size += sizeAtLevel; + // No-op if left side, as max keys in parents are unchanged by 
appending to the beginning of a node + updateMax(node, maxKey); + } + }; + + /** + * Update a spine (frontier) from a specific depth down, inclusive. + * Extends the frontier array if it is not already as long as the frontier. + */ + function updateFrontier(frontier: BNode[], depthLastValid: number, sideIndex: (node: BNodeInternal) => number): void { + check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); + const startingAncestor = frontier[depthLastValid]; + if (startingAncestor.isLeaf) + return; + const internal = startingAncestor as BNodeInternal; + let cur: BNode = internal.children[sideIndex(internal)]; + let depth = depthLastValid + 1; + while (!cur.isLeaf) { + const ni = cur as BNodeInternal; + frontier[depth] = ni; + cur = ni.children[sideIndex(ni)]; + depth++; + } + frontier[depth] = cur; + }; + + /** + * Find the first ancestor (starting at insertionDepth) with capacity. + */ + function findCascadeEndDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { + for (let depth = insertionDepth; depth >= 0; depth--) { + if (spine[depth].keys.length < branchingFactor) + return depth; + } + return -1; // no capacity, will need a new root + }; + + /** + * Inserts the child without updating cached size counts. 
+ */ + function insertNoCount( + parent: BNodeInternal, + index: number, + child: BNode + ): void { + parent.children.splice(index, 0, child); + parent.keys.splice(index, 0, child.maxKey()); + } + + // ---- Side-specific delegates for merging subtrees into a frontier ---- + + function getLeftmostIndex(): number { + return 0; + } + + function getRightmostIndex(node: BNodeInternal): number { + return node.children.length - 1; + } + + function getRightInsertionIndex(node: BNodeInternal): number { + return node.children.length; + } + + function splitOffRightSide(node: BNodeInternal): BNodeInternal { + return node.splitOffRightSide(); + } + + function splitOffLeftSide(node: BNodeInternal): BNodeInternal { + return node.splitOffLeftSide(); + } + + function updateRightMax(node: BNodeInternal, maxBelow: K): void { + node.keys[node.keys.length - 1] = maxBelow; + } + + // ------- Alternating list helpers ------- + // These helpers manage a list that alternates between two types of entries. + // Storing data this way avoids small tuple allocations and shows major improvements + // in GC time in benchmarks. 
+ + function alternatingCount(list: unknown[]): number { + return list.length >> 1; + } + + function alternatingGetFirst(list: Array, index: number): TFirst { + return list[index << 1] as TFirst; + } + + function alternatingGetSecond(list: Array, index: number): TSecond { + return list[(index << 1) + 1] as TSecond; + } + + function alternatingPush(list: Array, first: TFirst, second: TSecond): void { + // Micro benchmarks show this is the fastest way to do this + list.push(first, second); + } \ No newline at end of file diff --git a/intersect.ts b/intersect.ts new file mode 100644 index 0000000..d69e5d8 --- /dev/null +++ b/intersect.ts @@ -0,0 +1,63 @@ +import BTree from "./b+tree"; +import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperation } from "./parallelWalk" + + /** + * Intersects the two trees, calling the supplied `intersection` callback for each intersecting key/value pair. + * Neither tree is modified. + * @param treeA First tree to intersect. + * @param treeB Second tree to intersect. + * @param intersection Called for keys that appear in both trees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
+ */ + export function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { + checkCanDoSetOperation(treeA, treeB); + if (treeB.size === 0 || treeA.size === 0) + return; + + const cmp = treeA._compare; + const makePayload = (): undefined => undefined; + let cursorA = createCursor(treeA, makePayload, noop, noop, noop, noop, noop); + let cursorB = createCursor(treeB, makePayload, noop, noop, noop, noop, noop); + let leading = cursorA; + let trailing = cursorB; + let order = cmp(getKey(leading), getKey(trailing)); + + // The intersect walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. + // However, the only thing we care about is when the two cursors are equal (equality is intersection). + // When they are not equal we just advance the trailing cursor. + while (true) { + const areEqual = order === 0; + if (areEqual) { + const key = getKey(leading); + const vA = cursorA.leaf.values[cursorA.leafIndex]; + const vB = cursorB.leaf.values[cursorB.leafIndex]; + intersection(key, vA, vB); + const outT = moveForwardOne(trailing, leading, key, cmp); + const outL = moveForwardOne(leading, trailing, key, cmp); + if (outT && outL) + break; + order = cmp(getKey(leading), getKey(trailing)); + } else { + if (order < 0) { + const tmp = trailing; + trailing = leading; leading = tmp; + } + // At this point, leading is guaranteed to be ahead of trailing. + const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp) + if (out) { + // We've reached the end of one tree, so intersections are guaranteed to be done. 
+ break; + } else if (nowEqual) { + order = 0; + } else { + order = -1; // trailing is ahead of leading + } + } + } + } \ No newline at end of file diff --git a/merge.ts b/merge.ts new file mode 100644 index 0000000..87a8e1a --- /dev/null +++ b/merge.ts @@ -0,0 +1,33 @@ +import BTree from "./b+tree"; +import { decompose, buildFromDecomposition } from "./decompose"; +import { checkCanDoSetOperation } from "./parallelWalk"; + + /** + * Efficiently merges two trees, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param treeA First tree to merge. + * @param treeB Second tree to merge. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. + */ + export function merge(treeA: BTree, treeB: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { + const branchingFactor = checkCanDoSetOperation(treeA, treeB); + if (treeA._root.size() === 0) + return treeB.clone(); + if (treeB._root.size() === 0) + return treeA.clone(); + + // Decompose both trees into disjoint subtrees leaves. 
+ // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are the result of merging intersecting leaves. + const decomposed = decompose(treeA, treeB, merge); + return buildFromDecomposition(branchingFactor, decomposed); + } \ No newline at end of file diff --git a/parallelWalk.ts b/parallelWalk.ts new file mode 100644 index 0000000..144f99d --- /dev/null +++ b/parallelWalk.ts @@ -0,0 +1,219 @@ +import BTree from "./b+tree"; + +export type MergeCursorPayload = { disqualified: boolean }; + +export interface MergeCursor { + tree: BTree; + leaf: BNode; + leafIndex: number; + spine: Array<{ node: BNodeInternal, childIndex: number, payload: TPayload }>; + leafPayload: TPayload; + makePayload: () => TPayload; + onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; + onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; + onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; + onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; + onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; +} + + /** + * Walks the cursor forward by one key. + * Should only be called to advance cursors that started equal. + * Returns true if end-of-tree was reached (cursor not structurally mutated). 
+ */ + export function moveForwardOne( + cur: MergeCursor, + other: MergeCursor, + currentKey: K, + cmp: (a:K,b:K)=>number + ): boolean { + const leaf = cur.leaf; + const nextIndex = cur.leafIndex + 1; + if (nextIndex < leaf.keys.length) { + // Still within current leaf + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, true); + cur.leafIndex = nextIndex; + return false; + } + + // If our optimized step within leaf failed, use full moveTo logic + // Pass isInclusive=false to ensure we walk forward to the key exactly after the current + return BTree.moveTo(cur, other, currentKey, false, true, cmp)[0]; + } + +/** + * Create a cursor pointing to the leftmost key of the supplied tree. + */ +export function createCursor( + tree: BTree, + makePayload: MergeCursor["makePayload"], + onEnterLeaf: MergeCursor["onEnterLeaf"], + onMoveInLeaf: MergeCursor["onMoveInLeaf"], + onExitLeaf: MergeCursor["onExitLeaf"], + onStepUp: MergeCursor["onStepUp"], + onStepDown: MergeCursor["onStepDown"], + ): MergeCursor { + const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; + let n: BNode = tree._root; + while (!n.isLeaf) { + const ni = n as BNodeInternal; + const payload = makePayload(); + spine.push({ node: ni, childIndex: 0, payload }); + n = ni.children[0]; + } + const leafPayload = makePayload(); + const cur: MergeCursor = { + tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, + onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown + }; + return cur; + } + +export function getKey(c: MergeCursor): K { + return c.leaf.keys[c.leafIndex]; +} + + /** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). + * Also returns a boolean indicating if the target key was landed on exactly. 
+ */ + export function moveTo( + cur: MergeCursor, + other: MergeCursor, + targetKey: K, + isInclusive: boolean, + startedEqual: boolean, + cmp: (a:K,b:K)=>number + ): [outOfTree: boolean, targetExactlyReached: boolean] { + // Cache callbacks for perf + const onMoveInLeaf = cur.onMoveInLeaf; + // Fast path: destination within current leaf + const leaf = cur.leaf; + const leafPayload = cur.leafPayload; + const i = leaf.indexOf(targetKey, -1, cmp); + let destInLeaf: number; + let targetExactlyReached: boolean; + if (i < 0) { + destInLeaf = ~i; + targetExactlyReached = false; + } else { + if (isInclusive) { + destInLeaf = i; + targetExactlyReached = true; + } else { + destInLeaf = i + 1; + targetExactlyReached = false; + } + } + const leafKeyCount = leaf.keys.length; + if (destInLeaf < leafKeyCount) { + onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); + cur.leafIndex = destInLeaf; + return [false, targetExactlyReached]; + } + + // Find first ancestor with a viable right step + const spine = cur.spine; + const initialSpineLength = spine.length; + let descentLevel = -1; + let descentIndex = -1; + + for (let s = initialSpineLength - 1; s >= 0; s--) { + const parent = spine[s].node; + const indexOf = parent.indexOf(targetKey, -1, cmp); + let stepDownIndex: number; + if (indexOf < 0) { + stepDownIndex = ~indexOf; + } else { + stepDownIndex = isInclusive ? 
indexOf : indexOf + 1; + } + + // Note: when key not found, indexOf with failXor=0 already returns insertion index + if (stepDownIndex < parent.keys.length) { + descentLevel = s; + descentIndex = stepDownIndex; + break; + } + } + + // Exit leaf; even if no spine, we did walk out of it conceptually + const startIndex = cur.leafIndex; + cur.onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); + + const onStepUp = cur.onStepUp; + if (descentLevel < 0) { + // No descent point; step up all the way; last callback gets infinity + for (let depth = initialSpineLength - 1; depth >= 0; depth--) { + const entry = spine[depth]; + const sd = depth === 0 ? Number.POSITIVE_INFINITY : Number.NaN; + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, sd, cur); + } + return [true, false]; + } + + // Step up through ancestors above the descentLevel + for (let depth = initialSpineLength - 1; depth > descentLevel; depth--) { + const entry = spine[depth]; + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, Number.NaN, cur); + } + + const entry = spine[descentLevel]; + onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); + entry.childIndex = descentIndex; + + const onStepDown = cur.onStepDown; + const makePayload = cur.makePayload; + + // Descend, invoking onStepDown and creating payloads + let height = initialSpineLength - descentLevel - 1; // calculate height before changing length + spine.length = descentLevel + 1; + let node: BNode = spine[descentLevel].node.children[descentIndex]; + + while (!node.isLeaf) { + const ni = node as BNodeInternal; + const keys = ni.keys; + let stepDownIndex = ni.indexOf(targetKey, 0, cmp); + if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) + stepDownIndex++; + const payload = makePayload(); + const spineIndex = spine.length; + spine.push({ node: ni, 
childIndex: stepDownIndex, payload }); + onStepDown(ni, height, spineIndex, stepDownIndex, cur); + node = ni.children[stepDownIndex]; + height -= 1; + } + + // Enter destination leaf + const idx = node.indexOf(targetKey, -1, cmp); + let destIndex: number; + if (idx < 0) { + destIndex = ~idx; + targetExactlyReached = false; + } else { + if (isInclusive) { + destIndex = idx; + targetExactlyReached = true; + } else { + destIndex = idx + 1; + targetExactlyReached = false; + } + } + cur.leaf = node; + cur.leafPayload = makePayload(); + cur.leafIndex = destIndex; + cur.onEnterLeaf(node, destIndex, cur, other); + return [false, targetExactlyReached]; + } + +export function noop(): void {} + + export function checkCanDoSetOperation(treeA: BTree, treeB: BTree): number { + if (treeA._compare !== treeB._compare) + throw new Error("Cannot merge BTrees with different comparators."); + + const branchingFactor = treeA._maxNodeSize; + if (branchingFactor !== treeB._maxNodeSize) + throw new Error("Cannot merge BTrees with different max node sizes."); + return branchingFactor; + } \ No newline at end of file From bc443ddcd2f3c0bed69c44c48d08204962504c0a Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 12:26:09 -0800 Subject: [PATCH 058/143] fix spacing --- decompose.ts | 1230 +++++++++++++++++++++++------------------------ intersect.ts | 112 ++--- merge.ts | 56 +-- parallelWalk.ts | 344 ++++++------- 4 files changed, 871 insertions(+), 871 deletions(-) diff --git a/decompose.ts b/decompose.ts index da42ff0..3cdf5d0 100644 --- a/decompose.ts +++ b/decompose.ts @@ -4,673 +4,673 @@ import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne } export type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; /** - * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes - * in the other tree. 
Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. - * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward - * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to - * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to - * the first key at or after the trailing cursor's previous position. - * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. - */ - export function decompose( - left: BTree, - right: BTree, - mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined - ): DecomposeResult { - const cmp = left._compare; - check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); - // Holds the disjoint nodes that result from decomposition. - // Alternating entries of (height, node) to avoid creating small tuples - const disjoint: (number | BNode)[] = []; - // During the decomposition, leaves that are not disjoint are decomposed into individual entries - // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused - // disjoint subtree is added to the disjoint set. - // Note that there are unavoidable cases in which this will generate underfilled leaves. - // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. - // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, - // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] - const pending: (K | V)[] = []; - let tallestIndex = -1, tallestHeight = -1; - - // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. 
- // This is done because we cannot know immediately whether we can add the node to the disjoint set - // because its ancestor may also be disjoint and should be reused instead. - let highestDisjoint: { node: BNode, height: number } | undefined - // Have to do this as cast to convince TS it's ever assigned - = undefined as { node: BNode, height: number } | undefined; - - const flushPendingEntries = () => { - const totalPairs = alternatingCount(pending); - if (totalPairs === 0) - return; - - // This method creates as many evenly filled leaves as possible from - // the pending entries. All will be > 50% full if we are creating more than one leaf. - const max = left._maxNodeSize; - let leafCount = Math.ceil(totalPairs / max); - let remaining = totalPairs; - let pairIndex = 0; - while (leafCount > 0) { - const chunkSize = Math.ceil(remaining / leafCount); - const keys = new Array(chunkSize); - const vals = new Array(chunkSize); - for (let i = 0; i < chunkSize; i++) { - keys[i] = alternatingGetFirst(pending, pairIndex); - vals[i] = alternatingGetSecond(pending, pairIndex); - pairIndex++; - } - remaining -= chunkSize; - leafCount--; - const leaf = new BNode(keys, vals); - alternatingPush>(disjoint, 0, leaf); - if (tallestHeight < 0) { - tallestIndex = alternatingCount(disjoint) - 1; - tallestHeight = 0; - } - } - pending.length = 0; - }; - - const addSharedNodeToDisjointSet = (node: BNode, height: number) => { - flushPendingEntries(); - node.isShared = true; - alternatingPush>(disjoint, height, node); - if (height > tallestHeight) { - tallestIndex = alternatingCount(disjoint) - 1; - tallestHeight = height; - } - }; + * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + * The algorithm is a parallel tree walk using two cursors. 
The trailing cursor (behind in key space) is walked forward + * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to + * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to + * the first key at or after the trailing cursor's previous position. + * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. + */ +export function decompose( + left: BTree, + right: BTree, + mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined +): DecomposeResult { + const cmp = left._compare; + check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); + // Holds the disjoint nodes that result from decomposition. + // Alternating entries of (height, node) to avoid creating small tuples + const disjoint: (number | BNode)[] = []; + // During the decomposition, leaves that are not disjoint are decomposed into individual entries + // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused + // disjoint subtree is added to the disjoint set. + // Note that there are unavoidable cases in which this will generate underfilled leaves. + // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. + // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, + // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] + const pending: (K | V)[] = []; + let tallestIndex = -1, tallestHeight = -1; + + // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. + // This is done because we cannot know immediately whether we can add the node to the disjoint set + // because its ancestor may also be disjoint and should be reused instead. 
+ let highestDisjoint: { node: BNode, height: number } | undefined + // Have to do this as cast to convince TS it's ever assigned + = undefined as { node: BNode, height: number } | undefined; + + const flushPendingEntries = () => { + const totalPairs = alternatingCount(pending); + if (totalPairs === 0) + return; - const addHighestDisjoint = () => { - if (highestDisjoint !== undefined) { - addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); - highestDisjoint = undefined; - } - }; - - // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) - const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { - const spine = cursor.spine; - for (let i = depthFrom; i >= 0; --i) { - const payload = spine[i].payload; - // Safe to early out because we always disqualify all ancestors of a disqualified node - // That is correct because every ancestor of a non-disjoint node is also non-disjoint - // because it must enclose the non-disjoint range. - if (payload.disqualified) - break; - payload.disqualified = true; - } - }; - - // Cursor payload factory - const makePayload = (): MergeCursorPayload => ({ disqualified: false }); - - const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { - const keys = leaf.keys; - const values = leaf.values; - for (let i = from; i < toExclusive; ++i) - alternatingPush(pending, keys[i], values[i]); - }; - - const onMoveInLeaf = ( - leaf: BNode, - payload: MergeCursorPayload, - fromIndex: number, - toIndex: number, - startedEqual: boolean - ) => { - check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); - const start = startedEqual ? 
fromIndex + 1 : fromIndex; - if (start < toIndex) - pushLeafRange(leaf, start, toIndex); - }; - - const onExitLeaf = ( - leaf: BNode, - payload: MergeCursorPayload, - startingIndex: number, - startedEqual: boolean, - cursorThis: MergeCursor, - ) => { - highestDisjoint = undefined; - if (!payload.disqualified) { - highestDisjoint = { node: leaf, height: 0 }; - if (cursorThis.spine.length === 0) { - // if we are exiting a leaf and there are no internal nodes, we will reach the end of the tree. - // In this case we need to add the leaf now because step up will not be called. - addHighestDisjoint(); - } - } else { - const start = startedEqual ? startingIndex + 1 : startingIndex; - const leafSize = leaf.keys.length; - if (start < leafSize) - pushLeafRange(leaf, start, leafSize); - } - }; - - const onStepUp = ( - parent: BNodeInternal, - height: number, - payload: MergeCursorPayload, - fromIndex: number, - spineIndex: number, - stepDownIndex: number, - cursorThis: MergeCursor - ) => { - const children = parent.children; - const nextHeight = height - 1; - if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ - || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { - if (!payload.disqualified) { - highestDisjoint = { node: parent, height }; - if (stepDownIndex === Number.POSITIVE_INFINITY) { - // We have finished our walk, and we won't be stepping down, so add the root - addHighestDisjoint(); - } - } else { - addHighestDisjoint(); - const len = children.length; - for (let i = fromIndex + 1; i < len; ++i) - addSharedNodeToDisjointSet(children[i], nextHeight); - } - } else { - // We have a valid step down index, so we need to disqualify the spine if needed. - // This is identical to the step down logic, but we must also perform it here because - // in the case of stepping down into a leaf, the step down callback is never called. 
- if (stepDownIndex > 0) { - disqualifySpine(cursorThis, spineIndex); - } - addHighestDisjoint(); - for (let i = fromIndex + 1; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(children[i], nextHeight); - } - }; - - const onStepDown = ( - node: BNodeInternal, - height: number, - spineIndex: number, - stepDownIndex: number, - cursorThis: MergeCursor - ) => { - if (stepDownIndex > 0) { - // When we step down into a node, we know that we have walked from a key that is less than our target. - // Because of this, if we are not stepping down into the first child, we know that all children before - // the stepDownIndex must overlap with the other tree because they must be before our target key. Since - // the child we are stepping into has a key greater than our target key, this node must overlap. - // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range - // of its children. - disqualifySpine(cursorThis, spineIndex); - const children = node.children; - const nextHeight = height - 1; - for (let i = 0; i < stepDownIndex; ++i) - addSharedNodeToDisjointSet(children[i], nextHeight); - } - }; - - const onEnterLeaf = ( - leaf: BNode, - destIndex: number, - cursorThis: MergeCursor, - cursorOther: MergeCursor - ) => { - if (destIndex > 0 - || areOverlapping(leaf.minKey()!, leaf.maxKey(), getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { - // Similar logic to the step-down case, except in this case we also know the leaf in the other - // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. - cursorThis.leafPayload.disqualified = true; - cursorOther.leafPayload.disqualified = true; - disqualifySpine(cursorThis, cursorThis.spine.length - 1); - disqualifySpine(cursorOther, cursorOther.spine.length - 1); - pushLeafRange(leaf, 0, destIndex); + // This method creates as many evenly filled leaves as possible from + // the pending entries. 
All will be > 50% full if we are creating more than one leaf. + const max = left._maxNodeSize; + let leafCount = Math.ceil(totalPairs / max); + let remaining = totalPairs; + let pairIndex = 0; + while (leafCount > 0) { + const chunkSize = Math.ceil(remaining / leafCount); + const keys = new Array(chunkSize); + const vals = new Array(chunkSize); + for (let i = 0; i < chunkSize; i++) { + keys[i] = alternatingGetFirst(pending, pairIndex); + vals[i] = alternatingGetSecond(pending, pairIndex); + pairIndex++; } - }; - - // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second - const maxKeyLeft = left._root.maxKey() as K; - const maxKeyRight = right._root.maxKey() as K; - const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; - - // Initialize cursors at minimum keys. - const curA = createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); - const curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); - - // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful - // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) - // the key of cursorB to the first key >= cursorB. Call this transition a "crossover point." All interior nodes that - // overlap cause a crossover point, and all crossover points are guaranteed to be walked using this method. Thus, - // all overlapping interior nodes will be found if they are checked for on step-down. - // The one exception mentioned above is when they start at the same key. In this case, they are both advanced forward and then - // their new ordering determines how they walk from there. - // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). 
- // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. - // Initialize disqualification w.r.t. opposite leaf. - const initDisqualify = (cur: MergeCursor, other: MergeCursor) => { - const minKey = getKey(cur); - const otherMin = getKey(other); - const otherMax = other.leaf.maxKey(); - if (areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) - cur.leafPayload.disqualified = true; - for (let i = 0; i < cur.spine.length; ++i) { - const entry = cur.spine[i]; - // Since we are on the left side of the tree, we can use the leaf min key for every spine node - if (areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) - entry.payload.disqualified = true; - } - }; - - initDisqualify(curA, curB); - initDisqualify(curB, curA); - - let leading = curA; - let trailing = curB; - let order = cmp(getKey(leading), getKey(trailing)); - - // Walk both cursors in alternating hops - while (true) { - const areEqual = order === 0; - - if (areEqual) { - const key = getKey(leading); - const vA = curA.leaf.values[curA.leafIndex]; - const vB = curB.leaf.values[curB.leafIndex]; - // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value - // to pending because they respect the areEqual flag during their moves. - const merged = mergeValues(key, vA, vB); - if (merged !== undefined) - alternatingPush(pending, key, merged); - const outTrailing = moveForwardOne(trailing, leading, key, cmp); - const outLeading = moveForwardOne(leading, trailing, key, cmp); - if (outTrailing || outLeading) { - if (!outTrailing || !outLeading) { - // In these cases, we pass areEqual=false because a return value of "out of tree" means - // the cursor did not move. This must be true because they started equal and one of them had more tree - // to walk (one is !out), so they cannot be equal at this point. 
- if (outTrailing) { - moveTo(leading, trailing, maxKey, false, false, cmp); - } else { - moveTo(trailing, leading, maxKey, false, false, cmp); - } - } - break; - } - order = cmp(getKey(leading), getKey(trailing)); - } else { - if (order < 0) { - const tmp = trailing; - trailing = leading; - leading = tmp; - } - const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp); - if (out) { - moveTo(leading, trailing, maxKey, false, areEqual, cmp); - break; - } else if (nowEqual) { - order = 0; - } else { - order = -1; - } + remaining -= chunkSize; + leafCount--; + const leaf = new BNode(keys, vals); + alternatingPush>(disjoint, 0, leaf); + if (tallestHeight < 0) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = 0; } } + pending.length = 0; + }; - // Ensure any trailing non-disjoint entries are added + const addSharedNodeToDisjointSet = (node: BNode, height: number) => { flushPendingEntries(); - return { disjoint, tallestIndex }; - } - - export function buildFromDecomposition( - branchingFactor: number, - decomposed: DecomposeResult - ): BTree { - - const { disjoint, tallestIndex } = decomposed; - const disjointEntryCount = alternatingCount(disjoint); - - // Now we have a set of disjoint subtrees and we need to merge them into a single tree. - // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees - // to the "right" and "left" of it in sorted order, we append them onto the appropriate side - // of the current tree, splitting nodes as necessary to maintain balance. - // A "side" is referred to as a frontier, as it is a linked list of nodes from the root down to - // the leaf level on that side of the tree. Each appended subtree is appended to the node at the - // same height as itself on the frontier. Each tree is guaranteed to be at most as tall as the - // current frontier because we start from the tallest subtree and work outward. 
- const initialRoot = alternatingGetSecond>(disjoint, tallestIndex); - const frontier: BNode[] = [initialRoot]; - - // Process all subtrees to the right of the tallest subtree - if (tallestIndex + 1 <= disjointEntryCount - 1) { - updateFrontier(frontier, 0, getRightmostIndex); - processSide( - branchingFactor, - disjoint, - frontier, - tallestIndex + 1, - disjointEntryCount, 1, - getRightmostIndex, - getRightInsertionIndex, - splitOffRightSide, - updateRightMax - ); + node.isShared = true; + alternatingPush>(disjoint, height, node); + if (height > tallestHeight) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = height; } + }; - // Process all subtrees to the left of the current tree - if (tallestIndex - 1 >= 0) { - // Note we need to update the frontier here because the right-side processing may have grown the tree taller. - updateFrontier(frontier, 0, getLeftmostIndex); - processSide( - branchingFactor, - disjoint, - frontier, - tallestIndex - 1, - -1, - -1, - getLeftmostIndex, - getLeftmostIndex, - splitOffLeftSide, - noop // left side appending doesn't update max keys - ); + const addHighestDisjoint = () => { + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; } + }; - const merged = new BTree(undefined, this._compare, this._maxNodeSize); - merged._root = frontier[0]; - - // Return the resulting tree - return merged; - } - - /** - * Processes one side (left or right) of the disjoint subtree set during a merge operation. - * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. 
- */ - function processSide( - branchingFactor: number, - disjoint: (number | BNode)[], - spine: BNode[], - start: number, - end: number, - step: number, - sideIndex: (node: BNodeInternal) => number, - sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal, - updateMax: (node: BNodeInternal, maxBelow: K) => void - ): void { - // Determine the depth of the first shared node on the frontier. - // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning - // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the - // frontier for each insertion as that would fundamentally change our asymptotics. - let isSharedFrontierDepth = 0; - let cur = spine[0]; - // Find the first shared node on the frontier - while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { - isSharedFrontierDepth++; - cur = (cur as BNodeInternal).children[sideIndex(cur as BNodeInternal)]; + // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) + const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { + const spine = cursor.spine; + for (let i = depthFrom; i >= 0; --i) { + const payload = spine[i].payload; + // Safe to early out because we always disqualify all ancestors of a disqualified node + // That is correct because every ancestor of a non-disjoint node is also non-disjoint + // because it must enclose the non-disjoint range. + if (payload.disqualified) + break; + payload.disqualified = true; } + }; - // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. - // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. - // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. 
- // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. - // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. - // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. - const unflushedSizes: number[] = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array - - for (let i = start; i != end; i += step) { - const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf - const subtree = alternatingGetSecond>(disjoint, i); - const subtreeHeight = alternatingGetFirst>(disjoint, i); - const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' - - // Ensure path is unshared before mutation - ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); - - // Calculate expansion depth (first ancestor with capacity) - const expansionDepth = Math.max(0, findCascadeEndDepth(spine, insertionDepth, branchingFactor)); - - // Update sizes on spine above the shared ancestor before we expand - updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); - - // Append and cascade splits upward - const newRoot = appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); - if (newRoot) { - // Set the spine root to the highest up new node; the rest of the spine is updated below - spine[0] = newRoot; - unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); - unflushedSizes.push(0); // new root level - isSharedFrontierDepth = insertionDepth + 2; - unflushedSizes[insertionDepth + 1] += susize(); - } else { - isSharedFrontierDepth = insertionDepth + 1; - unflushedSizes[insertionDepth] += susize(); - } + // Cursor payload factory + const makePayload = (): MergeCursorPayload => ({ 
disqualified: false }); - // Finally, update the frontier from the highest new node downward - // Note that this is often the point where the new subtree is attached, - // but in the case of cascaded splits it may be higher up. - updateFrontier(spine, expansionDepth, sideIndex); - check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); - check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); - } + const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { + const keys = leaf.keys; + const values = leaf.values; + for (let i = from; i < toExclusive; ++i) + alternatingPush(pending, keys[i], values[i]); + }; - // Finally, propagate any remaining unflushed sizes upward and update max keys - updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); + const onMoveInLeaf = ( + leaf: BNode, + payload: MergeCursorPayload, + fromIndex: number, + toIndex: number, + startedEqual: boolean + ) => { + check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); + const start = startedEqual ? fromIndex + 1 : fromIndex; + if (start < toIndex) + pushLeafRange(leaf, start, toIndex); }; - /** - * Append a subtree at a given depth on the chosen side; cascade splits upward if needed. - * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. - * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. - * Returns a new root if the root was split, otherwise undefined. 
- */ - function appendAndCascade( - spine: BNode[], - insertionDepth: number, - branchingFactor: number, - subtree: BNode, - sideIndex: (node: BNodeInternal) => number, - sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal - ): BNodeInternal | undefined { - // We must take care to avoid accidental propagation upward of the size of the inserted su - // To do this, we first split nodes upward from the insertion point until we find a node with capacity - // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, - // inserting at the end ensures no accidental propagation. - - // Depth is -1 if the subtree is the same height as the current tree - if (insertionDepth >= 0) { - let carry: BNode | undefined = undefined; - // Determine initially where to insert after any splits - let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; - if (insertTarget.keys.length >= branchingFactor) { - insertTarget = carry = splitOffSide(insertTarget); + const onExitLeaf = ( + leaf: BNode, + payload: MergeCursorPayload, + startingIndex: number, + startedEqual: boolean, + cursorThis: MergeCursor, + ) => { + highestDisjoint = undefined; + if (!payload.disqualified) { + highestDisjoint = { node: leaf, height: 0 }; + if (cursorThis.spine.length === 0) { + // if we are exiting a leaf and there are no internal nodes, we will reach the end of the tree. + // In this case we need to add the leaf now because step up will not be called. + addHighestDisjoint(); } + } else { + const start = startedEqual ? 
startingIndex + 1 : startingIndex; + const leafSize = leaf.keys.length; + if (start < leafSize) + pushLeafRange(leaf, start, leafSize); + } + }; - let d = insertionDepth - 1; - while (carry && d >= 0) { - const parent = spine[d] as BNodeInternal; - const idx = sideIndex(parent); - // Refresh last key since child was split - parent.keys[idx] = parent.children[idx].maxKey(); - if (parent.keys.length < branchingFactor) { - // We have reached the end of the cascade - insertNoCount(parent, sideInsertionIndex(parent), carry); - carry = undefined; - } else { - // Splitting the parent here requires care to avoid incorrectly double counting sizes - // Example: a node is at max capacity 4, with children each of size 4 for 16 total. - // We split the node into two nodes of 2 children each, but this does *not* modify the size - // of its parent. Therefore when we insert the carry into the torn-off node, we must not - // increase its size or we will double-count the size of the carry su - const tornOff = splitOffSide(parent); - insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); - carry = tornOff; + const onStepUp = ( + parent: BNodeInternal, + height: number, + payload: MergeCursorPayload, + fromIndex: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: MergeCursor + ) => { + const children = parent.children; + const nextHeight = height - 1; + if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ + || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { + if (!payload.disqualified) { + highestDisjoint = { node: parent, height }; + if (stepDownIndex === Number.POSITIVE_INFINITY) { + // We have finished our walk, and we won't be stepping down, so add the root + addHighestDisjoint(); } - d--; - } - - let newRoot: BNodeInternal | undefined = undefined; - if (carry !== undefined) { - // Expansion reached the root, need a new root to hold carry - const oldRoot = spine[0] as BNodeInternal; - newRoot = 
new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); - insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); + } else { + addHighestDisjoint(); + const len = children.length; + for (let i = fromIndex + 1; i < len; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); } - - // Finally, insert the subtree at the insertion point - insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); - return newRoot; } else { - // Insertion of subtree with equal height to current tree - const oldRoot = spine[0] as BNodeInternal; - const newRoot = new BNodeInternal([oldRoot], oldRoot.size()); - insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); - return newRoot; + // We have a valid step down index, so we need to disqualify the spine if needed. + // This is identical to the step down logic, but we must also perform it here because + // in the case of stepping down into a leaf, the step down callback is never called. + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + addHighestDisjoint(); + for (let i = fromIndex + 1; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); } }; - /** - * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. - * Short-circuits if first shared node is deeper than depthTo (the insertion depth). 
- */ - function ensureNotShared( - spine: BNode[], - isSharedFrontierDepth: number, - depthToInclusive: number, - sideIndex: (node: BNodeInternal) => number) { - if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) - return; // nothing to clone when root is a leaf; equal-height case will handle this - - // Clone root if needed first (depth 0) - if (isSharedFrontierDepth === 0) { - const root = spine[0]; - spine[0] = root.clone() as BNodeInternal; - } - - // Clone downward along the frontier to 'depthToInclusive' - for (let depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { - const parent = spine[depth - 1] as BNodeInternal; - const childIndex = sideIndex(parent); - const clone = parent.children[childIndex].clone(); - parent.children[childIndex] = clone; - spine[depth] = clone as BNodeInternal; + const onStepDown = ( + node: BNodeInternal, + height: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: MergeCursor + ) => { + if (stepDownIndex > 0) { + // When we step down into a node, we know that we have walked from a key that is less than our target. + // Because of this, if we are not stepping down into the first child, we know that all children before + // the stepDownIndex must overlap with the other tree because they must be before our target key. Since + // the child we are stepping into has a key greater than our target key, this node must overlap. + // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range + // of its children. 
+ disqualifySpine(cursorThis, spineIndex); + const children = node.children; + const nextHeight = height - 1; + for (let i = 0; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); } }; - /** - * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) - */ - function updateSizeAndMax( - spine: BNode[], - unflushedSizes: number[], - isSharedFrontierDepth: number, - depthUpToInclusive: number, - updateMax: (node: BNodeInternal, maxBelow: K) => void) { - // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because - // the insertion point is inside a shared node which will always have correct sizes - const maxKey = spine[isSharedFrontierDepth].maxKey(); - const startDepth = isSharedFrontierDepth - 1; - for (let depth = startDepth; depth >= depthUpToInclusive; depth--) { - const sizeAtLevel = unflushedSizes[depth]; - unflushedSizes[depth] = 0; // we are propagating it now - if (depth > 0) { - // propagate size upward, will be added lazily, either when a subtree is appended at or above that level or - // at the end of processing the entire side - unflushedSizes[depth - 1] += sizeAtLevel; - } - const node = spine[depth] as BNodeInternal; - node._size += sizeAtLevel; - // No-op if left side, as max keys in parents are unchanged by appending to the beginning of a node - updateMax(node, maxKey); + const onEnterLeaf = ( + leaf: BNode, + destIndex: number, + cursorThis: MergeCursor, + cursorOther: MergeCursor + ) => { + if (destIndex > 0 + || areOverlapping(leaf.minKey()!, leaf.maxKey(), getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + // Similar logic to the step-down case, except in this case we also know the leaf in the other + // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. 
+ cursorThis.leafPayload.disqualified = true; + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorThis, cursorThis.spine.length - 1); + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + pushLeafRange(leaf, 0, destIndex); } }; - /** - * Update a spine (frontier) from a specific depth down, inclusive. - * Extends the frontier array if it is not already as long as the frontier. - */ - function updateFrontier(frontier: BNode[], depthLastValid: number, sideIndex: (node: BNodeInternal) => number): void { - check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); - const startingAncestor = frontier[depthLastValid]; - if (startingAncestor.isLeaf) - return; - const internal = startingAncestor as BNodeInternal; - let cur: BNode = internal.children[sideIndex(internal)]; - let depth = depthLastValid + 1; - while (!cur.isLeaf) { - const ni = cur as BNodeInternal; - frontier[depth] = ni; - cur = ni.children[sideIndex(ni)]; - depth++; + // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second + const maxKeyLeft = left._root.maxKey() as K; + const maxKeyRight = right._root.maxKey() as K; + const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + + // Initialize cursors at minimum keys. + const curA = createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + const curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + + // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful + // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) + // the key of cursorB to the first key >= cursorB. Call this transition a "crossover point." 
All interior nodes that + // overlap cause a crossover point, and all crossover points are guaranteed to be walked using this method. Thus, + // all overlapping interior nodes will be found if they are checked for on step-down. + // The one exception mentioned above is when they start at the same key. In this case, they are both advanced forward and then + // their new ordering determines how they walk from there. + // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). + // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. + // Initialize disqualification w.r.t. opposite leaf. + const initDisqualify = (cur: MergeCursor, other: MergeCursor) => { + const minKey = getKey(cur); + const otherMin = getKey(other); + const otherMax = other.leaf.maxKey(); + if (areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) + cur.leafPayload.disqualified = true; + for (let i = 0; i < cur.spine.length; ++i) { + const entry = cur.spine[i]; + // Since we are on the left side of the tree, we can use the leaf min key for every spine node + if (areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) + entry.payload.disqualified = true; } - frontier[depth] = cur; }; - /** - * Find the first ancestor (starting at insertionDepth) with capacity. 
- */ - function findCascadeEndDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { - for (let depth = insertionDepth; depth >= 0; depth--) { - if (spine[depth].keys.length < branchingFactor) - return depth; + initDisqualify(curA, curB); + initDisqualify(curB, curA); + + let leading = curA; + let trailing = curB; + let order = cmp(getKey(leading), getKey(trailing)); + + // Walk both cursors in alternating hops + while (true) { + const areEqual = order === 0; + + if (areEqual) { + const key = getKey(leading); + const vA = curA.leaf.values[curA.leafIndex]; + const vB = curB.leaf.values[curB.leafIndex]; + // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value + // to pending because they respect the areEqual flag during their moves. + const merged = mergeValues(key, vA, vB); + if (merged !== undefined) + alternatingPush(pending, key, merged); + const outTrailing = moveForwardOne(trailing, leading, key, cmp); + const outLeading = moveForwardOne(leading, trailing, key, cmp); + if (outTrailing || outLeading) { + if (!outTrailing || !outLeading) { + // In these cases, we pass areEqual=false because a return value of "out of tree" means + // the cursor did not move. This must be true because they started equal and one of them had more tree + // to walk (one is !out), so they cannot be equal at this point. 
+ if (outTrailing) { + moveTo(leading, trailing, maxKey, false, false, cmp); + } else { + moveTo(trailing, leading, maxKey, false, false, cmp); + } + } + break; + } + order = cmp(getKey(leading), getKey(trailing)); + } else { + if (order < 0) { + const tmp = trailing; + trailing = leading; + leading = tmp; + } + const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp); + if (out) { + moveTo(leading, trailing, maxKey, false, areEqual, cmp); + break; + } else if (nowEqual) { + order = 0; + } else { + order = -1; + } } - return -1; // no capacity, will need a new root - }; - - /** - * Inserts the child without updating cached size counts. - */ - function insertNoCount( - parent: BNodeInternal, - index: number, - child: BNode - ): void { - parent.children.splice(index, 0, child); - parent.keys.splice(index, 0, child.maxKey()); } - // ---- Side-specific delegates for merging subtrees into a frontier ---- - - function getLeftmostIndex(): number { - return 0; + // Ensure any trailing non-disjoint entries are added + flushPendingEntries(); + return { disjoint, tallestIndex }; +} + +export function buildFromDecomposition( + branchingFactor: number, + decomposed: DecomposeResult +): BTree { + + const { disjoint, tallestIndex } = decomposed; + const disjointEntryCount = alternatingCount(disjoint); + + // Now we have a set of disjoint subtrees and we need to merge them into a single tree. + // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees + // to the "right" and "left" of it in sorted order, we append them onto the appropriate side + // of the current tree, splitting nodes as necessary to maintain balance. + // A "side" is referred to as a frontier, as it is a linked list of nodes from the root down to + // the leaf level on that side of the tree. Each appended subtree is appended to the node at the + // same height as itself on the frontier. 
Each tree is guaranteed to be at most as tall as the + // current frontier because we start from the tallest subtree and work outward. + const initialRoot = alternatingGetSecond>(disjoint, tallestIndex); + const frontier: BNode[] = [initialRoot]; + + // Process all subtrees to the right of the tallest subtree + if (tallestIndex + 1 <= disjointEntryCount - 1) { + updateFrontier(frontier, 0, getRightmostIndex); + processSide( + branchingFactor, + disjoint, + frontier, + tallestIndex + 1, + disjointEntryCount, 1, + getRightmostIndex, + getRightInsertionIndex, + splitOffRightSide, + updateRightMax + ); } - function getRightmostIndex(node: BNodeInternal): number { - return node.children.length - 1; + // Process all subtrees to the left of the current tree + if (tallestIndex - 1 >= 0) { + // Note we need to update the frontier here because the right-side processing may have grown the tree taller. + updateFrontier(frontier, 0, getLeftmostIndex); + processSide( + branchingFactor, + disjoint, + frontier, + tallestIndex - 1, + -1, + -1, + getLeftmostIndex, + getLeftmostIndex, + splitOffLeftSide, + noop // left side appending doesn't update max keys + ); } - function getRightInsertionIndex(node: BNodeInternal): number { - return node.children.length; + const merged = new BTree(undefined, this._compare, this._maxNodeSize); + merged._root = frontier[0]; + + // Return the resulting tree + return merged; +} + +/** + * Processes one side (left or right) of the disjoint subtree set during a merge operation. + * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. 
+ */ +function processSide( + branchingFactor: number, + disjoint: (number | BNode)[], + spine: BNode[], + start: number, + end: number, + step: number, + sideIndex: (node: BNodeInternal) => number, + sideInsertionIndex: (node: BNodeInternal) => number, + splitOffSide: (node: BNodeInternal) => BNodeInternal, + updateMax: (node: BNodeInternal, maxBelow: K) => void +): void { + // Determine the depth of the first shared node on the frontier. + // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning + // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the + // frontier for each insertion as that would fundamentally change our asymptotics. + let isSharedFrontierDepth = 0; + let cur = spine[0]; + // Find the first shared node on the frontier + while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { + isSharedFrontierDepth++; + cur = (cur as BNodeInternal).children[sideIndex(cur as BNodeInternal)]; } - function splitOffRightSide(node: BNodeInternal): BNodeInternal { - return node.splitOffRightSide(); + // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. + // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. + // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. + // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. + // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. + // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. 
+ const unflushedSizes: number[] = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array + + for (let i = start; i != end; i += step) { + const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf + const subtree = alternatingGetSecond>(disjoint, i); + const subtreeHeight = alternatingGetFirst>(disjoint, i); + const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' + + // Ensure path is unshared before mutation + ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); + + // Calculate expansion depth (first ancestor with capacity) + const expansionDepth = Math.max(0, findCascadeEndDepth(spine, insertionDepth, branchingFactor)); + + // Update sizes on spine above the shared ancestor before we expand + updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); + + // Append and cascade splits upward + const newRoot = appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); + if (newRoot) { + // Set the spine root to the highest up new node; the rest of the spine is updated below + spine[0] = newRoot; + unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); + unflushedSizes.push(0); // new root level + isSharedFrontierDepth = insertionDepth + 2; + unflushedSizes[insertionDepth + 1] += susize(); + } else { + isSharedFrontierDepth = insertionDepth + 1; + unflushedSizes[insertionDepth] += susize(); + } + + // Finally, update the frontier from the highest new node downward + // Note that this is often the point where the new subtree is attached, + // but in the case of cascaded splits it may be higher up. 
+ updateFrontier(spine, expansionDepth, sideIndex); + check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); + check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); } - function splitOffLeftSide(node: BNodeInternal): BNodeInternal { - return node.splitOffLeftSide(); + // Finally, propagate any remaining unflushed sizes upward and update max keys + updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); +}; + +/** + * Append a subtree at a given depth on the chosen side; cascade splits upward if needed. + * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. + * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. + * Returns a new root if the root was split, otherwise undefined. + */ +function appendAndCascade( + spine: BNode[], + insertionDepth: number, + branchingFactor: number, + subtree: BNode, + sideIndex: (node: BNodeInternal) => number, + sideInsertionIndex: (node: BNodeInternal) => number, + splitOffSide: (node: BNodeInternal) => BNodeInternal +): BNodeInternal | undefined { + // We must take care to avoid accidental propagation upward of the size of the inserted su + // To do this, we first split nodes upward from the insertion point until we find a node with capacity + // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, + // inserting at the end ensures no accidental propagation. 
+ + // Depth is -1 if the subtree is the same height as the current tree + if (insertionDepth >= 0) { + let carry: BNode | undefined = undefined; + // Determine initially where to insert after any splits + let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; + if (insertTarget.keys.length >= branchingFactor) { + insertTarget = carry = splitOffSide(insertTarget); + } + + let d = insertionDepth - 1; + while (carry && d >= 0) { + const parent = spine[d] as BNodeInternal; + const idx = sideIndex(parent); + // Refresh last key since child was split + parent.keys[idx] = parent.children[idx].maxKey(); + if (parent.keys.length < branchingFactor) { + // We have reached the end of the cascade + insertNoCount(parent, sideInsertionIndex(parent), carry); + carry = undefined; + } else { + // Splitting the parent here requires care to avoid incorrectly double counting sizes + // Example: a node is at max capacity 4, with children each of size 4 for 16 total. + // We split the node into two nodes of 2 children each, but this does *not* modify the size + // of its parent. 
Therefore when we insert the carry into the torn-off node, we must not + // increase its size or we will double-count the size of the carry su + const tornOff = splitOffSide(parent); + insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); + carry = tornOff; + } + d--; + } + + let newRoot: BNodeInternal | undefined = undefined; + if (carry !== undefined) { + // Expansion reached the root, need a new root to hold carry + const oldRoot = spine[0] as BNodeInternal; + newRoot = new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); + insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); + } + + // Finally, insert the subtree at the insertion point + insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); + return newRoot; + } else { + // Insertion of subtree with equal height to current tree + const oldRoot = spine[0] as BNodeInternal; + const newRoot = new BNodeInternal([oldRoot], oldRoot.size()); + insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); + return newRoot; } +}; - function updateRightMax(node: BNodeInternal, maxBelow: K): void { - node.keys[node.keys.length - 1] = maxBelow; +/** + * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. + * Short-circuits if first shared node is deeper than depthTo (the insertion depth). + */ +function ensureNotShared( + spine: BNode[], + isSharedFrontierDepth: number, + depthToInclusive: number, + sideIndex: (node: BNodeInternal) => number) { + if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) + return; // nothing to clone when root is a leaf; equal-height case will handle this + + // Clone root if needed first (depth 0) + if (isSharedFrontierDepth === 0) { + const root = spine[0]; + spine[0] = root.clone() as BNodeInternal; } - // ------- Alternating list helpers ------- - // These helpers manage a list that alternates between two types of entries. 
- // Storing data this way avoids small tuple allocations and shows major improvements - // in GC time in benchmarks. + // Clone downward along the frontier to 'depthToInclusive' + for (let depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { + const parent = spine[depth - 1] as BNodeInternal; + const childIndex = sideIndex(parent); + const clone = parent.children[childIndex].clone(); + parent.children[childIndex] = clone; + spine[depth] = clone as BNodeInternal; + } +}; - function alternatingCount(list: unknown[]): number { - return list.length >> 1; +/** + * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) + */ +function updateSizeAndMax( + spine: BNode[], + unflushedSizes: number[], + isSharedFrontierDepth: number, + depthUpToInclusive: number, + updateMax: (node: BNodeInternal, maxBelow: K) => void) { + // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because + // the insertion point is inside a shared node which will always have correct sizes + const maxKey = spine[isSharedFrontierDepth].maxKey(); + const startDepth = isSharedFrontierDepth - 1; + for (let depth = startDepth; depth >= depthUpToInclusive; depth--) { + const sizeAtLevel = unflushedSizes[depth]; + unflushedSizes[depth] = 0; // we are propagating it now + if (depth > 0) { + // propagate size upward, will be added lazily, either when a subtree is appended at or above that level or + // at the end of processing the entire side + unflushedSizes[depth - 1] += sizeAtLevel; + } + const node = spine[depth] as BNodeInternal; + node._size += sizeAtLevel; + // No-op if left side, as max keys in parents are unchanged by appending to the beginning of a node + updateMax(node, maxKey); } +}; - function alternatingGetFirst(list: Array, index: number): TFirst { - return list[index << 1] as TFirst; +/** + * Update a spine (frontier) from a specific depth down, inclusive. 
+ * Extends the frontier array if it is not already as long as the frontier. + */ +function updateFrontier(frontier: BNode[], depthLastValid: number, sideIndex: (node: BNodeInternal) => number): void { + check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); + const startingAncestor = frontier[depthLastValid]; + if (startingAncestor.isLeaf) + return; + const internal = startingAncestor as BNodeInternal; + let cur: BNode = internal.children[sideIndex(internal)]; + let depth = depthLastValid + 1; + while (!cur.isLeaf) { + const ni = cur as BNodeInternal; + frontier[depth] = ni; + cur = ni.children[sideIndex(ni)]; + depth++; } + frontier[depth] = cur; +}; - function alternatingGetSecond(list: Array, index: number): TSecond { - return list[(index << 1) + 1] as TSecond; +/** + * Find the first ancestor (starting at insertionDepth) with capacity. + */ +function findCascadeEndDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { + for (let depth = insertionDepth; depth >= 0; depth--) { + if (spine[depth].keys.length < branchingFactor) + return depth; } + return -1; // no capacity, will need a new root +}; - function alternatingPush(list: Array, first: TFirst, second: TSecond): void { - // Micro benchmarks show this is the fastest way to do this - list.push(first, second); - } \ No newline at end of file +/** + * Inserts the child without updating cached size counts. 
+ */ +function insertNoCount( + parent: BNodeInternal, + index: number, + child: BNode +): void { + parent.children.splice(index, 0, child); + parent.keys.splice(index, 0, child.maxKey()); +} + +// ---- Side-specific delegates for merging subtrees into a frontier ---- + +function getLeftmostIndex(): number { + return 0; +} + +function getRightmostIndex(node: BNodeInternal): number { + return node.children.length - 1; +} + +function getRightInsertionIndex(node: BNodeInternal): number { + return node.children.length; +} + +function splitOffRightSide(node: BNodeInternal): BNodeInternal { + return node.splitOffRightSide(); +} + +function splitOffLeftSide(node: BNodeInternal): BNodeInternal { + return node.splitOffLeftSide(); +} + +function updateRightMax(node: BNodeInternal, maxBelow: K): void { + node.keys[node.keys.length - 1] = maxBelow; +} + +// ------- Alternating list helpers ------- +// These helpers manage a list that alternates between two types of entries. +// Storing data this way avoids small tuple allocations and shows major improvements +// in GC time in benchmarks. + +function alternatingCount(list: unknown[]): number { + return list.length >> 1; +} + +function alternatingGetFirst(list: Array, index: number): TFirst { + return list[index << 1] as TFirst; +} + +function alternatingGetSecond(list: Array, index: number): TSecond { + return list[(index << 1) + 1] as TSecond; +} + +function alternatingPush(list: Array, first: TFirst, second: TSecond): void { + // Micro benchmarks show this is the fastest way to do this + list.push(first, second); +} \ No newline at end of file diff --git a/intersect.ts b/intersect.ts index d69e5d8..ae889ff 100644 --- a/intersect.ts +++ b/intersect.ts @@ -1,63 +1,63 @@ import BTree from "./b+tree"; import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperation } from "./parallelWalk" - /** - * Intersects the two trees, calling the supplied `intersection` callback for each intersecting key/value pair. 
- * Neither tree is modified. - * @param treeA First tree to intersect. - * @param treeB Second tree to intersect. - * @param intersection Called for keys that appear in both trees. - * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. - */ - export function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { - checkCanDoSetOperation(treeA, treeB); - if (treeB.size === 0 || treeA.size === 0) - return; +/** + * Intersects the two trees, calling the supplied `intersection` callback for each intersecting key/value pair. + * Neither tree is modified. + * @param treeA First tree to intersect. + * @param treeB Second tree to intersect. + * @param intersection Called for keys that appear in both trees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
+ * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ +export function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { + checkCanDoSetOperation(treeA, treeB); + if (treeB.size === 0 || treeA.size === 0) + return; - const cmp = treeA._compare; - const makePayload = (): undefined => undefined; - let cursorA = createCursor(treeA, makePayload, noop, noop, noop, noop, noop); - let cursorB = createCursor(treeB, makePayload, noop, noop, noop, noop, noop); - let leading = cursorA; - let trailing = cursorB; - let order = cmp(getKey(leading), getKey(trailing)); - - // The intersect walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. - // However, the only thing we care about is when the two cursors are equal (equality is intersection). - // When they are not equal we just advance the trailing cursor. 
- while (true) { - const areEqual = order === 0; - if (areEqual) { - const key = getKey(leading); - const vA = cursorA.leaf.values[cursorA.leafIndex]; - const vB = cursorB.leaf.values[cursorB.leafIndex]; - intersection(key, vA, vB); - const outT = moveForwardOne(trailing, leading, key, cmp); - const outL = moveForwardOne(leading, trailing, key, cmp); - if (outT && outL) - break; - order = cmp(getKey(leading), getKey(trailing)); + const cmp = treeA._compare; + const makePayload = (): undefined => undefined; + let cursorA = createCursor(treeA, makePayload, noop, noop, noop, noop, noop); + let cursorB = createCursor(treeB, makePayload, noop, noop, noop, noop, noop); + let leading = cursorA; + let trailing = cursorB; + let order = cmp(getKey(leading), getKey(trailing)); + + // The intersect walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. + // However, the only thing we care about is when the two cursors are equal (equality is intersection). + // When they are not equal we just advance the trailing cursor. + while (true) { + const areEqual = order === 0; + if (areEqual) { + const key = getKey(leading); + const vA = cursorA.leaf.values[cursorA.leafIndex]; + const vB = cursorB.leaf.values[cursorB.leafIndex]; + intersection(key, vA, vB); + const outT = moveForwardOne(trailing, leading, key, cmp); + const outL = moveForwardOne(leading, trailing, key, cmp); + if (outT && outL) + break; + order = cmp(getKey(leading), getKey(trailing)); + } else { + if (order < 0) { + const tmp = trailing; + trailing = leading; leading = tmp; + } + // At this point, leading is guaranteed to be ahead of trailing. + const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp) + if (out) { + // We've reached the end of one tree, so intersections are guaranteed to be done. 
+ break; + } else if (nowEqual) { + order = 0; } else { - if (order < 0) { - const tmp = trailing; - trailing = leading; leading = tmp; - } - // At this point, leading is guaranteed to be ahead of trailing. - const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp) - if (out) { - // We've reached the end of one tree, so intersections are guaranteed to be done. - break; - } else if (nowEqual) { - order = 0; - } else { - order = -1; // trailing is ahead of leading - } + order = -1; // trailing is ahead of leading } } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/merge.ts b/merge.ts index 87a8e1a..45dc819 100644 --- a/merge.ts +++ b/merge.ts @@ -2,32 +2,32 @@ import BTree from "./b+tree"; import { decompose, buildFromDecomposition } from "./decompose"; import { checkCanDoSetOperation } from "./parallelWalk"; - /** - * Efficiently merges two trees, reusing subtrees wherever possible. - * Neither input tree is modified. - * @param treeA First tree to merge. - * @param treeB Second tree to merge. - * @param merge Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. - * @returns A new BTree that contains the merged key/value pairs. - * @description Complexity is bounded O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` - * and inserting the contents of `other` into the clone. 
- */ - export function merge(treeA: BTree, treeB: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { - const branchingFactor = checkCanDoSetOperation(treeA, treeB); - if (treeA._root.size() === 0) - return treeB.clone(); - if (treeB._root.size() === 0) - return treeA.clone(); +/** + * Efficiently merges two trees, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param treeA First tree to merge. + * @param treeB Second tree to merge. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. + */ +export function merge(treeA: BTree, treeB: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { + const branchingFactor = checkCanDoSetOperation(treeA, treeB); + if (treeA._root.size() === 0) + return treeB.clone(); + if (treeB._root.size() === 0) + return treeA.clone(); - // Decompose both trees into disjoint subtrees leaves. - // As many of these as possible will be reused from the original trees, and the remaining - // will be leaves that are the result of merging intersecting leaves. 
- const decomposed = decompose(treeA, treeB, merge); - return buildFromDecomposition(branchingFactor, decomposed); - } \ No newline at end of file + // Decompose both trees into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are the result of merging intersecting leaves. + const decomposed = decompose(treeA, treeB, merge); + return buildFromDecomposition(branchingFactor, decomposed); +} \ No newline at end of file diff --git a/parallelWalk.ts b/parallelWalk.ts index 144f99d..8045e8c 100644 --- a/parallelWalk.ts +++ b/parallelWalk.ts @@ -16,204 +16,204 @@ export interface MergeCursor { onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; } - /** - * Walks the cursor forward by one key. - * Should only be called to advance cursors that started equal. - * Returns true if end-of-tree was reached (cursor not structurally mutated). - */ - export function moveForwardOne( - cur: MergeCursor, - other: MergeCursor, - currentKey: K, - cmp: (a:K,b:K)=>number - ): boolean { - const leaf = cur.leaf; - const nextIndex = cur.leafIndex + 1; - if (nextIndex < leaf.keys.length) { - // Still within current leaf - cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, true); - cur.leafIndex = nextIndex; - return false; - } - - // If our optimized step within leaf failed, use full moveTo logic - // Pass isInclusive=false to ensure we walk forward to the key exactly after the current - return BTree.moveTo(cur, other, currentKey, false, true, cmp)[0]; +/** + * Walks the cursor forward by one key. + * Should only be called to advance cursors that started equal. + * Returns true if end-of-tree was reached (cursor not structurally mutated). 
+ */ +export function moveForwardOne( + cur: MergeCursor, + other: MergeCursor, + currentKey: K, + cmp: (a:K,b:K)=>number +): boolean { + const leaf = cur.leaf; + const nextIndex = cur.leafIndex + 1; + if (nextIndex < leaf.keys.length) { + // Still within current leaf + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, true); + cur.leafIndex = nextIndex; + return false; } + // If our optimized step within leaf failed, use full moveTo logic + // Pass isInclusive=false to ensure we walk forward to the key exactly after the current + return BTree.moveTo(cur, other, currentKey, false, true, cmp)[0]; +} + /** * Create a cursor pointing to the leftmost key of the supplied tree. */ export function createCursor( - tree: BTree, - makePayload: MergeCursor["makePayload"], - onEnterLeaf: MergeCursor["onEnterLeaf"], - onMoveInLeaf: MergeCursor["onMoveInLeaf"], - onExitLeaf: MergeCursor["onExitLeaf"], - onStepUp: MergeCursor["onStepUp"], - onStepDown: MergeCursor["onStepDown"], - ): MergeCursor { - const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; - let n: BNode = tree._root; - while (!n.isLeaf) { - const ni = n as BNodeInternal; - const payload = makePayload(); - spine.push({ node: ni, childIndex: 0, payload }); - n = ni.children[0]; - } - const leafPayload = makePayload(); - const cur: MergeCursor = { - tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, - onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown - }; - return cur; + tree: BTree, + makePayload: MergeCursor["makePayload"], + onEnterLeaf: MergeCursor["onEnterLeaf"], + onMoveInLeaf: MergeCursor["onMoveInLeaf"], + onExitLeaf: MergeCursor["onExitLeaf"], + onStepUp: MergeCursor["onStepUp"], + onStepDown: MergeCursor["onStepDown"], +): MergeCursor { + const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; + let n: BNode = tree._root; + while (!n.isLeaf) { + const ni = n as BNodeInternal; + const payload = 
makePayload(); + spine.push({ node: ni, childIndex: 0, payload }); + n = ni.children[0]; } + const leafPayload = makePayload(); + const cur: MergeCursor = { + tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, + onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown + }; + return cur; +} export function getKey(c: MergeCursor): K { - return c.leaf.keys[c.leafIndex]; + return c.leaf.keys[c.leafIndex]; } - /** - * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. - * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). - * Also returns a boolean indicating if the target key was landed on exactly. - */ - export function moveTo( - cur: MergeCursor, - other: MergeCursor, - targetKey: K, - isInclusive: boolean, - startedEqual: boolean, - cmp: (a:K,b:K)=>number - ): [outOfTree: boolean, targetExactlyReached: boolean] { - // Cache callbacks for perf - const onMoveInLeaf = cur.onMoveInLeaf; - // Fast path: destination within current leaf - const leaf = cur.leaf; - const leafPayload = cur.leafPayload; - const i = leaf.indexOf(targetKey, -1, cmp); - let destInLeaf: number; - let targetExactlyReached: boolean; - if (i < 0) { - destInLeaf = ~i; - targetExactlyReached = false; +/** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). + * Also returns a boolean indicating if the target key was landed on exactly. 
+ */ +export function moveTo( + cur: MergeCursor, + other: MergeCursor, + targetKey: K, + isInclusive: boolean, + startedEqual: boolean, + cmp: (a:K,b:K)=>number +): [outOfTree: boolean, targetExactlyReached: boolean] { + // Cache callbacks for perf + const onMoveInLeaf = cur.onMoveInLeaf; + // Fast path: destination within current leaf + const leaf = cur.leaf; + const leafPayload = cur.leafPayload; + const i = leaf.indexOf(targetKey, -1, cmp); + let destInLeaf: number; + let targetExactlyReached: boolean; + if (i < 0) { + destInLeaf = ~i; + targetExactlyReached = false; + } else { + if (isInclusive) { + destInLeaf = i; + targetExactlyReached = true; } else { - if (isInclusive) { - destInLeaf = i; - targetExactlyReached = true; - } else { - destInLeaf = i + 1; - targetExactlyReached = false; - } - } - const leafKeyCount = leaf.keys.length; - if (destInLeaf < leafKeyCount) { - onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); - cur.leafIndex = destInLeaf; - return [false, targetExactlyReached]; + destInLeaf = i + 1; + targetExactlyReached = false; } + } + const leafKeyCount = leaf.keys.length; + if (destInLeaf < leafKeyCount) { + onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); + cur.leafIndex = destInLeaf; + return [false, targetExactlyReached]; + } - // Find first ancestor with a viable right step - const spine = cur.spine; - const initialSpineLength = spine.length; - let descentLevel = -1; - let descentIndex = -1; - - for (let s = initialSpineLength - 1; s >= 0; s--) { - const parent = spine[s].node; - const indexOf = parent.indexOf(targetKey, -1, cmp); - let stepDownIndex: number; - if (indexOf < 0) { - stepDownIndex = ~indexOf; - } else { - stepDownIndex = isInclusive ? 
indexOf : indexOf + 1; - } - - // Note: when key not found, indexOf with failXor=0 already returns insertion index - if (stepDownIndex < parent.keys.length) { - descentLevel = s; - descentIndex = stepDownIndex; - break; - } + // Find first ancestor with a viable right step + const spine = cur.spine; + const initialSpineLength = spine.length; + let descentLevel = -1; + let descentIndex = -1; + + for (let s = initialSpineLength - 1; s >= 0; s--) { + const parent = spine[s].node; + const indexOf = parent.indexOf(targetKey, -1, cmp); + let stepDownIndex: number; + if (indexOf < 0) { + stepDownIndex = ~indexOf; + } else { + stepDownIndex = isInclusive ? indexOf : indexOf + 1; } - // Exit leaf; even if no spine, we did walk out of it conceptually - const startIndex = cur.leafIndex; - cur.onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); - - const onStepUp = cur.onStepUp; - if (descentLevel < 0) { - // No descent point; step up all the way; last callback gets infinity - for (let depth = initialSpineLength - 1; depth >= 0; depth--) { - const entry = spine[depth]; - const sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; - onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, sd, cur); - } - return [true, false]; + // Note: when key not found, indexOf with failXor=0 already returns insertion index + if (stepDownIndex < parent.keys.length) { + descentLevel = s; + descentIndex = stepDownIndex; + break; } + } + + // Exit leaf; even if no spine, we did walk out of it conceptually + const startIndex = cur.leafIndex; + cur.onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); - // Step up through ancestors above the descentLevel - for (let depth = initialSpineLength - 1; depth > descentLevel; depth--) { + const onStepUp = cur.onStepUp; + if (descentLevel < 0) { + // No descent point; step up all the way; last callback gets infinity + for (let depth = initialSpineLength - 1; depth >= 0; depth--) { const entry = spine[depth]; - onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, Number.NaN, cur); + const sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, sd, cur); } + return [true, false]; + } - const entry = spine[descentLevel]; - onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); - entry.childIndex = descentIndex; - - const onStepDown = cur.onStepDown; - const makePayload = cur.makePayload; - - // Descend, invoking onStepDown and creating payloads - let height = initialSpineLength - descentLevel - 1; // calculate height before changing length - spine.length = descentLevel + 1; - let node: BNode = spine[descentLevel].node.children[descentIndex]; - - while (!node.isLeaf) { - const ni = node as BNodeInternal; - const keys = ni.keys; - let stepDownIndex = ni.indexOf(targetKey, 0, cmp); - if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) - stepDownIndex++; - const payload = makePayload(); - const spineIndex = spine.length; - spine.push({ node: ni, childIndex: stepDownIndex, payload }); - onStepDown(ni, height, spineIndex, stepDownIndex, cur); - node = ni.children[stepDownIndex]; - height -= 1; - } + // Step up through ancestors above the descentLevel + for (let depth = initialSpineLength - 1; depth > descentLevel; depth--) { + const entry = spine[depth]; + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, Number.NaN, cur); + } - // Enter destination leaf - const idx = node.indexOf(targetKey, -1, cmp); - let destIndex: number; - if (idx < 0) { - destIndex = ~idx; - targetExactlyReached = false; + const entry = spine[descentLevel]; + onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); + entry.childIndex = descentIndex; + + const onStepDown = cur.onStepDown; + const makePayload = cur.makePayload; + + // Descend, invoking onStepDown and creating payloads + let height = 
initialSpineLength - descentLevel - 1; // calculate height before changing length + spine.length = descentLevel + 1; + let node: BNode = spine[descentLevel].node.children[descentIndex]; + + while (!node.isLeaf) { + const ni = node as BNodeInternal; + const keys = ni.keys; + let stepDownIndex = ni.indexOf(targetKey, 0, cmp); + if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) + stepDownIndex++; + const payload = makePayload(); + const spineIndex = spine.length; + spine.push({ node: ni, childIndex: stepDownIndex, payload }); + onStepDown(ni, height, spineIndex, stepDownIndex, cur); + node = ni.children[stepDownIndex]; + height -= 1; + } + + // Enter destination leaf + const idx = node.indexOf(targetKey, -1, cmp); + let destIndex: number; + if (idx < 0) { + destIndex = ~idx; + targetExactlyReached = false; + } else { + if (isInclusive) { + destIndex = idx; + targetExactlyReached = true; } else { - if (isInclusive) { - destIndex = idx; - targetExactlyReached = true; - } else { - destIndex = idx + 1; - targetExactlyReached = false; - } + destIndex = idx + 1; + targetExactlyReached = false; } - cur.leaf = node; - cur.leafPayload = makePayload(); - cur.leafIndex = destIndex; - cur.onEnterLeaf(node, destIndex, cur, other); - return [false, targetExactlyReached]; } + cur.leaf = node; + cur.leafPayload = makePayload(); + cur.leafIndex = destIndex; + cur.onEnterLeaf(node, destIndex, cur, other); + return [false, targetExactlyReached]; +} export function noop(): void {} - export function checkCanDoSetOperation(treeA: BTree, treeB: BTree): number { - if (treeA._compare !== treeB._compare) - throw new Error("Cannot merge BTrees with different comparators."); +export function checkCanDoSetOperation(treeA: BTree, treeB: BTree): number { + if (treeA._compare !== treeB._compare) + throw new Error("Cannot merge BTrees with different comparators."); - const branchingFactor = treeA._maxNodeSize; - if (branchingFactor !== treeB._maxNodeSize) - 
throw new Error("Cannot merge BTrees with different max node sizes."); - return branchingFactor; - } \ No newline at end of file + const branchingFactor = treeA._maxNodeSize; + if (branchingFactor !== treeB._maxNodeSize) + throw new Error("Cannot merge BTrees with different max node sizes."); + return branchingFactor; +} From 51e5ab57a7c35c5227545ab346eee76bf954e2ab Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 15:54:57 -0800 Subject: [PATCH 059/143] stub --- extended/index.ts | 48 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/extended/index.ts b/extended/index.ts index 0d6edf9..554eca7 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -22,6 +22,18 @@ export class BTreeEx extends BTree { return result as this; } + /** + * Computes the differences between `this` and `other`. + * For efficiency, the diff is returned via invocations of supplied handlers. + * The computation is optimized for the case in which the two trees have large amounts of shared data + * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. + * The handlers can cause computation to early exit by returning `{ break: R }`. + * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * @param other The tree to compute a diff against. + * @param onlyThis Callback invoked for all keys only present in `this`. + * @param onlyOther Callback invoked for all keys only present in `other`. + * @param different Callback invoked for all keys with differing values. + */ diffAgainst( other: BTree, onlyThis?: (k: K, v: V) => { break?: R } | void, @@ -30,6 +42,42 @@ export class BTreeEx extends BTree { ): R | undefined { return diffAgainstAlgorithm(this, other, onlyThis, onlyOther, different); } + + /** + * Intersects this tree with `other`, calling the supplied `intersection` callback for each intersecting key/value pair. 
+ * Neither tree is modified. + * @param other The other tree to intersect with this one. + * @param intersection Called for keys that appear in both trees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ + intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { + + } + + /** + * Efficiently merges this tree with `other`, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param other The other tree to merge into this one. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
+ * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. + */ + merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx { + + } } export interface BTreeEx { From 1d8ff809b82d81f771a527ae1a002b7fa57ff585 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 16:22:42 -0800 Subject: [PATCH 060/143] building --- b+tree.d.ts | 6 + b+tree.js | 37 +- b+tree.test.ts | 126 ++--- benchmarks.ts | 64 +-- extended/decompose.d.ts | 17 + extended/decompose.js | 541 ++++++++++++++++++++ decompose.ts => extended/decompose.ts | 27 +- extended/index.d.ts | 42 ++ extended/index.js | 46 ++ extended/index.ts | 6 +- extended/intersect.d.ts | 16 + extended/intersect.js | 69 +++ intersect.ts => extended/intersect.ts | 12 +- extended/merge.d.ts | 18 + extended/merge.js | 37 ++ merge.ts => extended/merge.ts | 18 +- extended/parallelWalk.d.ts | 41 ++ extended/parallelWalk.js | 185 +++++++ parallelWalk.ts => extended/parallelWalk.ts | 11 +- package-lock.json | 4 +- 20 files changed, 1195 insertions(+), 128 deletions(-) create mode 100644 extended/decompose.d.ts create mode 100644 extended/decompose.js rename decompose.ts => extended/decompose.ts (97%) create mode 100644 extended/intersect.d.ts create mode 100644 extended/intersect.js rename intersect.ts => extended/intersect.ts (85%) create mode 100644 extended/merge.d.ts create mode 100644 extended/merge.js rename merge.ts => extended/merge.ts (65%) create mode 100644 extended/parallelWalk.d.ts create mode 100644 extended/parallelWalk.js rename parallelWalk.ts => extended/parallelWalk.ts (95%) diff --git a/b+tree.d.ts b/b+tree.d.ts index 57fddd7..f0cd1a5 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -425,5 +425,11 @@ export default class BTree implements ISortedMapF, ISort * was the intention, but TypeScript is acting weird and may return `ISortedSet` * even if `V` can't be 
`undefined` (discussion: btree-typescript issue #14) */ export declare function asSet(btree: BTree): undefined extends V ? ISortedSet : unknown; +/** + * Determines whether two nodes are overlapping in key range. + * Takes the leftmost known key of each node to avoid a log(n) min calculation. + * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. + */ +export declare function areOverlapping(aMin: K, aMax: K, bMin: K, bMax: K, cmp: (x: K, y: K) => number): boolean; /** A BTree frozen in the empty state. */ export declare const EmptyBTree: BTree; diff --git a/b+tree.js b/b+tree.js index 54139d5..7d8061c 100644 --- a/b+tree.js +++ b/b+tree.js @@ -15,7 +15,7 @@ var __extends = (this && this.__extends) || (function () { }; })(); Object.defineProperty(exports, "__esModule", { value: true }); -exports.EmptyBTree = exports.check = exports.BNodeInternal = exports.BNode = exports.asSet = exports.simpleComparator = exports.defaultComparator = void 0; +exports.EmptyBTree = exports.check = exports.areOverlapping = exports.BNodeInternal = exports.BNode = exports.asSet = exports.simpleComparator = exports.defaultComparator = void 0; /** * Compares DefaultComparables to form a strict partial ordering. * @@ -1496,6 +1496,41 @@ function sumChildSizes(children) { total += children[i].size(); return total; } +/** + * Determines whether two nodes are overlapping in key range. + * Takes the leftmost known key of each node to avoid a log(n) min calculation. + * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. + */ +function areOverlapping(aMin, aMax, bMin, bMax, cmp) { + // There are 4 possibilities: + // 1. aMin.........aMax + // bMin.........bMax + // (aMax between bMin and bMax) + // 2. aMin.........aMax + // bMin.........bMax + // (aMin between bMin and bMax) + // 3. aMin.............aMax + // bMin....bMax + // (aMin and aMax enclose bMin and bMax; note this includes equality cases) + // 4. 
aMin....aMax + // bMin.............bMax + // (bMin and bMax enclose aMin and aMax; note equality cases are identical to case 3) + var aMinBMin = cmp(aMin, bMin); + var aMinBMax = cmp(aMin, bMax); + if (aMinBMin >= 0 && aMinBMax <= 0) { + // case 2 or 4 + return true; + } + var aMaxBMin = cmp(aMax, bMin); + var aMaxBMax = cmp(aMax, bMax); + if (aMaxBMin >= 0 && aMaxBMax <= 0) { + // case 1 + return true; + } + // case 3 or no overlap + return aMinBMin <= 0 && aMaxBMax >= 0; +} +exports.areOverlapping = areOverlapping; var Delete = { delete: true }, DeleteRange = function () { return Delete; }; var Break = { break: true }; var EmptyLeaf = (function () { diff --git a/b+tree.test.ts b/b+tree.test.ts index 8738776..8bc7c33 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1141,11 +1141,11 @@ function testIntersect(maxNodeSize: number) { const compare = (a: number, b: number) => a - b; const buildTree = (entries: Array<[number, number]>) => - new BTree(entries, compare, maxNodeSize); + new BTreeEx(entries, compare, maxNodeSize); const tuples = (...pairs: Array<[number, number]>) => pairs; - const collectCalls = (left: BTree, right: BTree) => { + const collectCalls = (left: BTreeEx, right: BTreeEx) => { const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; left.intersect(right, (key, leftValue, rightValue) => { calls.push({ key, leftValue, rightValue }); @@ -1256,14 +1256,14 @@ function testIntersect(maxNodeSize: number) { test('Intersect throws for comparator mismatch', () => { const compareA = (a: number, b: number) => a - b; const compareB = (a: number, b: number) => a - b; - const tree1 = new BTree([[1, 1]], compareA, maxNodeSize); - const tree2 = new BTree([[1, 1]], compareB, maxNodeSize); + const tree1 = new BTreeEx([[1, 1]], compareA, maxNodeSize); + const tree2 = new BTreeEx([[1, 1]], compareB, maxNodeSize); expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot intersect BTrees with different comparators."); }); 
test('Intersect throws for max node size mismatch', () => { - const tree1 = new BTree([[1, 1]], compare, maxNodeSize); - const tree2 = new BTree([[1, 1]], compare, maxNodeSize + 1); + const tree1 = new BTreeEx([[1, 1]], compare, maxNodeSize); + const tree2 = new BTreeEx([[1, 1]], compare, maxNodeSize + 1); expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot intersect BTrees with different max node sizes."); }); } @@ -1301,8 +1301,8 @@ describe('BTree intersect fuzz tests', () => { const collisionLabel = collisionChance.toFixed(2); test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { - const treeA = new BTree([], compare, maxNodeSize); - const treeB = new BTree([], compare, maxNodeSize); + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); const keys = makeArray(size, true, 1, collisionChance, rng); @@ -1379,14 +1379,14 @@ function testMerge(maxNodeSize: number) { }; const buildTree = (keys: number[], valueScale = 1, valueOffset = 0) => { - const tree = new BTree([], compare, maxNodeSize); + const tree = new BTreeEx([], compare, maxNodeSize); for (const key of keys) { tree.set(key, key * valueScale + valueOffset); } return tree; }; - const expectRootLeafState = (tree: BTree, expectedIsLeaf: boolean) => { + const expectRootLeafState = (tree: BTreeEx, expectedIsLeaf: boolean) => { const root = tree['_root'] as any; expect(root.isLeaf).toBe(expectedIsLeaf); }; @@ -1404,8 +1404,8 @@ function testMerge(maxNodeSize: number) { }; const naiveMerge = ( - left: BTree, - right: BTree, + left: BTreeEx, + right: BTreeEx, mergeFn: MergeFn ) => { const expected = left.clone(); @@ -1426,10 +1426,10 @@ function testMerge(maxNodeSize: number) { }; const expectMergeMatchesBaseline = ( - left: BTree, - right: BTree, + left: BTreeEx, + right: BTreeEx, mergeFn: MergeFn, - after?: (ctx: { result: BTree, expected: BTree }) => void, + after?: (ctx: { 
result: BTreeEx, expected: BTreeEx }) => void, options: MergeExpectationOptions = {} ) => { const expectedMergeFn = options.expectedMergeFn ?? mergeFn; @@ -1667,8 +1667,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge two empty trees', () => { - const tree1 = new BTree([], compare, maxNodeSize); - const tree2 = new BTree([], compare, maxNodeSize); + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { @@ -1678,8 +1678,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge empty tree with non-empty tree', () => { - const tree1 = new BTree([], compare, maxNodeSize); - const tree2 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; const { result: leftMerge } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); @@ -1694,8 +1694,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge with no overlapping keys', () => { - const tree1 = new BTree([[1, 10], [3, 30], [5, 50]], compare, maxNodeSize); - const tree2 = new BTree([[2, 20], [4, 40], [6, 60]], compare, maxNodeSize); + const tree1 = new BTreeEx([[1, 10], [3, 30], [5, 50]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 20], [4, 40], [6, 60]], compare, maxNodeSize); const mergeFn: MergeFn = () => { throw new Error('Should not be called for non-overlapping keys'); }; @@ -1709,8 +1709,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge with completely overlapping keys - sum values', () => { - const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTree([[1, 5], [2, 15], [3, 25]], compare, maxNodeSize); + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 
30]], compare, maxNodeSize); + const tree2 = new BTreeEx([[1, 5], [2, 15], [3, 25]], compare, maxNodeSize); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { @@ -1720,8 +1720,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge with completely overlapping keys - prefer left', () => { - const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTree([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); const mergeFn: MergeFn = (_k, v1, _v2) => v1; const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { @@ -1731,8 +1731,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge with completely overlapping keys - prefer right', () => { - const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTree([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); const mergeFn: MergeFn = (_k, _v1, v2) => v2; const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); @@ -1740,8 +1740,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge with partially overlapping keys', () => { - const tree1 = new BTree([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); - const tree2 = new BTree([[3, 300], [4, 400], [5, 500], [6, 600]], compare, maxNodeSize); + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); + const tree2 = new BTreeEx([[3, 300], [4, 400], [5, 500], [6, 600]], compare, maxNodeSize); const mergedKeys: number[] = []; const mergeFn: MergeFn = (key, v1, v2) => { @@ -1756,8 +1756,8 @@ function 
testMerge(maxNodeSize: number) { }); test('Merge with overlapping keys can delete entries', () => { - const tree1 = new BTree([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); - const tree2 = new BTree([[2, 200], [3, 300], [4, 400], [5, 500]], compare, maxNodeSize); + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400], [5, 500]], compare, maxNodeSize); const mergeFn: MergeFn = (k, v1, v2) => { if (k === 3) return undefined; return v1 + v2; @@ -1768,8 +1768,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge is called even when values are equal', () => { - const tree1 = new BTree([[1, 10], [2, 20]], compare, maxNodeSize); - const tree2 = new BTree([[2, 20], [3, 30]], compare, maxNodeSize); + const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 20], [3, 30]], compare, maxNodeSize); const mergeCallLog: Array<{k: number, v1: number, v2: number}> = []; const mergeFn: MergeFn = (k, v1, v2) => { @@ -1786,8 +1786,8 @@ function testMerge(maxNodeSize: number) { test('Merge does not mutate input trees', () => { const entries1: [number, number][] = [[1, 10], [2, 20], [3, 30]]; const entries2: [number, number][] = [[2, 200], [3, 300], [4, 400]]; - const tree1 = new BTree(entries1, compare, maxNodeSize); - const tree2 = new BTree(entries2, compare, maxNodeSize); + const tree1 = new BTreeEx(entries1, compare, maxNodeSize); + const tree2 = new BTreeEx(entries2, compare, maxNodeSize); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; const snapshot1 = tree1.toArray(); @@ -1808,8 +1808,8 @@ function testMerge(maxNodeSize: number) { const entries2: [number, number][] = []; for (let i = 500; i < 1500; i++) entries2.push([i, i * 10]); - const tree1 = new BTree(entries1, compare, maxNodeSize); - const tree2 = new BTree(entries2, compare, maxNodeSize); + const tree1 = new BTreeEx(entries1, compare, maxNodeSize); + const tree2 
= new BTreeEx(entries2, compare, maxNodeSize); let mergeCount = 0; const mergeFn: MergeFn = (k, v1, v2) => { @@ -1824,8 +1824,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge with overlaps at boundaries', () => { - const tree1 = new BTree([], compare, maxNodeSize); - const tree2 = new BTree([], compare, maxNodeSize); + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); for (let i = 0; i < 100; i++) { tree1.set(i * 2, i * 2); @@ -1850,8 +1850,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge result can be modified without affecting inputs', () => { - const tree1 = new BTree([[1, 10], [2, 20]], compare, maxNodeSize); - const tree2 = new BTree([[3, 30], [4, 40]], compare, maxNodeSize); + const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); + const tree2 = new BTreeEx([[3, 30], [4, 40]], compare, maxNodeSize); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); @@ -1878,8 +1878,8 @@ function testMerge(maxNodeSize: number) { const entries2: [number, number][] = []; for (let i = 101; i <= 200; i++) entries2.push([i, i]); - const tree1 = new BTree(entries1, compare, maxNodeSize); - const tree2 = new BTree(entries2, compare, maxNodeSize); + const tree1 = new BTreeEx(entries1, compare, maxNodeSize); + const tree2 = new BTreeEx(entries2, compare, maxNodeSize); const mergeFn: MergeFn = () => { throw new Error('Should not be called - no overlaps'); }; @@ -1898,8 +1898,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge with single element trees', () => { - const tree1 = new BTree([[5, 50]], compare, maxNodeSize); - const tree2 = new BTree([[5, 500]], compare, maxNodeSize); + const tree1 = new BTreeEx([[5, 50]], compare, maxNodeSize); + const tree2 = new BTreeEx([[5, 500]], compare, maxNodeSize); const mergeFn: MergeFn = (_k, v1, v2) => Math.max(v1, v2); const { result } = 
expectMergeMatchesBaseline(tree1, tree2, mergeFn); @@ -1907,11 +1907,11 @@ function testMerge(maxNodeSize: number) { }); test('Merge interleaved keys', () => { - const tree1 = new BTree([], compare, maxNodeSize); + const tree1 = new BTreeEx([], compare, maxNodeSize); for (let i = 1; i <= 100; i += 2) tree1.set(i, i); - const tree2 = new BTree([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); for (let i = 2; i <= 100; i += 2) tree2.set(i, i); @@ -1928,8 +1928,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge excluding all overlapping keys', () => { - const tree1 = new BTree([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTree([[2, 200], [3, 300], [4, 400]], compare, maxNodeSize); + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400]], compare, maxNodeSize); const mergeFn: MergeFn = () => undefined; const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); @@ -1937,8 +1937,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge reuses appended subtree with minimum fanout', () => { - const tree1 = new BTree([], compare, maxNodeSize); - const tree2 = new BTree([], compare, maxNodeSize); + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); for (let i = 0; i < 400; i++) { tree1.set(i, i); @@ -1959,8 +1959,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge with large disjoint ranges', () => { - const tree1 = new BTree([], compare, maxNodeSize); - const tree2 = new BTree([], compare, maxNodeSize); + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); for (let i = 0; i <= 10000; i++) tree1.set(i, i); @@ -1988,8 +1988,8 @@ function testMerge(maxNodeSize: number) { const keys1 = makeArray(size, true); const keys2 = makeArray(size, true); - const tree1 = new BTree(); - const tree2 = 
new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); for (let k of keys1) tree1.set(k, k); @@ -2007,8 +2007,8 @@ function testMerge(maxNodeSize: number) { const offset = Math.floor(size * 0.9); const overlap = size - offset; - const tree1 = new BTree([], compare, maxNodeSize); - const tree2 = new BTree([], compare, maxNodeSize); + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); for (let i = 0; i < size; i++) tree1.set(i, i); @@ -2035,8 +2035,8 @@ function testMerge(maxNodeSize: number) { }); test('Merge throws error when comparators differ', () => { - const tree1 = new BTree([[1, 10]], compare, maxNodeSize); - const tree2 = new BTree([[2, 20]], (a, b) => b - a, maxNodeSize); + const tree1 = new BTreeEx([[1, 10]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a, maxNodeSize); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; expect(() => tree1.merge(tree2, mergeFn)).toThrow(); @@ -2044,8 +2044,8 @@ function testMerge(maxNodeSize: number) { test('Merge throws error when max node sizes differ', () => { const otherFanout = maxNodeSize === 32 ? 
16 : 32; - const tree1 = new BTree([[1, 10]], compare, maxNodeSize); - const tree2 = new BTree([[2, 20]], compare, otherFanout); + const tree1 = new BTreeEx([[1, 10]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 20]], compare, otherFanout); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; expect(() => tree1.merge(tree2, mergeFn)).toThrow(); @@ -2129,8 +2129,8 @@ describe('BTree merge fuzz tests', () => { const collisionLabel = collisionChance.toFixed(2); test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { - const treeA = new BTree([], compare, maxNodeSize); - const treeB = new BTree([], compare, maxNodeSize); + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); const keys = makeArray(size, true, 1, collisionChance, rng); const sorted = Array.from(new Set(keys)).sort(compare); diff --git a/benchmarks.ts b/benchmarks.ts index 3cfc401..040159c 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -430,8 +430,8 @@ console.log("### Merge between B+ trees"); const timeMergeVsBaseline = ( baseTitle: string, - tree1: BTree, - tree2: BTree, + tree1: BTreeEx, + tree2: BTreeEx, prefer = preferLeftMerge, mergeLabel = 'merge()', baselineLabel = 'clone+set loop (baseline)' @@ -448,8 +448,8 @@ console.log("### Merge between B+ trees"); // Test 1: Non-overlapping ranges (best case - minimal intersections) console.log("# Non-overlapping ranges (disjoint keys)"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); const offset = size * 3; for (let i = 0; i < size; i++) { @@ -464,8 +464,8 @@ console.log("### Merge between B+ trees"); console.log(); console.log("# Adjacent ranges (one intersection point)"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); // 
Tree1: 0-size, Tree2: size-(2*size) for (let i = 0; i <= size; i++) { @@ -480,8 +480,8 @@ console.log("### Merge between B+ trees"); console.log(); console.log("# Interleaved ranges (two intersection points)"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); // Tree1: 0-size, 2*size-3*size // Tree2: size-2*size @@ -498,8 +498,8 @@ console.log("### Merge between B+ trees"); console.log(); console.log("# Complete overlap (worst case - all keys intersect)"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); for (let i = 0; i < size; i++) { tree1.set(i, i); @@ -513,8 +513,8 @@ console.log("### Merge between B+ trees"); console.log(); console.log("# Partial overlap (10% intersection)"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); // Tree1: 0-(size) // Tree2: (~0.9*size)-(1.9*size) @@ -538,8 +538,8 @@ console.log("### Merge between B+ trees"); const keys1 = makeArray(size, true); const keys2 = makeArray(size, true); - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); for (let k of keys1) { tree1.set(k, k); @@ -555,8 +555,8 @@ console.log("### Merge between B+ trees"); console.log(); console.log("# Merge with empty tree"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); for (let i = 0; i < size; i++) { tree1.set(i, i); @@ -569,8 +569,8 @@ console.log("### Merge between B+ trees"); console.log(); console.log("# Compare merge vs manual iteration for complete overlap"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); for (let 
i = 0; i < size; i++) { tree1.set(i, i); @@ -588,12 +588,12 @@ console.log("### Merge between B+ trees"); const overlapInterval = 100_000; const overlapPerInterval = 10; - const tree1 = new BTree(); + const tree1 = new BTreeEx(); for (let i = 0; i < totalKeys; i++) { tree1.set(i, i); } - const tree2 = new BTree(); + const tree2 = new BTreeEx(); for (let i = 0; i < totalKeys; i++) { if ((i % overlapInterval) < overlapPerInterval) { tree2.set(i, i); @@ -613,8 +613,8 @@ console.log("### Intersect between B+ trees"); const sizes = [100, 1000, 10000, 100000]; const runIntersect = ( - tree1: BTree, - tree2: BTree + tree1: BTreeEx, + tree2: BTreeEx ) => { let count = 0; let checksum = 0; @@ -640,8 +640,8 @@ console.log("### Intersect between B+ trees"); const timeIntersectVsBaseline = ( baseTitle: string, - tree1: BTree, - tree2: BTree, + tree1: BTreeEx, + tree2: BTreeEx, intersectLabel = 'intersect()', baselineLabel = 'sort baseline' ) => { @@ -658,8 +658,8 @@ console.log("### Intersect between B+ trees"); console.log(); console.log("# Non-overlapping ranges (no shared keys)"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); const offset = size * 3; for (let i = 0; i < size; i++) { tree1.set(i, i); @@ -673,8 +673,8 @@ console.log("### Intersect between B+ trees"); console.log(); console.log("# 50% overlapping ranges"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); const offset = Math.floor(size / 2); for (let i = 0; i < size; i++) { tree1.set(i, i); @@ -688,8 +688,8 @@ console.log("### Intersect between B+ trees"); console.log(); console.log("# Complete overlap (all keys shared)"); sizes.forEach((size) => { - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); for (let i = 0; i < size; i++) { tree1.set(i, i); 
tree2.set(i, i * 3); @@ -709,8 +709,8 @@ console.log("### Intersect between B+ trees"); keys2[i] = keys1[i]; } - const tree1 = new BTree(); - const tree2 = new BTree(); + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); for (let i = 0; i < keys1.length; i++) { const key = keys1[i]; diff --git a/extended/decompose.d.ts b/extended/decompose.d.ts new file mode 100644 index 0000000..143984c --- /dev/null +++ b/extended/decompose.d.ts @@ -0,0 +1,17 @@ +import BTree, { BNode } from '../b+tree'; +import type { BTreeWithInternals } from './shared'; +export declare type DecomposeResult = { + disjoint: (number | BNode)[]; + tallestIndex: number; +}; +/** + * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward + * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to + * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to + * the first key at or after the trailing cursor's previous position. + * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. 
+ */ +export declare function decompose(left: BTreeWithInternals, right: BTreeWithInternals, mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined): DecomposeResult; +export declare function buildFromDecomposition, K, V>(constructor: new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, branchingFactor: number, decomposed: DecomposeResult, cmp: (a: K, b: K) => number, maxNodeSize: number): TBTree; diff --git a/extended/decompose.js b/extended/decompose.js new file mode 100644 index 0000000..9997eb0 --- /dev/null +++ b/extended/decompose.js @@ -0,0 +1,541 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.buildFromDecomposition = exports.decompose = void 0; +var b_tree_1 = require("../b+tree"); +var parallelWalk_1 = require("./parallelWalk"); +/** + * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward + * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to + * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to + * the first key at or after the trailing cursor's previous position. + * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. + */ +function decompose(left, right, mergeValues) { + var cmp = left._compare; + (0, b_tree_1.check)(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); + // Holds the disjoint nodes that result from decomposition. 
+ // Alternating entries of (height, node) to avoid creating small tuples + var disjoint = []; + // During the decomposition, leaves that are not disjoint are decomposed into individual entries + // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused + // disjoint subtree is added to the disjoint set. + // Note that there are unavoidable cases in which this will generate underfilled leaves. + // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. + // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, + // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] + var pending = []; + var tallestIndex = -1, tallestHeight = -1; + // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. + // This is done because we cannot know immediately whether we can add the node to the disjoint set + // because its ancestor may also be disjoint and should be reused instead. + var highestDisjoint = undefined; + var flushPendingEntries = function () { + var totalPairs = alternatingCount(pending); + if (totalPairs === 0) + return; + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. 
+ var max = left._maxNodeSize; + var leafCount = Math.ceil(totalPairs / max); + var remaining = totalPairs; + var pairIndex = 0; + while (leafCount > 0) { + var chunkSize = Math.ceil(remaining / leafCount); + var keys = new Array(chunkSize); + var vals = new Array(chunkSize); + for (var i = 0; i < chunkSize; i++) { + keys[i] = alternatingGetFirst(pending, pairIndex); + vals[i] = alternatingGetSecond(pending, pairIndex); + pairIndex++; + } + remaining -= chunkSize; + leafCount--; + var leaf = new b_tree_1.BNode(keys, vals); + alternatingPush(disjoint, 0, leaf); + if (tallestHeight < 0) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = 0; + } + } + pending.length = 0; + }; + var addSharedNodeToDisjointSet = function (node, height) { + flushPendingEntries(); + node.isShared = true; + alternatingPush(disjoint, height, node); + if (height > tallestHeight) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = height; + } + }; + var addHighestDisjoint = function () { + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + }; + // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) + var disqualifySpine = function (cursor, depthFrom) { + var spine = cursor.spine; + for (var i = depthFrom; i >= 0; --i) { + var payload = spine[i].payload; + // Safe to early out because we always disqualify all ancestors of a disqualified node + // That is correct because every ancestor of a non-disjoint node is also non-disjoint + // because it must enclose the non-disjoint range. 
+ if (payload.disqualified) + break; + payload.disqualified = true; + } + }; + // Cursor payload factory + var makePayload = function () { return ({ disqualified: false }); }; + var pushLeafRange = function (leaf, from, toExclusive) { + var keys = leaf.keys; + var values = leaf.values; + for (var i = from; i < toExclusive; ++i) + alternatingPush(pending, keys[i], values[i]); + }; + var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual) { + (0, b_tree_1.check)(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); + var start = startedEqual ? fromIndex + 1 : fromIndex; + if (start < toIndex) + pushLeafRange(leaf, start, toIndex); + }; + var onExitLeaf = function (leaf, payload, startingIndex, startedEqual, cursorThis) { + highestDisjoint = undefined; + if (!payload.disqualified) { + highestDisjoint = { node: leaf, height: 0 }; + if (cursorThis.spine.length === 0) { + // if we are exiting a leaf and there are no internal nodes, we will reach the end of the tree. + // In this case we need to add the leaf now because step up will not be called. + addHighestDisjoint(); + } + } + else { + var start = startedEqual ? 
startingIndex + 1 : startingIndex; + var leafSize = leaf.keys.length; + if (start < leafSize) + pushLeafRange(leaf, start, leafSize); + } + }; + var onStepUp = function (parent, height, payload, fromIndex, spineIndex, stepDownIndex, cursorThis) { + var children = parent.children; + var nextHeight = height - 1; + if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ + || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { + if (!payload.disqualified) { + highestDisjoint = { node: parent, height: height }; + if (stepDownIndex === Number.POSITIVE_INFINITY) { + // We have finished our walk, and we won't be stepping down, so add the root + addHighestDisjoint(); + } + } + else { + addHighestDisjoint(); + var len = children.length; + for (var i = fromIndex + 1; i < len; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + } + else { + // We have a valid step down index, so we need to disqualify the spine if needed. + // This is identical to the step down logic, but we must also perform it here because + // in the case of stepping down into a leaf, the step down callback is never called. + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + addHighestDisjoint(); + for (var i = fromIndex + 1; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + }; + var onStepDown = function (node, height, spineIndex, stepDownIndex, cursorThis) { + if (stepDownIndex > 0) { + // When we step down into a node, we know that we have walked from a key that is less than our target. + // Because of this, if we are not stepping down into the first child, we know that all children before + // the stepDownIndex must overlap with the other tree because they must be before our target key. Since + // the child we are stepping into has a key greater than our target key, this node must overlap. 
+ // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range + // of its children. + disqualifySpine(cursorThis, spineIndex); + var children = node.children; + var nextHeight = height - 1; + for (var i = 0; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + }; + var onEnterLeaf = function (leaf, destIndex, cursorThis, cursorOther) { + if (destIndex > 0 + || (0, b_tree_1.areOverlapping)(leaf.minKey(), leaf.maxKey(), (0, parallelWalk_1.getKey)(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + // Similar logic to the step-down case, except in this case we also know the leaf in the other + // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. + cursorThis.leafPayload.disqualified = true; + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorThis, cursorThis.spine.length - 1); + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + pushLeafRange(leaf, 0, destIndex); + } + }; + // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second + var maxKeyLeft = left._root.maxKey(); + var maxKeyRight = right._root.maxKey(); + var maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + // Initialize cursors at minimum keys. + var curA = (0, parallelWalk_1.createCursor)(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + var curB = (0, parallelWalk_1.createCursor)(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful + // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) + // the key of cursorB to the first key >= cursorB. Call this transition a "crossover point." 
All interior nodes that + // overlap cause a crossover point, and all crossover points are guaranteed to be walked using this method. Thus, + // all overlapping interior nodes will be found if they are checked for on step-down. + // The one exception mentioned above is when they start at the same key. In this case, they are both advanced forward and then + // their new ordering determines how they walk from there. + // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). + // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. + // Initialize disqualification w.r.t. opposite leaf. + var initDisqualify = function (cur, other) { + var minKey = (0, parallelWalk_1.getKey)(cur); + var otherMin = (0, parallelWalk_1.getKey)(other); + var otherMax = other.leaf.maxKey(); + if ((0, b_tree_1.areOverlapping)(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) + cur.leafPayload.disqualified = true; + for (var i = 0; i < cur.spine.length; ++i) { + var entry = cur.spine[i]; + // Since we are on the left side of the tree, we can use the leaf min key for every spine node + if ((0, b_tree_1.areOverlapping)(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) + entry.payload.disqualified = true; + } + }; + initDisqualify(curA, curB); + initDisqualify(curB, curA); + var leading = curA; + var trailing = curB; + var order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + // Walk both cursors in alternating hops + while (true) { + var areEqual = order === 0; + if (areEqual) { + var key = (0, parallelWalk_1.getKey)(leading); + var vA = curA.leaf.values[curA.leafIndex]; + var vB = curB.leaf.values[curB.leafIndex]; + // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value + // to pending because they respect the areEqual flag during their moves. 
+ var merged = mergeValues(key, vA, vB); + if (merged !== undefined) + alternatingPush(pending, key, merged); + var outTrailing = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); + var outLeading = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); + if (outTrailing || outLeading) { + if (!outTrailing || !outLeading) { + // In these cases, we pass areEqual=false because a return value of "out of tree" means + // the cursor did not move. This must be true because they started equal and one of them had more tree + // to walk (one is !out), so they cannot be equal at this point. + if (outTrailing) { + (0, parallelWalk_1.moveTo)(leading, trailing, maxKey, false, false, cmp); + } + else { + (0, parallelWalk_1.moveTo)(trailing, leading, maxKey, false, false, cmp); + } + } + break; + } + order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + } + else { + if (order < 0) { + var tmp = trailing; + trailing = leading; + leading = tmp; + } + var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual, cmp), out = _a[0], nowEqual = _a[1]; + if (out) { + (0, parallelWalk_1.moveTo)(leading, trailing, maxKey, false, areEqual, cmp); + break; + } + else if (nowEqual) { + order = 0; + } + else { + order = -1; + } + } + } + // Ensure any trailing non-disjoint entries are added + flushPendingEntries(); + return { disjoint: disjoint, tallestIndex: tallestIndex }; +} +exports.decompose = decompose; +function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, maxNodeSize) { + var disjoint = decomposed.disjoint, tallestIndex = decomposed.tallestIndex; + var disjointEntryCount = alternatingCount(disjoint); + // Now we have a set of disjoint subtrees and we need to merge them into a single tree. 
+ // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees + // to the "right" and "left" of it in sorted order, we append them onto the appropriate side + // of the current tree, splitting nodes as necessary to maintain balance. + // A "side" is referred to as a frontier, as it is a linked list of nodes from the root down to + // the leaf level on that side of the tree. Each appended subtree is appended to the node at the + // same height as itself on the frontier. Each tree is guaranteed to be at most as tall as the + // current frontier because we start from the tallest subtree and work outward. + var initialRoot = alternatingGetSecond(disjoint, tallestIndex); + var frontier = [initialRoot]; + // Process all subtrees to the right of the tallest subtree + if (tallestIndex + 1 <= disjointEntryCount - 1) { + updateFrontier(frontier, 0, getRightmostIndex); + processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjointEntryCount, 1, getRightmostIndex, getRightInsertionIndex, splitOffRightSide, updateRightMax); + } + // Process all subtrees to the left of the current tree + if (tallestIndex - 1 >= 0) { + // Note we need to update the frontier here because the right-side processing may have grown the tree taller. + updateFrontier(frontier, 0, getLeftmostIndex); + processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, getLeftmostIndex, getLeftmostIndex, splitOffLeftSide, parallelWalk_1.noop // left side appending doesn't update max keys + ); + } + var merged = new constructor(undefined, cmp, maxNodeSize); + merged._root = frontier[0]; + // Return the resulting tree + return merged; +} +exports.buildFromDecomposition = buildFromDecomposition; +/** + * Processes one side (left or right) of the disjoint subtree set during a merge operation. + * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. 
+ */ +function processSide(branchingFactor, disjoint, spine, start, end, step, sideIndex, sideInsertionIndex, splitOffSide, updateMax) { + // Determine the depth of the first shared node on the frontier. + // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning + // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the + // frontier for each insertion as that would fundamentally change our asymptotics. + var isSharedFrontierDepth = 0; + var cur = spine[0]; + // Find the first shared node on the frontier + while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { + isSharedFrontierDepth++; + cur = cur.children[sideIndex(cur)]; + } + // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. + // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. + // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. + // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. + // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. + // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. 
+ var unflushedSizes = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array + for (var i = start; i != end; i += step) { + var currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf + var subtree = alternatingGetSecond(disjoint, i); + var subtreeHeight = alternatingGetFirst(disjoint, i); + var insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' + // Ensure path is unshared before mutation + ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); + // Calculate expansion depth (first ancestor with capacity) + var expansionDepth = Math.max(0, findCascadeEndDepth(spine, insertionDepth, branchingFactor)); + // Update sizes on spine above the shared ancestor before we expand + updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); + // Append and cascade splits upward + var newRoot = appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); + if (newRoot) { + // Set the spine root to the highest up new node; the rest of the spine is updated below + spine[0] = newRoot; + unflushedSizes.forEach(function (count) { return (0, b_tree_1.check)(count === 0, "Unexpected unflushed size after root split."); }); + unflushedSizes.push(0); // new root level + isSharedFrontierDepth = insertionDepth + 2; + unflushedSizes[insertionDepth + 1] += subtree.size(); + } + else { + isSharedFrontierDepth = insertionDepth + 1; + unflushedSizes[insertionDepth] += subtree.size(); + } + // Finally, update the frontier from the highest new node downward + // Note that this is often the point where the new subtree is attached, + // but in the case of cascaded splits it may be higher up. 
+ updateFrontier(spine, expansionDepth, sideIndex);
+ (0, b_tree_1.check)(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared.");
+ (0, b_tree_1.check)(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split.");
+ }
+ // Finally, propagate any remaining unflushed sizes upward and update max keys
+ updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax);
+}
+;
+/**
+ * Append a subtree at a given depth on the chosen side; cascade splits upward if needed.
+ * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions.
+ * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point.
+ * Returns a new root if the root was split, otherwise undefined.
+ */
+function appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide) {
+ // We must take care to avoid accidental propagation upward of the size of the inserted subtree.
+ // To do this, we first split nodes upward from the insertion point until we find a node with capacity
+ // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point,
+ // inserting at the end ensures no accidental propagation.
+ // Depth is -1 if the subtree is the same height as the current tree + if (insertionDepth >= 0) { + var carry = undefined; + // Determine initially where to insert after any splits + var insertTarget = spine[insertionDepth]; + if (insertTarget.keys.length >= branchingFactor) { + insertTarget = carry = splitOffSide(insertTarget); + } + var d = insertionDepth - 1; + while (carry && d >= 0) { + var parent = spine[d]; + var idx = sideIndex(parent); + // Refresh last key since child was split + parent.keys[idx] = parent.children[idx].maxKey(); + if (parent.keys.length < branchingFactor) { + // We have reached the end of the cascade + insertNoCount(parent, sideInsertionIndex(parent), carry); + carry = undefined; + } + else { + // Splitting the parent here requires care to avoid incorrectly double counting sizes + // Example: a node is at max capacity 4, with children each of size 4 for 16 total. + // We split the node into two nodes of 2 children each, but this does *not* modify the size + // of its parent. 
+ // Therefore when we insert the carry into the torn-off node, we must not
+ // increase its size or we will double-count the size of the carry subtree.
+ var tornOff = splitOffSide(parent);
+ insertNoCount(tornOff, sideInsertionIndex(tornOff), carry);
+ carry = tornOff;
+ }
+ d--;
+ }
+ var newRoot = undefined;
+ if (carry !== undefined) {
+ // Expansion reached the root, need a new root to hold carry
+ var oldRoot = spine[0];
+ newRoot = new b_tree_1.BNodeInternal([oldRoot], oldRoot.size() + carry.size());
+ insertNoCount(newRoot, sideInsertionIndex(newRoot), carry);
+ }
+ // Finally, insert the subtree at the insertion point
+ insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree);
+ return newRoot;
+ }
+ else {
+ // Insertion of subtree with equal height to current tree
+ var oldRoot = spine[0];
+ var newRoot = new b_tree_1.BNodeInternal([oldRoot], oldRoot.size());
+ insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree);
+ return newRoot;
+ }
+}
+;
+/**
+ * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate.
+ * Short-circuits if first shared node is deeper than depthTo (the insertion depth).
+ */ +function ensureNotShared(spine, isSharedFrontierDepth, depthToInclusive, sideIndex) { + if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) + return; // nothing to clone when root is a leaf; equal-height case will handle this + // Clone root if needed first (depth 0) + if (isSharedFrontierDepth === 0) { + var root = spine[0]; + spine[0] = root.clone(); + } + // Clone downward along the frontier to 'depthToInclusive' + for (var depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { + var parent = spine[depth - 1]; + var childIndex = sideIndex(parent); + var clone = parent.children[childIndex].clone(); + parent.children[childIndex] = clone; + spine[depth] = clone; + } +} +; +/** + * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) + */ +function updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive, updateMax) { + // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because + // the insertion point is inside a shared node which will always have correct sizes + var maxKey = spine[isSharedFrontierDepth].maxKey(); + var startDepth = isSharedFrontierDepth - 1; + for (var depth = startDepth; depth >= depthUpToInclusive; depth--) { + var sizeAtLevel = unflushedSizes[depth]; + unflushedSizes[depth] = 0; // we are propagating it now + if (depth > 0) { + // propagate size upward, will be added lazily, either when a subtree is appended at or above that level or + // at the end of processing the entire side + unflushedSizes[depth - 1] += sizeAtLevel; + } + var node = spine[depth]; + node._size += sizeAtLevel; + // No-op if left side, as max keys in parents are unchanged by appending to the beginning of a node + updateMax(node, maxKey); + } +} +; +/** + * Update a spine (frontier) from a specific depth down, inclusive. + * Extends the frontier array if it is not already as long as the frontier. 
+ */ +function updateFrontier(frontier, depthLastValid, sideIndex) { + (0, b_tree_1.check)(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); + var startingAncestor = frontier[depthLastValid]; + if (startingAncestor.isLeaf) + return; + var internal = startingAncestor; + var cur = internal.children[sideIndex(internal)]; + var depth = depthLastValid + 1; + while (!cur.isLeaf) { + var ni = cur; + frontier[depth] = ni; + cur = ni.children[sideIndex(ni)]; + depth++; + } + frontier[depth] = cur; +} +; +/** + * Find the first ancestor (starting at insertionDepth) with capacity. + */ +function findCascadeEndDepth(spine, insertionDepth, branchingFactor) { + for (var depth = insertionDepth; depth >= 0; depth--) { + if (spine[depth].keys.length < branchingFactor) + return depth; + } + return -1; // no capacity, will need a new root +} +; +/** + * Inserts the child without updating cached size counts. + */ +function insertNoCount(parent, index, child) { + parent.children.splice(index, 0, child); + parent.keys.splice(index, 0, child.maxKey()); +} +// ---- Side-specific delegates for merging subtrees into a frontier ---- +function getLeftmostIndex() { + return 0; +} +function getRightmostIndex(node) { + return node.children.length - 1; +} +function getRightInsertionIndex(node) { + return node.children.length; +} +function splitOffRightSide(node) { + return node.splitOffRightSide(); +} +function splitOffLeftSide(node) { + return node.splitOffLeftSide(); +} +function updateRightMax(node, maxBelow) { + node.keys[node.keys.length - 1] = maxBelow; +} +// ------- Alternating list helpers ------- +// These helpers manage a list that alternates between two types of entries. +// Storing data this way avoids small tuple allocations and shows major improvements +// in GC time in benchmarks. 
+function alternatingCount(list) { + return list.length >> 1; +} +function alternatingGetFirst(list, index) { + return list[index << 1]; +} +function alternatingGetSecond(list, index) { + return list[(index << 1) + 1]; +} +function alternatingPush(list, first, second) { + // Micro benchmarks show this is the fastest way to do this + list.push(first, second); +} diff --git a/decompose.ts b/extended/decompose.ts similarity index 97% rename from decompose.ts rename to extended/decompose.ts index 3cdf5d0..bbe5a12 100644 --- a/decompose.ts +++ b/extended/decompose.ts @@ -1,5 +1,6 @@ -import BTree, { areOverlapping } from "./b+tree"; -import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne } from "./parallelWalk"; +import BTree, { areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; +import type { BTreeWithInternals } from './shared'; +import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne, moveTo, noop } from "./parallelWalk"; export type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; @@ -13,8 +14,8 @@ export type DecomposeResult = { disjoint: (number | BNode)[], tallestI * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. 
*/ export function decompose( - left: BTree, - right: BTree, + left: BTreeWithInternals, + right: BTreeWithInternals, mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined ): DecomposeResult { const cmp = left._compare; @@ -318,11 +319,13 @@ export function decompose( return { disjoint, tallestIndex }; } -export function buildFromDecomposition( +export function buildFromDecomposition, K,V>( + constructor: new (entries?: [K,V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, branchingFactor: number, - decomposed: DecomposeResult -): BTree { - + decomposed: DecomposeResult, + cmp: (a: K, b: K) => number, + maxNodeSize: number +): TBTree { const { disjoint, tallestIndex } = decomposed; const disjointEntryCount = alternatingCount(disjoint); @@ -371,8 +374,8 @@ export function buildFromDecomposition( ); } - const merged = new BTree(undefined, this._compare, this._maxNodeSize); - merged._root = frontier[0]; + const merged = new constructor(undefined, cmp, maxNodeSize); + (merged as unknown as BTreeWithInternals)._root = frontier[0]; // Return the resulting tree return merged; @@ -437,10 +440,10 @@ function processSide( unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); unflushedSizes.push(0); // new root level isSharedFrontierDepth = insertionDepth + 2; - unflushedSizes[insertionDepth + 1] += susize(); + unflushedSizes[insertionDepth + 1] += subtree.size(); } else { isSharedFrontierDepth = insertionDepth + 1; - unflushedSizes[insertionDepth] += susize(); + unflushedSizes[insertionDepth] += subtree.size(); } // Finally, update the frontier from the highest new node downward diff --git a/extended/index.d.ts b/extended/index.d.ts index 528377f..6b6ec6e 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -2,6 +2,18 @@ import BTree from '../b+tree'; export declare class BTreeEx extends BTree { clone(): this; greedyClone(force?: boolean): this; + /** + * Computes the differences 
between `this` and `other`. + * For efficiency, the diff is returned via invocations of supplied handlers. + * The computation is optimized for the case in which the two trees have large amounts of shared data + * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. + * The handlers can cause computation to early exit by returning `{ break: R }`. + * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * @param other The tree to compute a diff against. + * @param onlyThis Callback invoked for all keys only present in `this`. + * @param onlyOther Callback invoked for all keys only present in `other`. + * @param different Callback invoked for all keys with differing values. + */ diffAgainst(other: BTree, onlyThis?: (k: K, v: V) => { break?: R; } | void, onlyOther?: (k: K, v: V) => { @@ -9,6 +21,36 @@ export declare class BTreeEx extends BTree { } | void, different?: (k: K, vThis: V, vOther: V) => { break?: R; } | void): R | undefined; + /** + * Intersects this tree with `other`, calling the supplied `intersection` callback for each intersecting key/value pair. + * Neither tree is modified. + * @param other The other tree to intersect with this one. + * @param intersection Called for keys that appear in both trees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
+ * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ + intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void; + /** + * Efficiently merges this tree with `other`, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param other The other tree to merge into this one. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. 
+ */ + merge(other: BTreeEx, mergeFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx; } export interface BTreeEx { with(key: K): BTreeEx; diff --git a/extended/index.js b/extended/index.js index 9d9ba2d..6d849b4 100644 --- a/extended/index.js +++ b/extended/index.js @@ -21,6 +21,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.BTreeEx = void 0; var b_tree_1 = __importDefault(require("../b+tree")); var diffAgainst_1 = require("./diffAgainst"); +var merge_1 = require("./merge"); var BTreeEx = /** @class */ (function (_super) { __extends(BTreeEx, _super); function BTreeEx() { @@ -43,9 +44,54 @@ var BTreeEx = /** @class */ (function (_super) { target._size = source._size; return result; }; + /** + * Computes the differences between `this` and `other`. + * For efficiency, the diff is returned via invocations of supplied handlers. + * The computation is optimized for the case in which the two trees have large amounts of shared data + * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. + * The handlers can cause computation to early exit by returning `{ break: R }`. + * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * @param other The tree to compute a diff against. + * @param onlyThis Callback invoked for all keys only present in `this`. + * @param onlyOther Callback invoked for all keys only present in `other`. + * @param different Callback invoked for all keys with differing values. + */ BTreeEx.prototype.diffAgainst = function (other, onlyThis, onlyOther, different) { return (0, diffAgainst_1.diffAgainst)(this, other, onlyThis, onlyOther, different); }; + /** + * Intersects this tree with `other`, calling the supplied `intersection` callback for each intersecting key/value pair. + * Neither tree is modified. + * @param other The other tree to intersect with this one. 
+ * @param intersection Called for keys that appear in both trees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ + BTreeEx.prototype.intersect = function (other, intersection) { + }; + /** + * Efficiently merges this tree with `other`, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param other The other tree to merge into this one. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. 
+ */ + BTreeEx.prototype.merge = function (other, mergeFn) { + return (0, merge_1.merge)(this, other, mergeFn); + }; return BTreeEx; }(b_tree_1.default)); exports.BTreeEx = BTreeEx; diff --git a/extended/index.ts b/extended/index.ts index 554eca7..3b4442a 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -1,6 +1,8 @@ import BTree from '../b+tree'; import type { BTreeWithInternals } from './shared'; import { diffAgainst as diffAgainstAlgorithm } from './diffAgainst'; +import { intersect } from './intersect'; +import { merge } from './merge'; export class BTreeEx extends BTree { clone(): this { @@ -75,8 +77,8 @@ export class BTreeEx extends BTree { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. */ - merge(other: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx { - + merge(other: BTreeEx, mergeFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx { + return merge, K, V>(this, other, mergeFn); } } diff --git a/extended/intersect.d.ts b/extended/intersect.d.ts new file mode 100644 index 0000000..c84b1d8 --- /dev/null +++ b/extended/intersect.d.ts @@ -0,0 +1,16 @@ +import BTree from '../b+tree'; +/** + * Intersects the two trees, calling the supplied `intersection` callback for each intersecting key/value pair. + * Neither tree is modified. + * @param treeA First tree to intersect. + * @param treeB Second tree to intersect. + * @param intersection Called for keys that appear in both trees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ +export declare function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void; diff --git a/extended/intersect.js b/extended/intersect.js new file mode 100644 index 0000000..3d2fb82 --- /dev/null +++ b/extended/intersect.js @@ -0,0 +1,69 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.intersect = void 0; +var parallelWalk_1 = require("./parallelWalk"); +/** + * Intersects the two trees, calling the supplied `intersection` callback for each intersecting key/value pair. + * Neither tree is modified. + * @param treeA First tree to intersect. + * @param treeB Second tree to intersect. + * @param intersection Called for keys that appear in both trees. + * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
+ * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ +function intersect(treeA, treeB, intersection) { + var _treeA = treeA; + var _treeB = treeB; + (0, parallelWalk_1.checkCanDoSetOperation)(_treeA, _treeB); + if (treeB.size === 0 || treeA.size === 0) + return; + var cmp = treeA._compare; + var makePayload = function () { return undefined; }; + var cursorA = (0, parallelWalk_1.createCursor)(_treeA, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + var cursorB = (0, parallelWalk_1.createCursor)(_treeB, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + var leading = cursorA; + var trailing = cursorB; + var order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + // The intersect walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. + // However, the only thing we care about is when the two cursors are equal (equality is intersection). + // When they are not equal we just advance the trailing cursor. + while (true) { + var areEqual = order === 0; + if (areEqual) { + var key = (0, parallelWalk_1.getKey)(leading); + var vA = cursorA.leaf.values[cursorA.leafIndex]; + var vB = cursorB.leaf.values[cursorB.leafIndex]; + intersection(key, vA, vB); + var outT = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); + var outL = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); + if (outT && outL) + break; + order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + } + else { + if (order < 0) { + var tmp = trailing; + trailing = leading; + leading = tmp; + } + // At this point, leading is guaranteed to be ahead of trailing. 
+ var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual, cmp), out = _a[0], nowEqual = _a[1]; + if (out) { + // We've reached the end of one tree, so intersections are guaranteed to be done. + break; + } + else if (nowEqual) { + order = 0; + } + else { + order = -1; // trailing is ahead of leading + } + } + } +} +exports.intersect = intersect; diff --git a/intersect.ts b/extended/intersect.ts similarity index 85% rename from intersect.ts rename to extended/intersect.ts index ae889ff..e920fdf 100644 --- a/intersect.ts +++ b/extended/intersect.ts @@ -1,4 +1,6 @@ -import BTree from "./b+tree"; +import BTree from '../b+tree'; +import { BNode, BNodeInternal, check } from '../b+tree'; +import type { BTreeWithInternals } from './shared'; import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperation } from "./parallelWalk" /** @@ -16,14 +18,16 @@ import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperat * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ export function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { - checkCanDoSetOperation(treeA, treeB); + const _treeA = treeA as unknown as BTreeWithInternals; + const _treeB = treeB as unknown as BTreeWithInternals; + checkCanDoSetOperation(_treeA, _treeB); if (treeB.size === 0 || treeA.size === 0) return; const cmp = treeA._compare; const makePayload = (): undefined => undefined; - let cursorA = createCursor(treeA, makePayload, noop, noop, noop, noop, noop); - let cursorB = createCursor(treeB, makePayload, noop, noop, noop, noop, noop); + let cursorA = createCursor(_treeA, makePayload, noop, noop, noop, noop, noop); + let cursorB = createCursor(_treeB, makePayload, noop, noop, noop, noop, noop); let leading = cursorA; let trailing = cursorB; let order = cmp(getKey(leading), getKey(trailing)); diff --git a/extended/merge.d.ts b/extended/merge.d.ts new file mode 100644 index 0000000..dff8899 --- /dev/null +++ b/extended/merge.d.ts @@ -0,0 +1,18 @@ +import BTree from '../b+tree'; +/** + * Efficiently merges two trees, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param treeA First tree to merge. + * @param treeB Second tree to merge. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
+ * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. + */ +export declare function merge, K, V>(treeA: TBTree, treeB: TBTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree; diff --git a/extended/merge.js b/extended/merge.js new file mode 100644 index 0000000..338e120 --- /dev/null +++ b/extended/merge.js @@ -0,0 +1,37 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.merge = void 0; +var decompose_1 = require("./decompose"); +var parallelWalk_1 = require("./parallelWalk"); +/** + * Efficiently merges two trees, reusing subtrees wherever possible. + * Neither input tree is modified. + * @param treeA First tree to merge. + * @param treeB Second tree to merge. + * @param merge Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @returns A new BTree that contains the merged key/value pairs. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` + * and inserting the contents of `other` into the clone. 
+ */ +function merge(treeA, treeB, merge) { + var _treeA = treeA; + var _treeB = treeB; + var branchingFactor = (0, parallelWalk_1.checkCanDoSetOperation)(_treeA, _treeB); + if (_treeA._root.size() === 0) + return treeB.clone(); + if (_treeB._root.size() === 0) + return treeA.clone(); + // Decompose both trees into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are the result of merging intersecting leaves. + var decomposed = (0, decompose_1.decompose)(_treeA, _treeB, merge); + var constructor = treeA.constructor; + return (0, decompose_1.buildFromDecomposition)(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); +} +exports.merge = merge; diff --git a/merge.ts b/extended/merge.ts similarity index 65% rename from merge.ts rename to extended/merge.ts index 45dc819..68b56e7 100644 --- a/merge.ts +++ b/extended/merge.ts @@ -1,4 +1,5 @@ -import BTree from "./b+tree"; +import BTree from '../b+tree'; +import type { BTreeWithInternals } from './shared'; import { decompose, buildFromDecomposition } from "./decompose"; import { checkCanDoSetOperation } from "./parallelWalk"; @@ -18,16 +19,19 @@ import { checkCanDoSetOperation } from "./parallelWalk"; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. 
*/ -export function merge(treeA: BTree, treeB: BTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): BTree { - const branchingFactor = checkCanDoSetOperation(treeA, treeB); - if (treeA._root.size() === 0) +export function merge, K,V>(treeA: TBTree, treeB: TBTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree { + const _treeA = treeA as unknown as BTreeWithInternals; + const _treeB = treeB as unknown as BTreeWithInternals; + const branchingFactor = checkCanDoSetOperation(_treeA, _treeB); + if (_treeA._root.size() === 0) return treeB.clone(); - if (treeB._root.size() === 0) + if (_treeB._root.size() === 0) return treeA.clone(); // Decompose both trees into disjoint subtrees leaves. // As many of these as possible will be reused from the original trees, and the remaining // will be leaves that are the result of merging intersecting leaves. - const decomposed = decompose(treeA, treeB, merge); - return buildFromDecomposition(branchingFactor, decomposed); + const decomposed = decompose(_treeA, _treeB, merge); + const constructor = treeA.constructor as new (entries?: [K,V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; + return buildFromDecomposition(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); } \ No newline at end of file diff --git a/extended/parallelWalk.d.ts b/extended/parallelWalk.d.ts new file mode 100644 index 0000000..8af39ef --- /dev/null +++ b/extended/parallelWalk.d.ts @@ -0,0 +1,41 @@ +import { BNode, BNodeInternal } from '../b+tree'; +import type { BTreeWithInternals } from './shared'; +export declare type MergeCursorPayload = { + disqualified: boolean; +}; +export interface MergeCursor { + tree: BTreeWithInternals; + leaf: BNode; + leafIndex: number; + spine: Array<{ + node: BNodeInternal; + childIndex: number; + payload: TPayload; + }>; + leafPayload: TPayload; + makePayload: () => TPayload; + onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: 
number, toIndex: number, isInclusive: boolean) => void; + onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; + onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; + onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; + onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; +} +/** + * Walks the cursor forward by one key. + * Should only be called to advance cursors that started equal. + * Returns true if end-of-tree was reached (cursor not structurally mutated). + */ +export declare function moveForwardOne(cur: MergeCursor, other: MergeCursor, currentKey: K, cmp: (a: K, b: K) => number): boolean; +/** + * Create a cursor pointing to the leftmost key of the supplied tree. + */ +export declare function createCursor(tree: BTreeWithInternals, makePayload: MergeCursor["makePayload"], onEnterLeaf: MergeCursor["onEnterLeaf"], onMoveInLeaf: MergeCursor["onMoveInLeaf"], onExitLeaf: MergeCursor["onExitLeaf"], onStepUp: MergeCursor["onStepUp"], onStepDown: MergeCursor["onStepDown"]): MergeCursor; +export declare function getKey(c: MergeCursor): K; +/** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). + * Also returns a boolean indicating if the target key was landed on exactly. 
+ */ +export declare function moveTo(cur: MergeCursor, other: MergeCursor, targetKey: K, isInclusive: boolean, startedEqual: boolean, cmp: (a: K, b: K) => number): [outOfTree: boolean, targetExactlyReached: boolean]; +export declare function noop(): void; +export declare function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals): number; diff --git a/extended/parallelWalk.js b/extended/parallelWalk.js new file mode 100644 index 0000000..7632649 --- /dev/null +++ b/extended/parallelWalk.js @@ -0,0 +1,185 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.checkCanDoSetOperation = exports.noop = exports.moveTo = exports.getKey = exports.createCursor = exports.moveForwardOne = void 0; +/** + * Walks the cursor forward by one key. + * Should only be called to advance cursors that started equal. + * Returns true if end-of-tree was reached (cursor not structurally mutated). + */ +function moveForwardOne(cur, other, currentKey, cmp) { + var leaf = cur.leaf; + var nextIndex = cur.leafIndex + 1; + if (nextIndex < leaf.keys.length) { + // Still within current leaf + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, true); + cur.leafIndex = nextIndex; + return false; + } + // If our optimized step within leaf failed, use full moveTo logic + // Pass isInclusive=false to ensure we walk forward to the key exactly after the current + return moveTo(cur, other, currentKey, false, true, cmp)[0]; +} +exports.moveForwardOne = moveForwardOne; +/** + * Create a cursor pointing to the leftmost key of the supplied tree. 
+ */ +function createCursor(tree, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown) { + var spine = []; + var n = tree._root; + while (!n.isLeaf) { + var ni = n; + var payload = makePayload(); + spine.push({ node: ni, childIndex: 0, payload: payload }); + n = ni.children[0]; + } + var leafPayload = makePayload(); + var cur = { + tree: tree, + leaf: n, leafIndex: 0, + spine: spine, + leafPayload: leafPayload, + makePayload: makePayload, + onEnterLeaf: onEnterLeaf, + onMoveInLeaf: onMoveInLeaf, + onExitLeaf: onExitLeaf, + onStepUp: onStepUp, + onStepDown: onStepDown + }; + return cur; +} +exports.createCursor = createCursor; +function getKey(c) { + return c.leaf.keys[c.leafIndex]; +} +exports.getKey = getKey; +/** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). + * Also returns a boolean indicating if the target key was landed on exactly. 
+ */ +function moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp) { + // Cache callbacks for perf + var onMoveInLeaf = cur.onMoveInLeaf; + // Fast path: destination within current leaf + var leaf = cur.leaf; + var leafPayload = cur.leafPayload; + var i = leaf.indexOf(targetKey, -1, cmp); + var destInLeaf; + var targetExactlyReached; + if (i < 0) { + destInLeaf = ~i; + targetExactlyReached = false; + } + else { + if (isInclusive) { + destInLeaf = i; + targetExactlyReached = true; + } + else { + destInLeaf = i + 1; + targetExactlyReached = false; + } + } + var leafKeyCount = leaf.keys.length; + if (destInLeaf < leafKeyCount) { + onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); + cur.leafIndex = destInLeaf; + return [false, targetExactlyReached]; + } + // Find first ancestor with a viable right step + var spine = cur.spine; + var initialSpineLength = spine.length; + var descentLevel = -1; + var descentIndex = -1; + for (var s = initialSpineLength - 1; s >= 0; s--) { + var parent = spine[s].node; + var indexOf = parent.indexOf(targetKey, -1, cmp); + var stepDownIndex = void 0; + if (indexOf < 0) { + stepDownIndex = ~indexOf; + } + else { + stepDownIndex = isInclusive ? indexOf : indexOf + 1; + } + // Note: when key not found, indexOf with failXor=0 already returns insertion index + if (stepDownIndex < parent.keys.length) { + descentLevel = s; + descentIndex = stepDownIndex; + break; + } + } + // Exit leaf; even if no spine, we did walk out of it conceptually + var startIndex = cur.leafIndex; + cur.onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); + var onStepUp = cur.onStepUp; + if (descentLevel < 0) { + // No descent point; step up all the way; last callback gets infinity + for (var depth = initialSpineLength - 1; depth >= 0; depth--) { + var entry_1 = spine[depth]; + var sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; + onStepUp(entry_1.node, initialSpineLength - depth, entry_1.payload, entry_1.childIndex, depth, sd, cur); + } + return [true, false]; + } + // Step up through ancestors above the descentLevel + for (var depth = initialSpineLength - 1; depth > descentLevel; depth--) { + var entry_2 = spine[depth]; + onStepUp(entry_2.node, initialSpineLength - depth, entry_2.payload, entry_2.childIndex, depth, Number.NaN, cur); + } + var entry = spine[descentLevel]; + onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); + entry.childIndex = descentIndex; + var onStepDown = cur.onStepDown; + var makePayload = cur.makePayload; + // Descend, invoking onStepDown and creating payloads + var height = initialSpineLength - descentLevel - 1; // calculate height before changing length + spine.length = descentLevel + 1; + var node = spine[descentLevel].node.children[descentIndex]; + while (!node.isLeaf) { + var ni = node; + var keys = ni.keys; + var stepDownIndex = ni.indexOf(targetKey, 0, cmp); + if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) + stepDownIndex++; + var payload = makePayload(); + var spineIndex = spine.length; + spine.push({ node: ni, childIndex: stepDownIndex, payload: payload }); + onStepDown(ni, height, spineIndex, stepDownIndex, cur); + node = ni.children[stepDownIndex]; + height -= 1; + } + // Enter destination leaf + var idx = node.indexOf(targetKey, -1, cmp); + var destIndex; + if (idx < 0) { + destIndex = ~idx; + targetExactlyReached = false; + } + else { + if (isInclusive) { + destIndex = idx; + targetExactlyReached = true; + } + else { + destIndex = idx + 1; + targetExactlyReached = false; + } + } + cur.leaf = node; + cur.leafPayload = makePayload(); + cur.leafIndex = destIndex; + cur.onEnterLeaf(node, destIndex, cur, other); + return [false, targetExactlyReached]; +} +exports.moveTo = moveTo; +function noop() { } 
+exports.noop = noop; +function checkCanDoSetOperation(treeA, treeB) { + if (treeA._compare !== treeB._compare) + throw new Error("Cannot merge BTrees with different comparators."); + var branchingFactor = treeA._maxNodeSize; + if (branchingFactor !== treeB._maxNodeSize) + throw new Error("Cannot merge BTrees with different max node sizes."); + return branchingFactor; +} +exports.checkCanDoSetOperation = checkCanDoSetOperation; diff --git a/parallelWalk.ts b/extended/parallelWalk.ts similarity index 95% rename from parallelWalk.ts rename to extended/parallelWalk.ts index 8045e8c..0094e64 100644 --- a/parallelWalk.ts +++ b/extended/parallelWalk.ts @@ -1,9 +1,10 @@ -import BTree from "./b+tree"; +import { BNode, BNodeInternal } from '../b+tree'; +import type { BTreeWithInternals } from './shared'; export type MergeCursorPayload = { disqualified: boolean }; export interface MergeCursor { - tree: BTree; + tree: BTreeWithInternals; leaf: BNode; leafIndex: number; spine: Array<{ node: BNodeInternal, childIndex: number, payload: TPayload }>; @@ -38,14 +39,14 @@ export function moveForwardOne( // If our optimized step within leaf failed, use full moveTo logic // Pass isInclusive=false to ensure we walk forward to the key exactly after the current - return BTree.moveTo(cur, other, currentKey, false, true, cmp)[0]; + return moveTo(cur, other, currentKey, false, true, cmp)[0]; } /** * Create a cursor pointing to the leftmost key of the supplied tree. 
*/ export function createCursor( - tree: BTree, + tree: BTreeWithInternals, makePayload: MergeCursor["makePayload"], onEnterLeaf: MergeCursor["onEnterLeaf"], onMoveInLeaf: MergeCursor["onMoveInLeaf"], @@ -208,7 +209,7 @@ export function moveTo( export function noop(): void {} -export function checkCanDoSetOperation(treeA: BTree, treeB: BTree): number { +export function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals): number { if (treeA._compare !== treeB._compare) throw new Error("Cannot merge BTrees with different comparators."); diff --git a/package-lock.json b/package-lock.json index 985f56a..c6a9486 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "sorted-btree", - "version": "1.9.0", + "version": "2.0.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "sorted-btree", - "version": "1.9.0", + "version": "2.0.0", "license": "MIT", "devDependencies": { "@types/bintrees": "^1.0.2", From 31475a3f01c21f302ac01b949d813ac7a65af900 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 16:31:50 -0800 Subject: [PATCH 061/143] tests passing --- b+tree.test.ts | 9 +++++---- extended/index.js | 2 ++ extended/index.ts | 2 +- extended/parallelWalk.d.ts | 2 ++ extended/parallelWalk.js | 8 +++++--- extended/parallelWalk.ts | 8 ++++++-- 6 files changed, 21 insertions(+), 10 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index 8bc7c33..1b7e9c0 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1,6 +1,7 @@ import BTree, { IMap, defaultComparator, simpleComparator } from './b+tree'; import BTreeEx from './extended'; import diffAgainst from './extended/diffAgainst'; +import { branchingFactorErrorMsg, comparatorErrorMsg } from './extended/parallelWalk'; import SortedArray from './sorted-array'; import MersenneTwister from 'mersenne-twister'; @@ -1258,13 +1259,13 @@ function testIntersect(maxNodeSize: number) { const compareB = (a: number, b: number) => a - b; const tree1 = 
new BTreeEx([[1, 1]], compareA, maxNodeSize); const tree2 = new BTreeEx([[1, 1]], compareB, maxNodeSize); - expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot intersect BTrees with different comparators."); + expect(() => tree1.intersect(tree2, () => {})).toThrow(comparatorErrorMsg); }); test('Intersect throws for max node size mismatch', () => { const tree1 = new BTreeEx([[1, 1]], compare, maxNodeSize); const tree2 = new BTreeEx([[1, 1]], compare, maxNodeSize + 1); - expect(() => tree1.intersect(tree2, () => {})).toThrow("Cannot intersect BTrees with different max node sizes."); + expect(() => tree1.intersect(tree2, () => {})).toThrow(branchingFactorErrorMsg); }); } @@ -2039,7 +2040,7 @@ function testMerge(maxNodeSize: number) { const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a, maxNodeSize); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; - expect(() => tree1.merge(tree2, mergeFn)).toThrow(); + expect(() => tree1.merge(tree2, mergeFn)).toThrow(comparatorErrorMsg); }); test('Merge throws error when max node sizes differ', () => { @@ -2048,7 +2049,7 @@ function testMerge(maxNodeSize: number) { const tree2 = new BTreeEx([[2, 20]], compare, otherFanout); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; - expect(() => tree1.merge(tree2, mergeFn)).toThrow(); + expect(() => tree1.merge(tree2, mergeFn)).toThrow(branchingFactorErrorMsg); }); } diff --git a/extended/index.js b/extended/index.js index 6d849b4..1a3c6e6 100644 --- a/extended/index.js +++ b/extended/index.js @@ -21,6 +21,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.BTreeEx = void 0; var b_tree_1 = __importDefault(require("../b+tree")); var diffAgainst_1 = require("./diffAgainst"); +var intersect_1 = require("./intersect"); var merge_1 = require("./merge"); var BTreeEx = /** @class */ (function (_super) { __extends(BTreeEx, _super); @@ -73,6 +74,7 @@ var BTreeEx = /** @class */ (function (_super) { * on both trees and performing a walk on the sorted contents due 
to the reduced allocation overhead. */ BTreeEx.prototype.intersect = function (other, intersection) { + (0, intersect_1.intersect)(this, other, intersection); }; /** * Efficiently merges this tree with `other`, reusing subtrees wherever possible. diff --git a/extended/index.ts b/extended/index.ts index 3b4442a..2696b95 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -59,7 +59,7 @@ export class BTreeEx extends BTree { * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { - + intersect(this, other, intersection); } /** diff --git a/extended/parallelWalk.d.ts b/extended/parallelWalk.d.ts index 8af39ef..7e1dac9 100644 --- a/extended/parallelWalk.d.ts +++ b/extended/parallelWalk.d.ts @@ -38,4 +38,6 @@ export declare function getKey(c: MergeCursor): K; */ export declare function moveTo(cur: MergeCursor, other: MergeCursor, targetKey: K, isInclusive: boolean, startedEqual: boolean, cmp: (a: K, b: K) => number): [outOfTree: boolean, targetExactlyReached: boolean]; export declare function noop(): void; +export declare const comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; +export declare const branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; export declare function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals): number; diff --git a/extended/parallelWalk.js b/extended/parallelWalk.js index 7632649..11eeead 100644 --- a/extended/parallelWalk.js +++ b/extended/parallelWalk.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.checkCanDoSetOperation = exports.noop = exports.moveTo = exports.getKey = exports.createCursor = exports.moveForwardOne = void 0; +exports.checkCanDoSetOperation = exports.branchingFactorErrorMsg = exports.comparatorErrorMsg = 
exports.noop = exports.moveTo = exports.getKey = exports.createCursor = exports.moveForwardOne = void 0; /** * Walks the cursor forward by one key. * Should only be called to advance cursors that started equal. @@ -174,12 +174,14 @@ function moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp) { exports.moveTo = moveTo; function noop() { } exports.noop = noop; +exports.comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; +exports.branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; function checkCanDoSetOperation(treeA, treeB) { if (treeA._compare !== treeB._compare) - throw new Error("Cannot merge BTrees with different comparators."); + throw new Error(exports.comparatorErrorMsg); var branchingFactor = treeA._maxNodeSize; if (branchingFactor !== treeB._maxNodeSize) - throw new Error("Cannot merge BTrees with different max node sizes."); + throw new Error(exports.branchingFactorErrorMsg); return branchingFactor; } exports.checkCanDoSetOperation = checkCanDoSetOperation; diff --git a/extended/parallelWalk.ts b/extended/parallelWalk.ts index 0094e64..47a176c 100644 --- a/extended/parallelWalk.ts +++ b/extended/parallelWalk.ts @@ -209,12 +209,16 @@ export function moveTo( export function noop(): void {} +export const comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; + +export const branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; + export function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals): number { if (treeA._compare !== treeB._compare) - throw new Error("Cannot merge BTrees with different comparators."); + throw new Error(comparatorErrorMsg); const branchingFactor = treeA._maxNodeSize; if (branchingFactor !== treeB._maxNodeSize) - throw new Error("Cannot merge BTrees with different max node sizes."); + throw new Error(branchingFactorErrorMsg); 
return branchingFactor; } From cee22084b7575263854e315aac94258b118936ad Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 16:45:09 -0800 Subject: [PATCH 062/143] cleanup --- b+tree.test.ts | 44 +++++++++++++++++++++++++++----------------- extended/index.js | 4 ++-- extended/index.ts | 2 +- extended/merge.d.ts | 2 +- extended/merge.js | 3 +-- extended/merge.ts | 2 +- 6 files changed, 33 insertions(+), 24 deletions(-) diff --git a/b+tree.test.ts b/b+tree.test.ts index 1b7e9c0..d992f11 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1,6 +1,7 @@ import BTree, { IMap, defaultComparator, simpleComparator } from './b+tree'; import BTreeEx from './extended'; import diffAgainst from './extended/diffAgainst'; +import merge from './extended/merge'; import { branchingFactorErrorMsg, comparatorErrorMsg } from './extended/parallelWalk'; import SortedArray from './sorted-array'; import MersenneTwister from 'mersenne-twister'; @@ -1253,21 +1254,22 @@ function testIntersect(maxNodeSize: number) { { key: 4, leftValue: 40, rightValue: 400 }, ]); }); +} - test('Intersect throws for comparator mismatch', () => { - const compareA = (a: number, b: number) => a - b; - const compareB = (a: number, b: number) => a - b; - const tree1 = new BTreeEx([[1, 1]], compareA, maxNodeSize); - const tree2 = new BTreeEx([[1, 1]], compareB, maxNodeSize); +describe('BTree intersect input/output validation', () => { + test('Intersect throws error when comparators differ', () => { + const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); + const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); expect(() => tree1.intersect(tree2, () => {})).toThrow(comparatorErrorMsg); }); - test('Intersect throws for max node size mismatch', () => { - const tree1 = new BTreeEx([[1, 1]], compare, maxNodeSize); - const tree2 = new BTreeEx([[1, 1]], compare, maxNodeSize + 1); + test('Intersect throws error when max node sizes differ', () => { + const compare = (a: number, b: number) => b - a; + const 
tree1 = new BTreeEx([[1, 10]], compare, 32); + const tree2 = new BTreeEx([[2, 20]], compare, 33); expect(() => tree1.intersect(tree2, () => {})).toThrow(branchingFactorErrorMsg); }); -} +}); describe('BTree intersect fuzz tests', () => { const compare = (a: number, b: number) => a - b; @@ -1364,6 +1366,8 @@ describe('BTree merge tests with fanout 32', testMerge.bind(null, 32)); describe('BTree merge tests with fanout 10', testMerge.bind(null, 10)); describe('BTree merge tests with fanout 4', testMerge.bind(null, 4)); +type MergeFn = (key: number, leftValue: number, rightValue: number) => number | undefined; + function testMerge(maxNodeSize: number) { const compare = (a: number, b: number) => a - b; const sharesNode = (root: any, targetNode: any): boolean => { @@ -1399,7 +1403,6 @@ function testMerge(maxNodeSize: number) { return result; }; - type MergeFn = (key: number, leftValue: number, rightValue: number) => number | undefined; type MergeExpectationOptions = { expectedMergeFn?: MergeFn; }; @@ -2034,25 +2037,32 @@ function testMerge(maxNodeSize: number) { for (let i = size; i < upperBound; i++) expect(result.get(i)).toBe(i * 10); }); +} - test('Merge throws error when comparators differ', () => { - const tree1 = new BTreeEx([[1, 10]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a, maxNodeSize); +describe('BTree merge input/output validation', () => { + test('Merge throws error when comparators differ', () => { + const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); + const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; expect(() => tree1.merge(tree2, mergeFn)).toThrow(comparatorErrorMsg); }); test('Merge throws error when max node sizes differ', () => { - const otherFanout = maxNodeSize === 32 ? 
16 : 32; - const tree1 = new BTreeEx([[1, 10]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 20]], compare, otherFanout); + const compare = (a: number, b: number) => b - a; + const tree1 = new BTreeEx([[1, 10]], compare, 32); + const tree2 = new BTreeEx([[2, 20]], compare, 33); const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; expect(() => tree1.merge(tree2, mergeFn)).toThrow(branchingFactorErrorMsg); }); -} + test('Merging trees returns a tree of the same class', () => { + expect(merge(new BTreeEx(), new BTreeEx(), (_k, v1, v2) => v1)).toBeInstanceOf(BTreeEx); + expect(merge(new BTree(), new BTree(), (_k, v1, v2) => v1)).toBeInstanceOf(BTree); + expect(merge(new BTree(), new BTree(), (_k, v1, v2) => v1) instanceof BTreeEx).toBeFalsy(); + }); +}); function swap(keys: any[], i: number, j: number) { var tmp = keys[i]; diff --git a/extended/index.js b/extended/index.js index 1a3c6e6..0faf28f 100644 --- a/extended/index.js +++ b/extended/index.js @@ -22,7 +22,7 @@ exports.BTreeEx = void 0; var b_tree_1 = __importDefault(require("../b+tree")); var diffAgainst_1 = require("./diffAgainst"); var intersect_1 = require("./intersect"); -var merge_1 = require("./merge"); +var merge_1 = __importDefault(require("./merge")); var BTreeEx = /** @class */ (function (_super) { __extends(BTreeEx, _super); function BTreeEx() { @@ -92,7 +92,7 @@ var BTreeEx = /** @class */ (function (_super) { * and inserting the contents of `other` into the clone. 
*/ BTreeEx.prototype.merge = function (other, mergeFn) { - return (0, merge_1.merge)(this, other, mergeFn); + return (0, merge_1.default)(this, other, mergeFn); }; return BTreeEx; }(b_tree_1.default)); diff --git a/extended/index.ts b/extended/index.ts index 2696b95..31fde49 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -2,7 +2,7 @@ import BTree from '../b+tree'; import type { BTreeWithInternals } from './shared'; import { diffAgainst as diffAgainstAlgorithm } from './diffAgainst'; import { intersect } from './intersect'; -import { merge } from './merge'; +import merge from './merge'; export class BTreeEx extends BTree { clone(): this { diff --git a/extended/merge.d.ts b/extended/merge.d.ts index dff8899..7397162 100644 --- a/extended/merge.d.ts +++ b/extended/merge.d.ts @@ -15,4 +15,4 @@ import BTree from '../b+tree'; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. 
*/ -export declare function merge, K, V>(treeA: TBTree, treeB: TBTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree; +export default function merge, K, V>(treeA: TBTree, treeB: TBTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree; diff --git a/extended/merge.js b/extended/merge.js index 338e120..b269794 100644 --- a/extended/merge.js +++ b/extended/merge.js @@ -1,6 +1,5 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.merge = void 0; var decompose_1 = require("./decompose"); var parallelWalk_1 = require("./parallelWalk"); /** @@ -34,4 +33,4 @@ function merge(treeA, treeB, merge) { var constructor = treeA.constructor; return (0, decompose_1.buildFromDecomposition)(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); } -exports.merge = merge; +exports.default = merge; diff --git a/extended/merge.ts b/extended/merge.ts index 68b56e7..408ec44 100644 --- a/extended/merge.ts +++ b/extended/merge.ts @@ -19,7 +19,7 @@ import { checkCanDoSetOperation } from "./parallelWalk"; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. 
*/ -export function merge, K,V>(treeA: TBTree, treeB: TBTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree { +export default function merge, K,V>(treeA: TBTree, treeB: TBTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree { const _treeA = treeA as unknown as BTreeWithInternals; const _treeB = treeB as unknown as BTreeWithInternals; const branchingFactor = checkCanDoSetOperation(_treeA, _treeB); From d8fdf266f34fa7f3d309fac120027dbdbb621e8b Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 17:07:39 -0800 Subject: [PATCH 063/143] merge -> union --- b+tree.test.ts | 356 ++++++++++++++-------------- benchmarks.ts | 58 ++--- extended/index.d.ts | 10 +- extended/index.js | 14 +- extended/index.ts | 14 +- extended/{merge.d.ts => union.d.ts} | 12 +- extended/{merge.js => union.js} | 16 +- extended/{merge.ts => union.ts} | 20 +- readme.md | 6 +- 9 files changed, 255 insertions(+), 251 deletions(-) rename extended/{merge.d.ts => union.d.ts} (63%) rename extended/{merge.js => union.js} (81%) rename extended/{merge.ts => union.ts} (76%) diff --git a/b+tree.test.ts b/b+tree.test.ts index d992f11..59b6293 100644 --- a/b+tree.test.ts +++ b/b+tree.test.ts @@ -1,7 +1,7 @@ import BTree, { IMap, defaultComparator, simpleComparator } from './b+tree'; import BTreeEx from './extended'; import diffAgainst from './extended/diffAgainst'; -import merge from './extended/merge'; +import union from './extended/union'; import { branchingFactorErrorMsg, comparatorErrorMsg } from './extended/parallelWalk'; import SortedArray from './sorted-array'; import MersenneTwister from 'mersenne-twister'; @@ -1362,13 +1362,13 @@ describe('BTree intersect fuzz tests', () => { } }); -describe('BTree merge tests with fanout 32', testMerge.bind(null, 32)); -describe('BTree merge tests with fanout 10', testMerge.bind(null, 10)); -describe('BTree merge tests with fanout 4', testMerge.bind(null, 4)); +describe('BTree union tests with fanout 
32', testUnion.bind(null, 32)); +describe('BTree union tests with fanout 10', testUnion.bind(null, 10)); +describe('BTree union tests with fanout 4', testUnion.bind(null, 4)); -type MergeFn = (key: number, leftValue: number, rightValue: number) => number | undefined; +type UnionFn = (key: number, leftValue: number, rightValue: number) => number | undefined; -function testMerge(maxNodeSize: number) { +function testUnion(maxNodeSize: number) { const compare = (a: number, b: number) => a - b; const sharesNode = (root: any, targetNode: any): boolean => { if (root === targetNode) @@ -1403,20 +1403,20 @@ function testMerge(maxNodeSize: number) { return result; }; - type MergeExpectationOptions = { - expectedMergeFn?: MergeFn; + type UnionExpectationOptions = { + expectedUnionFn?: UnionFn; }; - const naiveMerge = ( + const naiveUnion = ( left: BTreeEx, right: BTreeEx, - mergeFn: MergeFn + unionFn: UnionFn ) => { const expected = left.clone(); right.forEachPair((key, rightValue) => { if (expected.has(key)) { const leftValue = expected.get(key)!; - const mergedValue = mergeFn(key, leftValue, rightValue); + const mergedValue = unionFn(key, leftValue, rightValue); if (mergedValue === undefined) { expected.delete(key); } else { @@ -1429,16 +1429,16 @@ function testMerge(maxNodeSize: number) { return expected; }; - const expectMergeMatchesBaseline = ( + const expectUnionMatchesBaseline = ( left: BTreeEx, right: BTreeEx, - mergeFn: MergeFn, + unionFn: UnionFn, after?: (ctx: { result: BTreeEx, expected: BTreeEx }) => void, - options: MergeExpectationOptions = {} + options: UnionExpectationOptions = {} ) => { - const expectedMergeFn = options.expectedMergeFn ?? mergeFn; - const expected = naiveMerge(left, right, expectedMergeFn); - const result = left.merge(right, mergeFn); + const expectedUnionFn = options.expectedUnionFn ?? 
unionFn; + const expected = naiveUnion(left, right, expectedUnionFn); + const result = left.union(right, unionFn); expect(result.toArray()).toEqual(expected.toArray()); result.checkValid(); expected.checkValid(); @@ -1446,7 +1446,7 @@ function testMerge(maxNodeSize: number) { return { result, expected }; }; - test('Merge disjoint roots reuses appended subtree', () => { + test('Union disjoint roots reuses appended subtree', () => { const size = maxNodeSize * 3; const tree1 = buildTree(range(0, size), 1, 0); const offset = size * 5; @@ -1455,22 +1455,22 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree1, false); expectRootLeafState(tree2, false); - let mergeCalls = 0; - const mergeFn: MergeFn = () => { - mergeCalls++; + let unionCalls = 0; + const unionFn: UnionFn = () => { + unionCalls++; return 0; }; - expectMergeMatchesBaseline(tree1, tree2, mergeFn, ({ result }) => { + expectUnionMatchesBaseline(tree1, tree2, unionFn, ({ result }) => { const resultRoot = result['_root'] as any; expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); }); - expect(mergeCalls).toBe(0); + expect(unionCalls).toBe(0); }); - test('Merge leaf roots with intersecting keys uses merge callback', () => { + test('Union leaf roots with intersecting keys uses union callback', () => { const tree1 = buildTree([1, 2, 4], 10, 0); const tree2 = buildTree([2, 3, 5], 100, 0); @@ -1478,34 +1478,34 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, true); const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; - const mergeFn: MergeFn = (key, leftValue, rightValue) => { + const unionFn: UnionFn = (key, leftValue, rightValue) => { calls.push({ key, leftValue, rightValue }); return leftValue + rightValue; }; - expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, leftValue, rightValue) => leftValue + rightValue + 
expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue }); expect(calls).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); }); - test('Merge leaf roots with disjoint keys', () => { + test('Union leaf roots with disjoint keys', () => { const tree1 = buildTree([1, 3, 5], 1, 0); const tree2 = buildTree([2, 4, 6], 1, 1000); expectRootLeafState(tree1, true); expectRootLeafState(tree2, true); - let mergeCalls = 0; - const mergeFn: MergeFn = () => { - mergeCalls++; + let unionCalls = 0; + const unionFn: UnionFn = () => { + unionCalls++; return 0; }; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, leftValue, rightValue) => leftValue + rightValue + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue }); - expect(mergeCalls).toBe(0); + expect(unionCalls).toBe(0); expect(result.toArray()).toEqual([ [1, 1], [2, 1002], @@ -1516,7 +1516,7 @@ function testMerge(maxNodeSize: number) { ]); }); - test('Merge trees disjoint except for shared maximum key', () => { + test('Union trees disjoint except for shared maximum key', () => { const size = maxNodeSize * 2; const tree1 = buildTree(range(0, size), 1, 0); const tree2 = buildTree(range(size - 1, size - 1 + size), 3, 0); @@ -1524,21 +1524,21 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree1, false); expectRootLeafState(tree2, false); - let mergeCalls = 0; - const mergeFn: MergeFn = (_key, leftValue, rightValue) => { - mergeCalls++; + let unionCalls = 0; + const unionFn: UnionFn = (_key, leftValue, rightValue) => { + unionCalls++; return leftValue + rightValue; }; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, leftValue, rightValue) => leftValue + rightValue + const { result } = 
expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue }); - expect(mergeCalls).toBe(1); + expect(unionCalls).toBe(1); expect(result.get(size - 1)).toBe((size - 1) + (size - 1) * 3); expect(result.size).toBe(tree1.size + tree2.size - 1); }); - test('Merge trees where all leaves are disjoint and one tree straddles the other', () => { + test('Union trees where all leaves are disjoint and one tree straddles the other', () => { const straddleLength = 3 * 2 * maxNodeSize; // creates multiple leaves on both trees const tree1 = buildTree( range(0, straddleLength / 3).concat(range((straddleLength / 3) * 2, straddleLength)), @@ -1549,18 +1549,18 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree1, false); expectRootLeafState(tree2, false); - let mergeCalls = 0; - const mergeFn: MergeFn = (_key, leftValue, rightValue) => { - mergeCalls++; + let unionCalls = 0; + const unionFn: UnionFn = (_key, leftValue, rightValue) => { + unionCalls++; return leftValue + rightValue; }; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); - expect(mergeCalls).toBe(0); + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + expect(unionCalls).toBe(0); expect(result.size).toBe(tree1.size + tree2.size); }); - test('Merge where two-leaf tree intersects leaf-root tree across both leaves', () => { + test('Union where two-leaf tree intersects leaf-root tree across both leaves', () => { const size = maxNodeSize + Math.max(3, Math.floor(maxNodeSize / 2)); const tree1 = buildTree(range(0, size), 2, 0); const tree2 = buildTree([1, Math.floor(size / 2), size - 1], 5, 0); @@ -1569,18 +1569,18 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, true); const seenKeys: number[] = []; - const mergeFn: MergeFn = (key, _leftValue, rightValue) => { + const unionFn: UnionFn = (key, _leftValue, rightValue) => { seenKeys.push(key); return rightValue; }; - 
expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, _leftValue, rightValue) => rightValue + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, _leftValue, rightValue) => rightValue }); expect(seenKeys.sort((a, b) => a - b)).toEqual([1, Math.floor(size / 2), size - 1]); }); - test('Merge where max key equals min key of other tree', () => { + test('Union where max key equals min key of other tree', () => { const size = maxNodeSize * 2; const tree1 = buildTree(range(0, size), 1, 0); const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); @@ -1588,21 +1588,21 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree1, false); expectRootLeafState(tree2, false); - let mergeCalls = 0; - const mergeFn: MergeFn = (_key, _leftValue, rightValue) => { - mergeCalls++; + let unionCalls = 0; + const unionFn: UnionFn = (_key, _leftValue, rightValue) => { + unionCalls++; return rightValue; }; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, _leftValue, rightValue) => rightValue + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, _leftValue, rightValue) => rightValue }); - expect(mergeCalls).toBe(1); + expect(unionCalls).toBe(1); expect(result.get(size - 1)).toBe((size - 1) * 10); expect(result.size).toBe(tree1.size + tree2.size - 1); }); - test('Merge odd and even keyed trees', () => { + test('Union odd and even keyed trees', () => { const limit = maxNodeSize * 3; const treeOdd = buildTree(range(1, limit * 2, 2), 1, 0); const treeEven = buildTree(range(0, limit * 2, 2), 1, 100); @@ -1610,18 +1610,18 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(treeOdd, false); expectRootLeafState(treeEven, false); - let mergeCalls = 0; - const mergeFn: MergeFn = () => { - mergeCalls++; + let unionCalls = 0; + const unionFn: UnionFn = () => { + unionCalls++; return 0; }; - 
const { result } = expectMergeMatchesBaseline(treeOdd, treeEven, mergeFn); - expect(mergeCalls).toBe(0); + const { result } = expectUnionMatchesBaseline(treeOdd, treeEven, unionFn); + expect(unionCalls).toBe(0); expect(result.size).toBe(treeOdd.size + treeEven.size); }); - test('Merge with single boundary overlap prefers right value', () => { + test('Union with single boundary overlap prefers right value', () => { const size = maxNodeSize * 2; const tree1 = buildTree(range(0, size), 1, 0); const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); @@ -1629,19 +1629,19 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree1, false); expectRootLeafState(tree2, false); - let mergeCalls = 0; - const mergeFn: MergeFn = (_key, _leftValue, rightValue) => { - mergeCalls++; + let unionCalls = 0; + const unionFn: UnionFn = (_key, _leftValue, rightValue) => { + unionCalls++; return rightValue; }; - expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, _leftValue, rightValue) => rightValue + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, _leftValue, rightValue) => rightValue }); - expect(mergeCalls).toBe(1); + expect(unionCalls).toBe(1); }); - test('Merge overlapping prefix equal to branching factor', () => { + test('Union overlapping prefix equal to branching factor', () => { const shared = maxNodeSize; const tree1Keys = [ ...range(0, shared), @@ -1659,145 +1659,145 @@ function testMerge(maxNodeSize: number) { expectRootLeafState(tree2, false); const mergedKeys: number[] = []; - const mergeFn: MergeFn = (key, leftValue, rightValue) => { + const unionFn: UnionFn = (key, leftValue, rightValue) => { mergedKeys.push(key); return leftValue + rightValue; }; - expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, leftValue, rightValue) => leftValue + rightValue + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, 
leftValue, rightValue) => leftValue + rightValue }); expect(mergedKeys.sort((a, b) => a - b)).toEqual(range(0, shared)); }); - test('Merge two empty trees', () => { + test('Union two empty trees', () => { const tree1 = new BTreeEx([], compare, maxNodeSize); const tree2 = new BTreeEx([], compare, maxNodeSize); - const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, v1, v2) => v1 + v2 + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 }); expect(result.size).toBe(0); }); - test('Merge empty tree with non-empty tree', () => { + test('Union empty tree with non-empty tree', () => { const tree1 = new BTreeEx([], compare, maxNodeSize); const tree2 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - const { result: leftMerge } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); - expect(leftMerge.toArray()).toEqual(tree2.toArray()); + const { result: leftUnion } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + expect(leftUnion.toArray()).toEqual(tree2.toArray()); - const { result: rightMerge } = expectMergeMatchesBaseline(tree2, tree1, mergeFn); - expect(rightMerge.toArray()).toEqual(tree2.toArray()); + const { result: rightUnion } = expectUnionMatchesBaseline(tree2, tree1, unionFn); + expect(rightUnion.toArray()).toEqual(tree2.toArray()); expect(tree1.toArray()).toEqual([]); expect(tree2.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); tree1.checkValid(); tree2.checkValid(); }); - test('Merge with no overlapping keys', () => { + test('Union with no overlapping keys', () => { const tree1 = new BTreeEx([[1, 10], [3, 30], [5, 50]], compare, maxNodeSize); const tree2 = new BTreeEx([[2, 20], [4, 40], [6, 60]], compare, 
maxNodeSize); - const mergeFn: MergeFn = () => { + const unionFn: UnionFn = () => { throw new Error('Should not be called for non-overlapping keys'); }; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: mergeFn + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: unionFn }); expect(result.size).toBe(6); expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60]]); }); - test('Merge with completely overlapping keys - sum values', () => { + test('Union with completely overlapping keys - sum values', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTreeEx([[1, 5], [2, 15], [3, 25]], compare, maxNodeSize); - const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, v1, v2) => v1 + v2 + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 }); expect(result.size).toBe(tree1.size); }); - test('Merge with completely overlapping keys - prefer left', () => { + test('Union with completely overlapping keys - prefer left', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); - const mergeFn: MergeFn = (_k, v1, _v2) => v1; + const unionFn: UnionFn = (_k, v1, _v2) => v1; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, v1, _v2) => v1 + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, _v2) => v1 }); expect(result.toArray()).toEqual(tree1.toArray()); }); - test('Merge with completely overlapping keys - prefer right', () => { + test('Union with 
completely overlapping keys - prefer right', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); - const mergeFn: MergeFn = (_k, _v1, v2) => v2; + const unionFn: UnionFn = (_k, _v1, v2) => v2; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); expect(result.toArray()).toEqual(tree2.toArray()); }); - test('Merge with partially overlapping keys', () => { + test('Union with partially overlapping keys', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); const tree2 = new BTreeEx([[3, 300], [4, 400], [5, 500], [6, 600]], compare, maxNodeSize); const mergedKeys: number[] = []; - const mergeFn: MergeFn = (key, v1, v2) => { + const unionFn: UnionFn = (key, v1, v2) => { mergedKeys.push(key); return v1 + v2; }; - expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, v1, v2) => v1 + v2 + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 }); expect(mergedKeys.sort((a, b) => a - b)).toEqual([3, 4]); }); - test('Merge with overlapping keys can delete entries', () => { + test('Union with overlapping keys can delete entries', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400], [5, 500]], compare, maxNodeSize); - const mergeFn: MergeFn = (k, v1, v2) => { + const unionFn: UnionFn = (k, v1, v2) => { if (k === 3) return undefined; return v1 + v2; }; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); expect(result.has(3)).toBe(false); }); - test('Merge is called even when values are equal', () => { + test('Union is called even when values are equal', () => { 
const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); const tree2 = new BTreeEx([[2, 20], [3, 30]], compare, maxNodeSize); - const mergeCallLog: Array<{k: number, v1: number, v2: number}> = []; - const mergeFn: MergeFn = (k, v1, v2) => { - mergeCallLog.push({k, v1, v2}); + const unionCallLog: Array<{k: number, v1: number, v2: number}> = []; + const unionFn: UnionFn = (k, v1, v2) => { + unionCallLog.push({k, v1, v2}); return v1; }; - expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, v1, v2) => v1 + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 }); - expect(mergeCallLog).toEqual([{k: 2, v1: 20, v2: 20}]); + expect(unionCallLog).toEqual([{k: 2, v1: 20, v2: 20}]); }); - test('Merge does not mutate input trees', () => { + test('Union does not mutate input trees', () => { const entries1: [number, number][] = [[1, 10], [2, 20], [3, 30]]; const entries2: [number, number][] = [[2, 200], [3, 300], [4, 400]]; const tree1 = new BTreeEx(entries1, compare, maxNodeSize); const tree2 = new BTreeEx(entries2, compare, maxNodeSize); - const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; const snapshot1 = tree1.toArray(); const snapshot2 = tree2.toArray(); - expectMergeMatchesBaseline(tree1, tree2, mergeFn); + expectUnionMatchesBaseline(tree1, tree2, unionFn); expect(tree1.toArray()).toEqual(snapshot1); expect(tree2.toArray()).toEqual(snapshot2); @@ -1805,7 +1805,7 @@ function testMerge(maxNodeSize: number) { tree2.checkValid(); }); - test('Merge large trees with some overlaps', () => { + test('Union large trees with some overlaps', () => { const entries1: [number, number][] = []; for (let i = 0; i < 1000; i++) entries1.push([i, i]); @@ -1815,19 +1815,19 @@ function testMerge(maxNodeSize: number) { const tree1 = new BTreeEx(entries1, compare, maxNodeSize); const tree2 = new BTreeEx(entries2, compare, maxNodeSize); - let mergeCount 
= 0; - const mergeFn: MergeFn = (k, v1, v2) => { - mergeCount++; + let unionCount = 0; + const unionFn: UnionFn = (k, v1, v2) => { + unionCount++; return v1 + v2; }; - expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, v1, v2) => v1 + v2 + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 }); - expect(mergeCount).toBe(500); + expect(unionCount).toBe(500); }); - test('Merge with overlaps at boundaries', () => { + test('Union with overlaps at boundaries', () => { const tree1 = new BTreeEx([], compare, maxNodeSize); const tree2 = new BTreeEx([], compare, maxNodeSize); @@ -1840,25 +1840,25 @@ function testMerge(maxNodeSize: number) { } const mergedKeys: number[] = []; - const mergeFn: MergeFn = (key, v1, v2) => { + const unionFn: UnionFn = (key, v1, v2) => { mergedKeys.push(key); return v1 + v2; }; - expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: (_k, v1, v2) => v1 + v2 + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 }); const expectedMergedKeys = range(50, 150).filter(k => k % 2 === 0); expect(mergedKeys.sort((a, b) => a - b)).toEqual(expectedMergedKeys); }); - test('Merge result can be modified without affecting inputs', () => { + test('Union result can be modified without affecting inputs', () => { const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); const tree2 = new BTreeEx([[3, 30], [4, 40]], compare, maxNodeSize); - const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); result.set(1, 100); result.set(5, 50); @@ -1874,7 +1874,7 @@ function testMerge(maxNodeSize: number) { result.checkValid(); }); - test('Merge with disjoint ranges', () => { + test('Union with disjoint ranges', 
() => { const entries1: [number, number][] = []; for (let i = 1; i <= 100; i++) entries1.push([i, i]); for (let i = 201; i <= 300; i++) entries1.push([i, i]); @@ -1884,12 +1884,12 @@ function testMerge(maxNodeSize: number) { const tree1 = new BTreeEx(entries1, compare, maxNodeSize); const tree2 = new BTreeEx(entries2, compare, maxNodeSize); - const mergeFn: MergeFn = () => { + const unionFn: UnionFn = () => { throw new Error('Should not be called - no overlaps'); }; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: mergeFn + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: unionFn }); expect(result.size).toBe(300); @@ -1901,16 +1901,16 @@ function testMerge(maxNodeSize: number) { expect(result.get(300)).toBe(300); }); - test('Merge with single element trees', () => { + test('Union with single element trees', () => { const tree1 = new BTreeEx([[5, 50]], compare, maxNodeSize); const tree2 = new BTreeEx([[5, 500]], compare, maxNodeSize); - const mergeFn: MergeFn = (_k, v1, v2) => Math.max(v1, v2); + const unionFn: UnionFn = (_k, v1, v2) => Math.max(v1, v2); - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); expect(result.toArray()).toEqual([[5, 500]]); }); - test('Merge interleaved keys', () => { + test('Union interleaved keys', () => { const tree1 = new BTreeEx([], compare, maxNodeSize); for (let i = 1; i <= 100; i += 2) tree1.set(i, i); @@ -1919,28 +1919,28 @@ function testMerge(maxNodeSize: number) { for (let i = 2; i <= 100; i += 2) tree2.set(i, i); - const mergeFn: MergeFn = () => { + const unionFn: UnionFn = () => { throw new Error('Should not be called - no overlapping keys'); }; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: mergeFn + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, 
undefined, { + expectedUnionFn: unionFn }); expect(result.size).toBe(100); for (let i = 1; i <= 100; i++) expect(result.get(i)).toBe(i); }); - test('Merge excluding all overlapping keys', () => { + test('Union excluding all overlapping keys', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400]], compare, maxNodeSize); - const mergeFn: MergeFn = () => undefined; + const unionFn: UnionFn = () => undefined; - const { result } = expectMergeMatchesBaseline(tree1, tree2, mergeFn); + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); expect(result.toArray()).toEqual([[1, 10], [4, 400]]); }); - test('Merge reuses appended subtree with minimum fanout', () => { + test('Union reuses appended subtree with minimum fanout', () => { const tree1 = new BTreeEx([], compare, maxNodeSize); const tree2 = new BTreeEx([], compare, maxNodeSize); @@ -1951,18 +1951,18 @@ function testMerge(maxNodeSize: number) { tree2.set(i, i * 2); } - const mergeFn: MergeFn = () => { + const unionFn: UnionFn = () => { throw new Error('Should not be called for disjoint ranges'); }; - expectMergeMatchesBaseline(tree1, tree2, mergeFn, ({ result }) => { + expectUnionMatchesBaseline(tree1, tree2, unionFn, ({ result }) => { const resultRoot = result['_root'] as any; const tree2Root = tree2['_root'] as any; expect(sharesNode(resultRoot, tree2Root)).toBe(true); }); }); - test('Merge with large disjoint ranges', () => { + test('Union with large disjoint ranges', () => { const tree1 = new BTreeEx([], compare, maxNodeSize); const tree2 = new BTreeEx([], compare, maxNodeSize); @@ -1971,23 +1971,23 @@ function testMerge(maxNodeSize: number) { for (let i = 10001; i <= 20000; i++) tree2.set(i, i); - let mergeCalls = 0; - const mergeFn: MergeFn = (_k, v1, v2) => { - mergeCalls++; + let unionCalls = 0; + const unionFn: UnionFn = (_k, v1, v2) => { + unionCalls++; return v1 + v2; }; - const { result } = 
expectMergeMatchesBaseline(tree1, tree2, mergeFn, undefined, { - expectedMergeFn: mergeFn + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: unionFn }); - expect(mergeCalls).toBe(0); + expect(unionCalls).toBe(0); expect(result.size).toBe(tree1.size + tree2.size); expect(result.get(0)).toBe(0); expect(result.get(20000)).toBe(20000); }); - test('Merge trees with random overlap', () => { + test('Union trees with random overlap', () => { const size = 10000; const keys1 = makeArray(size, true); const keys2 = makeArray(size, true); @@ -2000,13 +2000,13 @@ function testMerge(maxNodeSize: number) { for (let k of keys2) tree2.set(k, k * 10); - const preferLeft: MergeFn = (_key, leftValue) => leftValue; - expectMergeMatchesBaseline(tree1, tree2, preferLeft, undefined, { - expectedMergeFn: preferLeft + const preferLeft: UnionFn = (_key, leftValue) => leftValue; + expectUnionMatchesBaseline(tree1, tree2, preferLeft, undefined, { + expectedUnionFn: preferLeft }); }); - test('Merge trees with ~10% overlap', () => { + test('Union trees with ~10% overlap', () => { const size = 200; const offset = Math.floor(size * 0.9); const overlap = size - offset; @@ -2022,10 +2022,10 @@ function testMerge(maxNodeSize: number) { tree2.set(key, key * 10); } - const preferLeft: MergeFn = (_key, leftValue) => leftValue; + const preferLeft: UnionFn = (_key, leftValue) => leftValue; - const { result } = expectMergeMatchesBaseline(tree1, tree2, preferLeft, undefined, { - expectedMergeFn: preferLeft + const { result } = expectUnionMatchesBaseline(tree1, tree2, preferLeft, undefined, { + expectedUnionFn: preferLeft }); expect(result.size).toBe(size + size - overlap); @@ -2039,28 +2039,28 @@ function testMerge(maxNodeSize: number) { }); } -describe('BTree merge input/output validation', () => { - test('Merge throws error when comparators differ', () => { +describe('BTree union input/output validation', () => { + test('Union throws error when 
comparators differ', () => { const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); - const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - expect(() => tree1.merge(tree2, mergeFn)).toThrow(comparatorErrorMsg); + expect(() => tree1.union(tree2, unionFn)).toThrow(comparatorErrorMsg); }); - test('Merge throws error when max node sizes differ', () => { + test('Union throws error when max node sizes differ', () => { const compare = (a: number, b: number) => b - a; const tree1 = new BTreeEx([[1, 10]], compare, 32); const tree2 = new BTreeEx([[2, 20]], compare, 33); - const mergeFn: MergeFn = (_k, v1, v2) => v1 + v2; + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - expect(() => tree1.merge(tree2, mergeFn)).toThrow(branchingFactorErrorMsg); + expect(() => tree1.union(tree2, unionFn)).toThrow(branchingFactorErrorMsg); }); - test('Merging trees returns a tree of the same class', () => { - expect(merge(new BTreeEx(), new BTreeEx(), (_k, v1, v2) => v1)).toBeInstanceOf(BTreeEx); - expect(merge(new BTree(), new BTree(), (_k, v1, v2) => v1)).toBeInstanceOf(BTree); - expect(merge(new BTree(), new BTree(), (_k, v1, v2) => v1) instanceof BTreeEx).toBeFalsy(); + test('Union returns a tree of the same class', () => { + expect(union(new BTreeEx(), new BTreeEx(), (_k, v1, v2) => v1)).toBeInstanceOf(BTreeEx); + expect(union(new BTree(), new BTree(), (_k, v1, v2) => v1)).toBeInstanceOf(BTree); + expect(union(new BTree(), new BTree(), (_k, v1, v2) => v1) instanceof BTreeEx).toBeFalsy(); }); }); @@ -2105,9 +2105,9 @@ function makeArray(size: number, randomOrder: boolean, spacing = 10, collisionCh const randomInt = (rng: MersenneTwister, maxExclusive: number) => Math.floor(rng.random() * maxExclusive); -describe('BTree merge fuzz tests', () => { +describe('BTree union fuzz tests', () => { const compare = (a: number, b: number) => a - b; - const mergeFn = (_k: number, left: number, 
_right: number) => left; + const unionFn = (_k: number, left: number, _right: number) => left; const FUZZ_SETTINGS = { branchingFactors: [4, 5, 32], ooms: [0, 1, 2], // [0, 1, 2, 3], @@ -2157,12 +2157,12 @@ describe('BTree merge fuzz tests', () => { const aArray = treeA.toArray(); const bArray = treeB.toArray(); - const merged = treeA.merge(treeB, mergeFn); + const merged = treeA.union(treeB, unionFn); merged.checkValid(); expect(merged.toArray()).toEqual(sorted.map(k => [k, k])); - // Merge should not have mutated inputs + // Union should not have mutated inputs expect(treeA.toArray()).toEqual(aArray); expect(treeB.toArray()).toEqual(bArray); diff --git a/benchmarks.ts b/benchmarks.ts index 040159c..7110239 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -426,21 +426,21 @@ console.log("### Merge between B+ trees"); console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); }; - const preferLeftMerge = (_k: number, leftValue: any, _rightValue: any) => leftValue; + const preferLeftUnion = (_k: number, leftValue: any, _rightValue: any) => leftValue; - const timeMergeVsBaseline = ( + const timeUnionVsBaseline = ( baseTitle: string, tree1: BTreeEx, tree2: BTreeEx, - prefer = preferLeftMerge, - mergeLabel = 'merge()', + prefer = preferLeftUnion, + unionLabel = 'union()', baselineLabel = 'clone+set loop (baseline)' ) => { - const mergeResult = measure(() => `${baseTitle} using ${mergeLabel}`, () => { - return tree1.merge(tree2, prefer); + const unionResult = measure(() => `${baseTitle} using ${unionLabel}`, () => { + return tree1.union(tree2, prefer); }); - const mergeStats = countTreeNodeStats(mergeResult); - console.log(`\tShared nodes (merge): ${mergeStats.shared}/${mergeStats.total}`); + const unionStats = countTreeNodeStats(unionResult); + console.log(`\tShared nodes (union): ${unionStats.shared}/${unionStats.total}`); timeBaselineMerge(`${baseTitle} using ${baselineLabel}`, tree1, tree2); }; @@ -457,8 +457,8 @@ console.log("### 
Merge between B+ trees"); tree2.set(offset + i, offset + i); // Keys well beyond tree1's range } - const baseTitle = `Merge ${size}+${size} non-overlapping trees`; - timeMergeVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `Union ${size}+${size} non-overlapping trees`; + timeUnionVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -473,8 +473,8 @@ console.log("### Merge between B+ trees"); tree2.set(i + size, i + size); } - const baseTitle = `Merge ${size}+${size} adjacent range trees`; - timeMergeVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `Union ${size}+${size} adjacent range trees`; + timeUnionVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -491,8 +491,8 @@ console.log("### Merge between B+ trees"); tree2.set(i + size, i + size); } - const baseTitle = `Merge ${size * 2}+${size} interleaved range trees`; - timeMergeVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `Union ${size * 2}+${size} interleaved range trees`; + timeUnionVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -506,8 +506,8 @@ console.log("### Merge between B+ trees"); tree2.set(i, i * 10); } - const baseTitle = `Merge ${size}+${size} completely overlapping trees (prefer left)`; - timeMergeVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `Union ${size}+${size} completely overlapping trees (prefer left)`; + timeUnionVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -528,12 +528,12 @@ console.log("### Merge between B+ trees"); tree2.set(key, key * 10); } - const baseTitle = `Merge trees with 10% overlap (${size}+${size} keys)`; - timeMergeVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `Union trees with 10% overlap (${size}+${size} keys)`; + timeUnionVsBaseline(baseTitle, tree1, tree2); }); console.log(); - console.log("# Merge random overlaps"); + console.log("# Union random overlaps"); sizes.forEach((size) => { const keys1 = makeArray(size, true); const keys2 = makeArray(size, true); @@ -548,12 +548,12 @@ 
console.log("### Merge between B+ trees"); tree2.set(k, k * 10); } - const baseTitle = `Merge ${tree1.size}+${tree2.size} trees with random keys`; - timeMergeVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `Union ${tree1.size}+${tree2.size} trees with random keys`; + timeUnionVsBaseline(baseTitle, tree1, tree2); }); console.log(); - console.log("# Merge with empty tree"); + console.log("# Union with empty tree"); sizes.forEach((size) => { const tree1 = new BTreeEx(); const tree2 = new BTreeEx(); @@ -562,12 +562,12 @@ console.log("### Merge between B+ trees"); tree1.set(i, i); } - const baseTitle = `Merge ${size}-key tree with empty tree`; - timeMergeVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `Union ${size}-key tree with empty tree`; + timeUnionVsBaseline(baseTitle, tree1, tree2); }); console.log(); - console.log("# Compare merge vs manual iteration for complete overlap"); + console.log("# Compare union vs manual iteration for complete overlap"); sizes.forEach((size) => { const tree1 = new BTreeEx(); const tree2 = new BTreeEx(); @@ -577,8 +577,8 @@ console.log("### Merge between B+ trees"); tree2.set(i, i * 10); } - const baseTitle = `Merge ${size}+${size} overlapping trees`; - timeMergeVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `Union ${size}+${size} overlapping trees`; + timeUnionVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -602,8 +602,8 @@ console.log("### Merge between B+ trees"); } } - const baseTitle = `Merge ${tree1.size}+${tree2.size} sparse-overlap trees`; - timeMergeVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `Union ${tree1.size}+${tree2.size} sparse-overlap trees`; + timeUnionVsBaseline(baseTitle, tree1, tree2); } } diff --git a/extended/index.d.ts b/extended/index.d.ts index 6b6ec6e..e2d6443 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -36,12 +36,12 @@ export declare class BTreeEx extends BTree { */ intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) 
=> void): void; /** - * Efficiently merges this tree with `other`, reusing subtrees wherever possible. + * Efficiently unions this tree with `other`, reusing subtrees wherever possible. * Neither input tree is modified. - * @param other The other tree to merge into this one. - * @param merge Called for keys that appear in both trees. Return the desired value, or + * @param other The other tree to union with this one. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. - * @returns A new BTree that contains the merged key/value pairs. + * @returns A new BTree that contains the unioned key/value pairs. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -50,7 +50,7 @@ export declare class BTreeEx extends BTree { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. 
*/ - merge(other: BTreeEx, mergeFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx; + union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx; } export interface BTreeEx { with(key: K): BTreeEx; diff --git a/extended/index.js b/extended/index.js index 0faf28f..f5adb0a 100644 --- a/extended/index.js +++ b/extended/index.js @@ -22,7 +22,7 @@ exports.BTreeEx = void 0; var b_tree_1 = __importDefault(require("../b+tree")); var diffAgainst_1 = require("./diffAgainst"); var intersect_1 = require("./intersect"); -var merge_1 = __importDefault(require("./merge")); +var union_1 = __importDefault(require("./union")); var BTreeEx = /** @class */ (function (_super) { __extends(BTreeEx, _super); function BTreeEx() { @@ -77,12 +77,12 @@ var BTreeEx = /** @class */ (function (_super) { (0, intersect_1.intersect)(this, other, intersection); }; /** - * Efficiently merges this tree with `other`, reusing subtrees wherever possible. + * Efficiently unions this tree with `other`, reusing subtrees wherever possible. * Neither input tree is modified. - * @param other The other tree to merge into this one. - * @param merge Called for keys that appear in both trees. Return the desired value, or + * @param other The other tree to union with this one. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. - * @returns A new BTree that contains the merged key/value pairs. + * @returns A new BTree that contains the unioned key/value pairs. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -91,8 +91,8 @@ var BTreeEx = /** @class */ (function (_super) { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. */ - BTreeEx.prototype.merge = function (other, mergeFn) { - return (0, merge_1.default)(this, other, mergeFn); + BTreeEx.prototype.union = function (other, combineFn) { + return (0, union_1.default)(this, other, combineFn); }; return BTreeEx; }(b_tree_1.default)); diff --git a/extended/index.ts b/extended/index.ts index 31fde49..278f175 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -2,7 +2,7 @@ import BTree from '../b+tree'; import type { BTreeWithInternals } from './shared'; import { diffAgainst as diffAgainstAlgorithm } from './diffAgainst'; import { intersect } from './intersect'; -import merge from './merge'; +import union from './union'; export class BTreeEx extends BTree { clone(): this { @@ -63,12 +63,12 @@ export class BTreeEx extends BTree { } /** - * Efficiently merges this tree with `other`, reusing subtrees wherever possible. + * Efficiently unions this tree with `other`, reusing subtrees wherever possible. * Neither input tree is modified. - * @param other The other tree to merge into this one. - * @param merge Called for keys that appear in both trees. Return the desired value, or + * @param other The other tree to union with this one. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. - * @returns A new BTree that contains the merged key/value pairs. + * @returns A new BTree that contains the unioned key/value pairs. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -77,8 +77,8 @@ export class BTreeEx extends BTree { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. */ - merge(other: BTreeEx, mergeFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx { - return merge, K, V>(this, other, mergeFn); + union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx { + return union, K, V>(this, other, combineFn); } } diff --git a/extended/merge.d.ts b/extended/union.d.ts similarity index 63% rename from extended/merge.d.ts rename to extended/union.d.ts index 7397162..de33870 100644 --- a/extended/merge.d.ts +++ b/extended/union.d.ts @@ -1,12 +1,12 @@ import BTree from '../b+tree'; /** - * Efficiently merges two trees, reusing subtrees wherever possible. + * Efficiently unions two trees, reusing subtrees wherever possible. * Neither input tree is modified. - * @param treeA First tree to merge. - * @param treeB Second tree to merge. - * @param merge Called for keys that appear in both trees. Return the desired value, or + * @param treeA First tree to union. + * @param treeB Second tree to union. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. - * @returns A new BTree that contains the merged key/value pairs. + * @returns A new BTree that contains the unioned key/value pairs. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -15,4 +15,4 @@ import BTree from '../b+tree'; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. */ -export default function merge, K, V>(treeA: TBTree, treeB: TBTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree; +export default function union, K, V>(treeA: TBTree, treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree; diff --git a/extended/merge.js b/extended/union.js similarity index 81% rename from extended/merge.js rename to extended/union.js index b269794..84db6b2 100644 --- a/extended/merge.js +++ b/extended/union.js @@ -3,13 +3,13 @@ Object.defineProperty(exports, "__esModule", { value: true }); var decompose_1 = require("./decompose"); var parallelWalk_1 = require("./parallelWalk"); /** - * Efficiently merges two trees, reusing subtrees wherever possible. + * Efficiently unions two trees, reusing subtrees wherever possible. * Neither input tree is modified. - * @param treeA First tree to merge. - * @param treeB Second tree to merge. - * @param merge Called for keys that appear in both trees. Return the desired value, or + * @param treeA First tree to union. + * @param treeB Second tree to union. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. - * @returns A new BTree that contains the merged key/value pairs. + * @returns A new BTree that contains the unioned key/value pairs. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -18,7 +18,7 @@ var parallelWalk_1 = require("./parallelWalk"); * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. */ -function merge(treeA, treeB, merge) { +function union(treeA, treeB, combineFn) { var _treeA = treeA; var _treeB = treeB; var branchingFactor = (0, parallelWalk_1.checkCanDoSetOperation)(_treeA, _treeB); @@ -29,8 +29,8 @@ function merge(treeA, treeB, merge) { // Decompose both trees into disjoint subtrees leaves. // As many of these as possible will be reused from the original trees, and the remaining // will be leaves that are the result of merging intersecting leaves. - var decomposed = (0, decompose_1.decompose)(_treeA, _treeB, merge); + var decomposed = (0, decompose_1.decompose)(_treeA, _treeB, combineFn); var constructor = treeA.constructor; return (0, decompose_1.buildFromDecomposition)(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); } -exports.default = merge; +exports.default = union; diff --git a/extended/merge.ts b/extended/union.ts similarity index 76% rename from extended/merge.ts rename to extended/union.ts index 408ec44..bc45a0c 100644 --- a/extended/merge.ts +++ b/extended/union.ts @@ -4,13 +4,13 @@ import { decompose, buildFromDecomposition } from "./decompose"; import { checkCanDoSetOperation } from "./parallelWalk"; /** - * Efficiently merges two trees, reusing subtrees wherever possible. + * Efficiently unions two trees, reusing subtrees wherever possible. * Neither input tree is modified. - * @param treeA First tree to merge. - * @param treeB Second tree to merge. - * @param merge Called for keys that appear in both trees. Return the desired value, or + * @param treeA First tree to union. + * @param treeB Second tree to union. 
+ * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. - * @returns A new BTree that contains the merged key/value pairs. + * @returns A new BTree that contains the unioned key/value pairs. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -19,7 +19,11 @@ import { checkCanDoSetOperation } from "./parallelWalk"; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. */ -export default function merge, K,V>(treeA: TBTree, treeB: TBTree, merge: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree { +export default function union, K,V>( + treeA: TBTree, + treeB: TBTree, + combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined +): TBTree { const _treeA = treeA as unknown as BTreeWithInternals; const _treeB = treeB as unknown as BTreeWithInternals; const branchingFactor = checkCanDoSetOperation(_treeA, _treeB); @@ -31,7 +35,7 @@ export default function merge, K,V>(treeA: TBTree, tre // Decompose both trees into disjoint subtrees leaves. // As many of these as possible will be reused from the original trees, and the remaining // will be leaves that are the result of merging intersecting leaves. 
- const decomposed = decompose(_treeA, _treeB, merge); + const decomposed = decompose(_treeA, _treeB, combineFn); const constructor = treeA.constructor as new (entries?: [K,V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; return buildFromDecomposition(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); -} \ No newline at end of file +} diff --git a/readme.md b/readme.md index ac637e2..1e1b91e 100644 --- a/readme.md +++ b/readme.md @@ -92,8 +92,8 @@ Features - Includes a lattice of interfaces for TypeScript users (see below) - Supports diffing computation between two trees that is highly optimized for the case in which a majority of nodes are shared (such as when persistent methods are used). -- Supports fast merge & intersection operations with asymptotic speedups when large disjoint ranges of keys are present. - The merge operation generates a new tree that shares nodes with the original trees when possible. +- Supports fast union & intersection operations with asymptotic speedups when large disjoint ranges of keys are present. + The union operation generates a new tree that shares nodes with the original trees when possible. ### Additional operations supported on this B+ tree ### @@ -135,7 +135,7 @@ The "scanning" methods (`forEach, forRange, editRange, deleteRange`) will normal - Get a new tree with one pair removed: `t.without(key)` - Get a new tree with specific pairs removed: `t.withoutKeys(keys)` - Get a new tree with a range of keys removed: `t.withoutRange(low, high, includeHi)` -- Get a new tree that is the result of a merge: `t.merge(other, mergeFn)` +- Get a new tree that is the result of a union: `t.union(other, unionFn)` **Things to keep in mind:** I ran a test which suggested `t.with` is three times slower than `t.set`. These methods do not return a frozen tree even if the original tree was frozen (for performance reasons, e.g. frozen trees use slightly more memory.) 
From a2ae2a558bf995357199c6cbebb42b2675f8da68 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 17:42:01 -0800 Subject: [PATCH 064/143] Move root test into test folder --- b+tree.test.ts => test/b+tree.test.ts | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename b+tree.test.ts => test/b+tree.test.ts (100%) diff --git a/b+tree.test.ts b/test/b+tree.test.ts similarity index 100% rename from b+tree.test.ts rename to test/b+tree.test.ts From 68ff57872b531e2519a47b70b668113c43d4c041 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 17:45:48 -0800 Subject: [PATCH 065/143] split out tests --- test/b+tree.test.ts | 1275 +------------------------------------- test/diffAgainst.test.ts | 218 +++++++ test/intersect.test.ts | 233 +++++++ test/shared.ts | 68 ++ test/union.test.ts | 794 ++++++++++++++++++++++++ tsconfig.json | 2 +- 6 files changed, 1318 insertions(+), 1272 deletions(-) create mode 100644 test/diffAgainst.test.ts create mode 100644 test/intersect.test.ts create mode 100644 test/shared.ts create mode 100644 test/union.test.ts diff --git a/test/b+tree.test.ts b/test/b+tree.test.ts index 59b6293..df88c26 100644 --- a/test/b+tree.test.ts +++ b/test/b+tree.test.ts @@ -1,23 +1,10 @@ -import BTree, { IMap, defaultComparator, simpleComparator } from './b+tree'; -import BTreeEx from './extended'; -import diffAgainst from './extended/diffAgainst'; -import union from './extended/union'; -import { branchingFactorErrorMsg, comparatorErrorMsg } from './extended/parallelWalk'; -import SortedArray from './sorted-array'; -import MersenneTwister from 'mersenne-twister'; +import BTree, { IMap, defaultComparator, simpleComparator } from '../b+tree'; +import BTreeEx from '../extended'; +import SortedArray from '../sorted-array'; +import { addToBoth, expectTreeEqualTo, randInt } from './shared'; var test: (name:string,f:()=>void)=>void = it; -var rand: any = new MersenneTwister(1234); -function randInt(max: number) { return 
rand.random_int() % max; } -function expectTreeEqualTo(a: BTree, b: SortedArray) { - a.checkValid(); - expect(a.toArray()).toEqual(b.getArray()); -} -function addToBoth(a: IMap, b: IMap, k: K, v: V) { - expect(a.set(k,v)).toEqual(b.set(k,v)); -} - describe('defaultComparator', () => { const dateA = new Date(Date.UTC(96, 1, 2, 3, 4, 5)); @@ -857,206 +844,6 @@ function testBTree(maxNodeSize: number) expectTreeEqualTo(t9, list); }); - describe("Diff computation", () => { - let onlyThis: Map; - let onlyOther: Map; - let different: Map; - function reset(): void { - onlyOther = new Map(); - onlyThis = new Map(); - different = new Map(); - } - - beforeEach(() => reset()); - - const OnlyThis = (k: number, v: number) => { onlyThis.set(k, v); } - const OnlyOther = (k: number, v: number) => { onlyOther.set(k, v); } - const Different = (k: number, vThis: number, vOther: number) => { different.set(k, `vThis: ${vThis}, vOther: ${vOther}`); } - const compare = (a: number, b: number) => a - b; - - function expectMapsEquals(mapA: Map, mapB: Map) { - const onlyA = []; - const onlyB = []; - const different = []; - mapA.forEach((valueA, keyA) => { - const valueB = mapB.get(keyA); - if (valueB === undefined) { - onlyA.push([keyA, valueA]); - } else if (!Object.is(valueB, valueB)) { - different.push([keyA, valueA, valueB]); - } - }); - mapB.forEach((valueB, keyB) => { - const valueA = mapA.get(keyB); - if (valueA === undefined) { - onlyA.push([keyB, valueB]); - } - }); - expect(onlyA.length).toEqual(0); - expect(onlyB.length).toEqual(0); - expect(different.length).toEqual(0); - } - - function expectDiffCorrect(treeThis: BTreeEx, treeOther: BTreeEx): void { - reset(); - treeThis.diffAgainst(treeOther, OnlyThis, OnlyOther, Different); - let onlyThisT: Map = new Map(); - let onlyOtherT: Map = new Map(); - let differentT: Map = new Map(); - treeThis.forEachPair((kThis, vThis) => { - if (!treeOther.has(kThis)) { - onlyThisT.set(kThis, vThis); - } else { - const vOther = treeOther.get(kThis); 
- if (!Object.is(vThis, vOther)) - differentT.set(kThis, `vThis: ${vThis}, vOther: ${vOther}`); - } - }); - treeOther.forEachPair((kOther, vOther) => { - if (!treeThis.has(kOther)) { - onlyOtherT.set(kOther, vOther); - } - }); - expectMapsEquals(onlyThis, onlyThisT); - expectMapsEquals(onlyOther, onlyOtherT); - expectMapsEquals(different, differentT); - } - - test(`Diff of trees with different comparators is an error`, () => { - const treeA = new BTreeEx([], compare); - const treeB = new BTreeEx([], (a, b) => b - a); - expect(() => treeA.diffAgainst(treeB, OnlyThis, OnlyOther, Different)).toThrow('comparators'); - }); - - test(`Standalone diffAgainst works with core trees`, () => { - const treeA = new BTree([[1, 1], [2, 2], [4, 4]], compare, maxNodeSize); - const treeB = new BTree([[1, 1], [2, 22], [3, 3]], compare, maxNodeSize); - const onlyThisKeys: number[] = []; - const onlyOtherKeys: number[] = []; - const differentKeys: number[] = []; - diffAgainst( - treeA, - treeB, - (k) => { onlyThisKeys.push(k); }, - (k) => { onlyOtherKeys.push(k); }, - (k) => { differentKeys.push(k); } - ); - expect(onlyThisKeys).toEqual([4]); - expect(onlyOtherKeys).toEqual([3]); - expect(differentKeys).toEqual([2]); - }); - - const entriesGroup: [number, number][][] = [[], [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]]; - entriesGroup.forEach(entries => { - test(`Diff of the same tree ${entries.length > 0 ? 
"(non-empty)" : "(empty)"}`, () => { - const tree = new BTreeEx(entries, compare, maxNodeSize); - expectDiffCorrect(tree, tree); - expect(onlyOther.size).toEqual(0); - expect(onlyThis.size).toEqual(0); - expect(different.size).toEqual(0); - }); - }); - - test(`Diff of identical trees`, () => { - const treeA = new BTreeEx(entriesGroup[1], compare, maxNodeSize); - const treeB = new BTreeEx(entriesGroup[1], compare, maxNodeSize); - expectDiffCorrect(treeA, treeB); - }); - - [entriesGroup, [...entriesGroup].reverse()].forEach(doubleEntries => { - test(`Diff of an ${doubleEntries[0].length === 0 ? 'empty' : 'non-empty'} tree and a ${doubleEntries[1].length === 0 ? 'empty' : 'non-empty'} one`, () => { - const treeA = new BTreeEx(doubleEntries[0], compare, maxNodeSize); - const treeB = new BTreeEx(doubleEntries[1], compare, maxNodeSize); - expectDiffCorrect(treeA, treeB); - }); - }); - - test(`Diff of different trees`, () => { - const treeA = new BTreeEx(entriesGroup[1], compare, maxNodeSize); - const treeB = new BTreeEx(entriesGroup[1], compare, maxNodeSize); - treeB.set(-1, -1); - treeB.delete(2); - treeB.set(3, 4); - treeB.set(10, 10); - expectDiffCorrect(treeA, treeB); - }); - - test(`Diff of odds and evens`, () => { - const treeA = new BTreeEx([[1, 1], [3, 3], [5, 5], [7, 7]], compare, maxNodeSize); - const treeB = new BTreeEx([[2, 2], [4, 4], [6, 6], [8, 8]], compare, maxNodeSize); - expectDiffCorrect(treeA, treeB); - expectDiffCorrect(treeB, treeA); - }); - - function applyChanges(treeA: BTreeEx, duplicate: (tree: BTreeEx) => BTreeEx): void { - const treeB = duplicate(treeA); - const maxKey: number = treeA.maxKey()!; - const onlyInA = -10; - treeA.set(onlyInA, onlyInA); - const onlyInBSmall = -1; - treeB.set(onlyInBSmall, onlyInBSmall); - const onlyInBLarge = maxKey + 1; - treeB.set(onlyInBLarge, onlyInBLarge); - const onlyInAFromDelete = 10 - treeB.delete(onlyInAFromDelete); - const differingValue = -100; - const modifiedInB1 = 3, modifiedInB2 = maxKey - 2; - 
treeB.set(modifiedInB1, differingValue); - treeB.set(modifiedInB2, differingValue) - treeA.diffAgainst(treeB, OnlyThis, OnlyOther, Different); - expectDiffCorrect(treeA, treeB); - } - - function makeLargeTree(size?: number): BTreeEx { - size = size ?? Math.pow(maxNodeSize, 3); - const tree = new BTreeEx([], compare, maxNodeSize); - for (let i = 0; i < size; i++) { - tree.set(i, i); - } - return tree; - } - - test(`Diff of large trees`, () => { - const tree = makeLargeTree(); - applyChanges(tree, tree => tree.greedyClone()); - }); - - test(`Diff of cloned trees`, () => { - const tree = makeLargeTree(); - applyChanges(tree, tree => tree.clone()); - }); - - test(`Diff can early exit`, () => { - const tree = makeLargeTree(100); - const tree2 = tree.clone(); - tree2.set(-1, -1); - tree2.delete(10); - tree2.set(20, -1); - tree2.set(110, -1); - const ReturnKey = (key: number) => { return { break: key }; }; - - let val = tree.diffAgainst(tree2, OnlyThis, OnlyOther, ReturnKey); - expect(onlyOther.size).toEqual(1); - expect(onlyThis.size).toEqual(0); - expect(val).toEqual(20); - reset(); - - val = tree.diffAgainst(tree2, OnlyThis, ReturnKey, Different); - expect(different.size).toEqual(0); - expect(onlyThis.size).toEqual(0); - expect(val).toEqual(110); - reset(); - - val = tree.diffAgainst(tree2, ReturnKey, OnlyOther, Different); - expect(different.size).toEqual(1); - expect(onlyOther.size).toEqual(1); - expect(val).toEqual(10); - reset(); - - expectDiffCorrect(tree, tree2); - }); - }); - test("Issue #2 reproduction", () => { const tree = new BTree([], (a, b) => a - b, maxNodeSize); for (let i = 0; i <= 1999; i++) { @@ -1134,1057 +921,3 @@ function testBTree(maxNodeSize: number) expect(tree.get(key)).not.toBeUndefined(); }); } - -describe('BTree intersect tests with fanout 32', testIntersect.bind(null, 32)); -describe('BTree intersect tests with fanout 10', testIntersect.bind(null, 10)); -describe('BTree intersect tests with fanout 4', testIntersect.bind(null, 4)); - 
-function testIntersect(maxNodeSize: number) { - const compare = (a: number, b: number) => a - b; - - const buildTree = (entries: Array<[number, number]>) => - new BTreeEx(entries, compare, maxNodeSize); - - const tuples = (...pairs: Array<[number, number]>) => pairs; - - const collectCalls = (left: BTreeEx, right: BTreeEx) => { - const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; - left.intersect(right, (key, leftValue, rightValue) => { - calls.push({ key, leftValue, rightValue }); - }); - return calls; - }; - - test('Intersect two empty trees', () => { - const tree1 = buildTree([]); - const tree2 = buildTree([]); - expect(collectCalls(tree1, tree2)).toEqual([]); - }); - - test('Intersect empty tree with non-empty tree', () => { - const tree1 = buildTree([]); - const tree2 = buildTree(tuples([1, 10], [2, 20], [3, 30])); - expect(collectCalls(tree1, tree2)).toEqual([]); - expect(collectCalls(tree2, tree1)).toEqual([]); - }); - - test('Intersect with no overlapping keys', () => { - const tree1 = buildTree(tuples([1, 10], [3, 30], [5, 50])); - const tree2 = buildTree(tuples([2, 20], [4, 40], [6, 60])); - expect(collectCalls(tree1, tree2)).toEqual([]); - }); - - test('Intersect with single overlapping key', () => { - const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); - const tree2 = buildTree(tuples([0, 100], [2, 200], [4, 400])); - expect(collectCalls(tree1, tree2)).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); - }); - - test('Intersect with multiple overlapping keys maintains tree contents', () => { - const leftEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; - const rightEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400], [6, 600]]; - const tree1 = buildTree(leftEntries); - const tree2 = buildTree(rightEntries); - const leftBefore = tree1.toArray(); - const rightBefore = tree2.toArray(); - expect(collectCalls(tree1, tree2)).toEqual([ - { key: 2, leftValue: 20, rightValue: 
200 }, - { key: 4, leftValue: 40, rightValue: 400 }, - ]); - expect(tree1.toArray()).toEqual(leftBefore); - expect(tree2.toArray()).toEqual(rightBefore); - tree1.checkValid(); - tree2.checkValid(); - }); - - test('Intersect with contiguous overlap yields sorted keys', () => { - const tree1 = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); - const tree2 = buildTree(tuples([3, 30], [4, 40], [5, 50], [6, 60], [7, 70])); - const calls = collectCalls(tree1, tree2); - expect(calls.map(c => c.key)).toEqual([3, 4, 5, 6]); - expect(calls.map(c => c.leftValue)).toEqual([3, 4, 5, 6]); - expect(calls.map(c => c.rightValue)).toEqual([30, 40, 50, 60]); - }); - - test('Intersect large overlapping range counts each shared key once', () => { - const size = 1000; - const overlapStart = 500; - const leftEntries = Array.from({ length: size }, (_, i) => [i, i * 3] as [number, number]); - const rightEntries = Array.from({ length: size }, (_, i) => { - const key = i + overlapStart; - return [key, key * 7] as [number, number]; - }); - const tree1 = buildTree(leftEntries); - const tree2 = buildTree(rightEntries); - const calls = collectCalls(tree1, tree2); - expect(calls.length).toBe(size - overlapStart); - expect(calls[0]).toEqual({ - key: overlapStart, - leftValue: overlapStart * 3, - rightValue: overlapStart * 7 - }); - const lastCall = calls[calls.length - 1]; - expect(lastCall.key).toBe(size - 1); - expect(lastCall.leftValue).toBe((size - 1) * 3); - expect(lastCall.rightValue).toBe((size - 1) * 7); - }); - - test('Intersect tree with itself visits each key once', () => { - const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); - const tree = buildTree(entries); - const calls = collectCalls(tree, tree); - expect(calls.length).toBe(entries.length); - for (let i = 0; i < entries.length; i++) { - const [key, value] = entries[i]; - expect(calls[i]).toEqual({ key, leftValue: value, rightValue: value }); - } - }); - - test('Intersect 
arguments determine left/right values', () => { - const tree1 = buildTree(tuples([1, 100], [2, 200], [4, 400])); - const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); - const callsLeft = collectCalls(tree1, tree2); - const callsRight = collectCalls(tree2, tree1); - expect(callsLeft).toEqual([ - { key: 2, leftValue: 200, rightValue: 20 }, - { key: 4, leftValue: 400, rightValue: 40 }, - ]); - expect(callsRight).toEqual([ - { key: 2, leftValue: 20, rightValue: 200 }, - { key: 4, leftValue: 40, rightValue: 400 }, - ]); - }); -} - -describe('BTree intersect input/output validation', () => { - test('Intersect throws error when comparators differ', () => { - const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); - const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); - expect(() => tree1.intersect(tree2, () => {})).toThrow(comparatorErrorMsg); - }); - - test('Intersect throws error when max node sizes differ', () => { - const compare = (a: number, b: number) => b - a; - const tree1 = new BTreeEx([[1, 10]], compare, 32); - const tree2 = new BTreeEx([[2, 20]], compare, 33); - expect(() => tree1.intersect(tree2, () => {})).toThrow(branchingFactorErrorMsg); - }); -}); - -describe('BTree intersect fuzz tests', () => { - const compare = (a: number, b: number) => a - b; - const FUZZ_SETTINGS = { - branchingFactors: [4, 5, 32], - ooms: [2, 3], - fractionsPerOOM: [0.1, 0.25, 0.5], - collisionChances: [0.05, 0.1, 0.3], - timeoutMs: 30_000 - } as const; - - FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { - if (fraction < 0 || fraction > 1) - throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); - }); - FUZZ_SETTINGS.collisionChances.forEach(chance => { - if (chance < 0 || chance > 1) - throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); - }); - - jest.setTimeout(FUZZ_SETTINGS.timeoutMs); - - const rng = new MersenneTwister(0xC0FFEE); - - for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { - 
describe(`branching factor ${maxNodeSize}`, () => { - for (const collisionChance of FUZZ_SETTINGS.collisionChances) { - for (const oom of FUZZ_SETTINGS.ooms) { - const size = 5 * Math.pow(10, oom); - for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { - const fractionB = 1 - fractionA; - const collisionLabel = collisionChance.toFixed(2); - - test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { - const treeA = new BTreeEx([], compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1, collisionChance, rng); - - for (const value of keys) { - const assignToA = rng.random() < fractionA; - const assignToB = rng.random() < fractionB; - - if (!assignToA && !assignToB) { - if (rng.random() < 0.5) - treeA.set(value, value); - else - treeB.set(value, value); - continue; - } - - if (assignToA) - treeA.set(value, value); - if (assignToB) - treeB.set(value, value); - } - - const aArray = treeA.toArray(); - const bArray = treeB.toArray(); - const bMap = new Map(bArray); - const expected: Array<[number, number, number]> = []; - for (const [key, leftValue] of aArray) { - const rightValue = bMap.get(key); - if (rightValue !== undefined) - expected.push([key, leftValue, rightValue]); - } - - const actual: Array<[number, number, number]> = []; - treeA.intersect(treeB, (key, leftValue, rightValue) => { - actual.push([key, leftValue, rightValue]); - }); - expect(actual).toEqual(expected); - - const swappedActual: Array<[number, number, number]> = []; - treeB.intersect(treeA, (key, leftValue, rightValue) => { - swappedActual.push([key, leftValue, rightValue]); - }); - const swappedExpected = expected.map(([key, leftValue, rightValue]) => [key, rightValue, leftValue]); - expect(swappedActual).toEqual(swappedExpected); - - expect(treeA.toArray()).toEqual(aArray); - expect(treeB.toArray()).toEqual(bArray); - treeA.checkValid(); - treeB.checkValid(); - 
}); - } - } - } - }); - } -}); - -describe('BTree union tests with fanout 32', testUnion.bind(null, 32)); -describe('BTree union tests with fanout 10', testUnion.bind(null, 10)); -describe('BTree union tests with fanout 4', testUnion.bind(null, 4)); - -type UnionFn = (key: number, leftValue: number, rightValue: number) => number | undefined; - -function testUnion(maxNodeSize: number) { - const compare = (a: number, b: number) => a - b; - const sharesNode = (root: any, targetNode: any): boolean => { - if (root === targetNode) - return true; - if (root.isLeaf) - return false; - const children = (root as any).children as any[]; - for (let i = 0; i < children.length; i++) { - if (sharesNode(children[i], targetNode)) - return true; - } - return false; - }; - - const buildTree = (keys: number[], valueScale = 1, valueOffset = 0) => { - const tree = new BTreeEx([], compare, maxNodeSize); - for (const key of keys) { - tree.set(key, key * valueScale + valueOffset); - } - return tree; - }; - - const expectRootLeafState = (tree: BTreeEx, expectedIsLeaf: boolean) => { - const root = tree['_root'] as any; - expect(root.isLeaf).toBe(expectedIsLeaf); - }; - - const range = (start: number, endExclusive: number, step = 1): number[] => { - const result: number[] = []; - for (let i = start; i < endExclusive; i += step) - result.push(i); - return result; - }; - - type UnionExpectationOptions = { - expectedUnionFn?: UnionFn; - }; - - const naiveUnion = ( - left: BTreeEx, - right: BTreeEx, - unionFn: UnionFn - ) => { - const expected = left.clone(); - right.forEachPair((key, rightValue) => { - if (expected.has(key)) { - const leftValue = expected.get(key)!; - const mergedValue = unionFn(key, leftValue, rightValue); - if (mergedValue === undefined) { - expected.delete(key); - } else { - expected.set(key, mergedValue); - } - } else { - expected.set(key, rightValue); - } - }); - return expected; - }; - - const expectUnionMatchesBaseline = ( - left: BTreeEx, - right: BTreeEx, - unionFn: 
UnionFn, - after?: (ctx: { result: BTreeEx, expected: BTreeEx }) => void, - options: UnionExpectationOptions = {} - ) => { - const expectedUnionFn = options.expectedUnionFn ?? unionFn; - const expected = naiveUnion(left, right, expectedUnionFn); - const result = left.union(right, unionFn); - expect(result.toArray()).toEqual(expected.toArray()); - result.checkValid(); - expected.checkValid(); - after?.({ result, expected }); - return { result, expected }; - }; - - test('Union disjoint roots reuses appended subtree', () => { - const size = maxNodeSize * 3; - const tree1 = buildTree(range(0, size), 1, 0); - const offset = size * 5; - const tree2 = buildTree(range(offset, offset + size), 2, 0); - - expectRootLeafState(tree1, false); - expectRootLeafState(tree2, false); - - let unionCalls = 0; - const unionFn: UnionFn = () => { - unionCalls++; - return 0; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, ({ result }) => { - const resultRoot = result['_root'] as any; - expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); - expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); - }); - - expect(unionCalls).toBe(0); - }); - - test('Union leaf roots with intersecting keys uses union callback', () => { - const tree1 = buildTree([1, 2, 4], 10, 0); - const tree2 = buildTree([2, 3, 5], 100, 0); - - expectRootLeafState(tree1, true); - expectRootLeafState(tree2, true); - - const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; - const unionFn: UnionFn = (key, leftValue, rightValue) => { - calls.push({ key, leftValue, rightValue }); - return leftValue + rightValue; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue - }); - expect(calls).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); - }); - - test('Union leaf roots with disjoint keys', () => { - const tree1 = buildTree([1, 3, 5], 1, 0); - const tree2 = buildTree([2, 4, 6], 
1, 1000); - - expectRootLeafState(tree1, true); - expectRootLeafState(tree2, true); - - let unionCalls = 0; - const unionFn: UnionFn = () => { - unionCalls++; - return 0; - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue - }); - expect(unionCalls).toBe(0); - expect(result.toArray()).toEqual([ - [1, 1], - [2, 1002], - [3, 3], - [4, 1004], - [5, 5], - [6, 1006] - ]); - }); - - test('Union trees disjoint except for shared maximum key', () => { - const size = maxNodeSize * 2; - const tree1 = buildTree(range(0, size), 1, 0); - const tree2 = buildTree(range(size - 1, size - 1 + size), 3, 0); - - expectRootLeafState(tree1, false); - expectRootLeafState(tree2, false); - - let unionCalls = 0; - const unionFn: UnionFn = (_key, leftValue, rightValue) => { - unionCalls++; - return leftValue + rightValue; - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue - }); - expect(unionCalls).toBe(1); - expect(result.get(size - 1)).toBe((size - 1) + (size - 1) * 3); - expect(result.size).toBe(tree1.size + tree2.size - 1); - }); - - test('Union trees where all leaves are disjoint and one tree straddles the other', () => { - const straddleLength = 3 * 2 * maxNodeSize; // creates multiple leaves on both trees - const tree1 = buildTree( - range(0, straddleLength / 3).concat(range((straddleLength / 3) * 2, straddleLength)), - 1 - ); - const tree2 = buildTree(range(straddleLength / 3, (straddleLength / 3) * 2), 3); - - expectRootLeafState(tree1, false); - expectRootLeafState(tree2, false); - - let unionCalls = 0; - const unionFn: UnionFn = (_key, leftValue, rightValue) => { - unionCalls++; - return leftValue + rightValue; - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); - expect(unionCalls).toBe(0); - expect(result.size).toBe(tree1.size + 
tree2.size); - }); - - test('Union where two-leaf tree intersects leaf-root tree across both leaves', () => { - const size = maxNodeSize + Math.max(3, Math.floor(maxNodeSize / 2)); - const tree1 = buildTree(range(0, size), 2, 0); - const tree2 = buildTree([1, Math.floor(size / 2), size - 1], 5, 0); - - expectRootLeafState(tree1, false); - expectRootLeafState(tree2, true); - - const seenKeys: number[] = []; - const unionFn: UnionFn = (key, _leftValue, rightValue) => { - seenKeys.push(key); - return rightValue; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, _leftValue, rightValue) => rightValue - }); - expect(seenKeys.sort((a, b) => a - b)).toEqual([1, Math.floor(size / 2), size - 1]); - }); - - test('Union where max key equals min key of other tree', () => { - const size = maxNodeSize * 2; - const tree1 = buildTree(range(0, size), 1, 0); - const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); - - expectRootLeafState(tree1, false); - expectRootLeafState(tree2, false); - - let unionCalls = 0; - const unionFn: UnionFn = (_key, _leftValue, rightValue) => { - unionCalls++; - return rightValue; - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, _leftValue, rightValue) => rightValue - }); - expect(unionCalls).toBe(1); - expect(result.get(size - 1)).toBe((size - 1) * 10); - expect(result.size).toBe(tree1.size + tree2.size - 1); - }); - - test('Union odd and even keyed trees', () => { - const limit = maxNodeSize * 3; - const treeOdd = buildTree(range(1, limit * 2, 2), 1, 0); - const treeEven = buildTree(range(0, limit * 2, 2), 1, 100); - - expectRootLeafState(treeOdd, false); - expectRootLeafState(treeEven, false); - - let unionCalls = 0; - const unionFn: UnionFn = () => { - unionCalls++; - return 0; - }; - - const { result } = expectUnionMatchesBaseline(treeOdd, treeEven, unionFn); - expect(unionCalls).toBe(0); - expect(result.size).toBe(treeOdd.size 
+ treeEven.size); - }); - - test('Union with single boundary overlap prefers right value', () => { - const size = maxNodeSize * 2; - const tree1 = buildTree(range(0, size), 1, 0); - const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); - - expectRootLeafState(tree1, false); - expectRootLeafState(tree2, false); - - let unionCalls = 0; - const unionFn: UnionFn = (_key, _leftValue, rightValue) => { - unionCalls++; - return rightValue; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, _leftValue, rightValue) => rightValue - }); - expect(unionCalls).toBe(1); - }); - - test('Union overlapping prefix equal to branching factor', () => { - const shared = maxNodeSize; - const tree1Keys = [ - ...range(0, shared), - ...range(shared, shared + maxNodeSize) - ]; - const tree2Keys = [ - ...range(0, shared), - ...range(shared + maxNodeSize, shared + maxNodeSize * 2) - ]; - - const tree1 = buildTree(tree1Keys, 1, 0); - const tree2 = buildTree(tree2Keys, 2, 0); - - expectRootLeafState(tree1, false); - expectRootLeafState(tree2, false); - - const mergedKeys: number[] = []; - const unionFn: UnionFn = (key, leftValue, rightValue) => { - mergedKeys.push(key); - return leftValue + rightValue; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue - }); - expect(mergedKeys.sort((a, b) => a - b)).toEqual(range(0, shared)); - }); - - test('Union two empty trees', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); - expect(result.size).toBe(0); - }); - - test('Union empty tree with non-empty tree', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([[1, 10], 
[2, 20], [3, 30]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - - const { result: leftUnion } = expectUnionMatchesBaseline(tree1, tree2, unionFn); - expect(leftUnion.toArray()).toEqual(tree2.toArray()); - - const { result: rightUnion } = expectUnionMatchesBaseline(tree2, tree1, unionFn); - expect(rightUnion.toArray()).toEqual(tree2.toArray()); - expect(tree1.toArray()).toEqual([]); - expect(tree2.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); - tree1.checkValid(); - tree2.checkValid(); - }); - - test('Union with no overlapping keys', () => { - const tree1 = new BTreeEx([[1, 10], [3, 30], [5, 50]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 20], [4, 40], [6, 60]], compare, maxNodeSize); - const unionFn: UnionFn = () => { - throw new Error('Should not be called for non-overlapping keys'); - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: unionFn - }); - - expect(result.size).toBe(6); - expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60]]); - }); - - test('Union with completely overlapping keys - sum values', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTreeEx([[1, 5], [2, 15], [3, 25]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); - expect(result.size).toBe(tree1.size); - }); - - test('Union with completely overlapping keys - prefer left', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, _v2) => v1; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, _v2) => v1 - }); - 
expect(result.toArray()).toEqual(tree1.toArray()); - }); - - test('Union with completely overlapping keys - prefer right', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, _v1, v2) => v2; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); - expect(result.toArray()).toEqual(tree2.toArray()); - }); - - test('Union with partially overlapping keys', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); - const tree2 = new BTreeEx([[3, 300], [4, 400], [5, 500], [6, 600]], compare, maxNodeSize); - - const mergedKeys: number[] = []; - const unionFn: UnionFn = (key, v1, v2) => { - mergedKeys.push(key); - return v1 + v2; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); - expect(mergedKeys.sort((a, b) => a - b)).toEqual([3, 4]); - }); - - test('Union with overlapping keys can delete entries', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400], [5, 500]], compare, maxNodeSize); - const unionFn: UnionFn = (k, v1, v2) => { - if (k === 3) return undefined; - return v1 + v2; - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); - expect(result.has(3)).toBe(false); - }); - - test('Union is called even when values are equal', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 20], [3, 30]], compare, maxNodeSize); - - const unionCallLog: Array<{k: number, v1: number, v2: number}> = []; - const unionFn: UnionFn = (k, v1, v2) => { - unionCallLog.push({k, v1, v2}); - return v1; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 - }); - 
expect(unionCallLog).toEqual([{k: 2, v1: 20, v2: 20}]); - }); - - test('Union does not mutate input trees', () => { - const entries1: [number, number][] = [[1, 10], [2, 20], [3, 30]]; - const entries2: [number, number][] = [[2, 200], [3, 300], [4, 400]]; - const tree1 = new BTreeEx(entries1, compare, maxNodeSize); - const tree2 = new BTreeEx(entries2, compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - - const snapshot1 = tree1.toArray(); - const snapshot2 = tree2.toArray(); - - expectUnionMatchesBaseline(tree1, tree2, unionFn); - - expect(tree1.toArray()).toEqual(snapshot1); - expect(tree2.toArray()).toEqual(snapshot2); - tree1.checkValid(); - tree2.checkValid(); - }); - - test('Union large trees with some overlaps', () => { - const entries1: [number, number][] = []; - for (let i = 0; i < 1000; i++) entries1.push([i, i]); - - const entries2: [number, number][] = []; - for (let i = 500; i < 1500; i++) entries2.push([i, i * 10]); - - const tree1 = new BTreeEx(entries1, compare, maxNodeSize); - const tree2 = new BTreeEx(entries2, compare, maxNodeSize); - - let unionCount = 0; - const unionFn: UnionFn = (k, v1, v2) => { - unionCount++; - return v1 + v2; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); - expect(unionCount).toBe(500); - }); - - test('Union with overlaps at boundaries', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); - - for (let i = 0; i < 100; i++) { - tree1.set(i * 2, i * 2); - } - - for (let i = 50; i < 150; i++) { - tree2.set(i, i * 10); - } - - const mergedKeys: number[] = []; - const unionFn: UnionFn = (key, v1, v2) => { - mergedKeys.push(key); - return v1 + v2; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); - - const expectedMergedKeys = range(50, 150).filter(k => k % 2 === 0); - expect(mergedKeys.sort((a, b) 
=> a - b)).toEqual(expectedMergedKeys); - }); - - test('Union result can be modified without affecting inputs', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); - const tree2 = new BTreeEx([[3, 30], [4, 40]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); - - result.set(1, 100); - result.set(5, 50); - result.delete(2); - - expect(tree1.get(1)).toBe(10); - expect(tree1.get(2)).toBe(20); - expect(tree1.has(5)).toBe(false); - expect(tree2.get(3)).toBe(30); - expect(tree2.get(4)).toBe(40); - tree1.checkValid(); - tree2.checkValid(); - result.checkValid(); - }); - - test('Union with disjoint ranges', () => { - const entries1: [number, number][] = []; - for (let i = 1; i <= 100; i++) entries1.push([i, i]); - for (let i = 201; i <= 300; i++) entries1.push([i, i]); - - const entries2: [number, number][] = []; - for (let i = 101; i <= 200; i++) entries2.push([i, i]); - - const tree1 = new BTreeEx(entries1, compare, maxNodeSize); - const tree2 = new BTreeEx(entries2, compare, maxNodeSize); - const unionFn: UnionFn = () => { - throw new Error('Should not be called - no overlaps'); - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: unionFn - }); - - expect(result.size).toBe(300); - expect(result.get(1)).toBe(1); - expect(result.get(100)).toBe(100); - expect(result.get(101)).toBe(101); - expect(result.get(200)).toBe(200); - expect(result.get(201)).toBe(201); - expect(result.get(300)).toBe(300); - }); - - test('Union with single element trees', () => { - const tree1 = new BTreeEx([[5, 50]], compare, maxNodeSize); - const tree2 = new BTreeEx([[5, 500]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => Math.max(v1, v2); - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); - expect(result.toArray()).toEqual([[5, 500]]); - }); - - test('Union interleaved 
keys', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - for (let i = 1; i <= 100; i += 2) - tree1.set(i, i); - - const tree2 = new BTreeEx([], compare, maxNodeSize); - for (let i = 2; i <= 100; i += 2) - tree2.set(i, i); - - const unionFn: UnionFn = () => { - throw new Error('Should not be called - no overlapping keys'); - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: unionFn - }); - expect(result.size).toBe(100); - for (let i = 1; i <= 100; i++) - expect(result.get(i)).toBe(i); - }); - - test('Union excluding all overlapping keys', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400]], compare, maxNodeSize); - const unionFn: UnionFn = () => undefined; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); - expect(result.toArray()).toEqual([[1, 10], [4, 400]]); - }); - - test('Union reuses appended subtree with minimum fanout', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); - - for (let i = 0; i < 400; i++) { - tree1.set(i, i); - } - for (let i = 400; i < 800; i++) { - tree2.set(i, i * 2); - } - - const unionFn: UnionFn = () => { - throw new Error('Should not be called for disjoint ranges'); - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, ({ result }) => { - const resultRoot = result['_root'] as any; - const tree2Root = tree2['_root'] as any; - expect(sharesNode(resultRoot, tree2Root)).toBe(true); - }); - }); - - test('Union with large disjoint ranges', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); - - for (let i = 0; i <= 10000; i++) - tree1.set(i, i); - for (let i = 10001; i <= 20000; i++) - tree2.set(i, i); - - let unionCalls = 0; - const unionFn: UnionFn = (_k, v1, v2) => { - unionCalls++; - return v1 + v2; - }; - - const 
{ result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: unionFn - }); - - expect(unionCalls).toBe(0); - expect(result.size).toBe(tree1.size + tree2.size); - expect(result.get(0)).toBe(0); - expect(result.get(20000)).toBe(20000); - }); - - test('Union trees with random overlap', () => { - const size = 10000; - const keys1 = makeArray(size, true); - const keys2 = makeArray(size, true); - - const tree1 = new BTreeEx(); - const tree2 = new BTreeEx(); - - for (let k of keys1) - tree1.set(k, k); - for (let k of keys2) - tree2.set(k, k * 10); - - const preferLeft: UnionFn = (_key, leftValue) => leftValue; - expectUnionMatchesBaseline(tree1, tree2, preferLeft, undefined, { - expectedUnionFn: preferLeft - }); - }); - - test('Union trees with ~10% overlap', () => { - const size = 200; - const offset = Math.floor(size * 0.9); - const overlap = size - offset; - - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); - - for (let i = 0; i < size; i++) - tree1.set(i, i); - - for (let i = 0; i < size; i++) { - const key = offset + i; - tree2.set(key, key * 10); - } - - const preferLeft: UnionFn = (_key, leftValue) => leftValue; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, preferLeft, undefined, { - expectedUnionFn: preferLeft - }); - - expect(result.size).toBe(size + size - overlap); - for (let i = 0; i < offset; i++) - expect(result.get(i)).toBe(i); - for (let i = offset; i < size; i++) - expect(result.get(i)).toBe(i); - const upperBound = offset + size; - for (let i = size; i < upperBound; i++) - expect(result.get(i)).toBe(i * 10); - }); -} - -describe('BTree union input/output validation', () => { - test('Union throws error when comparators differ', () => { - const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); - const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - - expect(() => tree1.union(tree2, 
unionFn)).toThrow(comparatorErrorMsg); - }); - - test('Union throws error when max node sizes differ', () => { - const compare = (a: number, b: number) => b - a; - const tree1 = new BTreeEx([[1, 10]], compare, 32); - const tree2 = new BTreeEx([[2, 20]], compare, 33); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - - expect(() => tree1.union(tree2, unionFn)).toThrow(branchingFactorErrorMsg); - }); - - test('Union returns a tree of the same class', () => { - expect(union(new BTreeEx(), new BTreeEx(), (_k, v1, v2) => v1)).toBeInstanceOf(BTreeEx); - expect(union(new BTree(), new BTree(), (_k, v1, v2) => v1)).toBeInstanceOf(BTree); - expect(union(new BTree(), new BTree(), (_k, v1, v2) => v1) instanceof BTreeEx).toBeFalsy(); - }); -}); - -function swap(keys: any[], i: number, j: number) { - var tmp = keys[i]; - keys[i] = keys[j]; - keys[j] = tmp; -} - -function makeArray(size: number, randomOrder: boolean, spacing = 10, collisionChance = 0, rng?: MersenneTwister) { - const randomizer = rng ?? 
rand; - const useGlobalRand = rng === undefined; - const randomFloat = () => { - if (typeof randomizer.random === 'function') - return randomizer.random(); - return Math.random(); - }; - const randomIntWithMax = (max: number) => { - if (max <= 0) - return 0; - if (useGlobalRand) - return randInt(max); - return Math.floor(randomFloat() * max); - }; - - const keys: number[] = []; - let current = 0; - for (let i = 0; i < size; i++) { - if (i > 0 && collisionChance > 0 && randomFloat() < collisionChance) { - keys[i] = keys[i - 1]; - } else { - current += 1 + randomIntWithMax(spacing); - keys[i] = current; - } - } - if (randomOrder) { - for (let i = 0; i < size; i++) - swap(keys, i, randomIntWithMax(size)); - } - return keys; -} - -const randomInt = (rng: MersenneTwister, maxExclusive: number) => Math.floor(rng.random() * maxExclusive); - -describe('BTree union fuzz tests', () => { - const compare = (a: number, b: number) => a - b; - const unionFn = (_k: number, left: number, _right: number) => left; - const FUZZ_SETTINGS = { - branchingFactors: [4, 5, 32], - ooms: [0, 1, 2], // [0, 1, 2, 3], - fractionsPerOOM: [0.1, 0.25, 0.5], // [0.0001, 0.01, 0.1, 0.25, 0.5], - collisionChances: [0.1, 0.5], // [0, 0.01, 0.1, 0.5] - } as const; - const RANDOM_EDITS_PER_TEST = 20; - const TIMEOUT_MS = 30_000; - - FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { - if (fraction < 0 || fraction > 1) - throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); - }); - FUZZ_SETTINGS.collisionChances.forEach(chance => { - if (chance < 0 || chance > 1) - throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); - }); - - jest.setTimeout(TIMEOUT_MS); - - const rng = new MersenneTwister(0xBEEFCAFE); - - for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { - describe(`branching factor ${maxNodeSize}`, () => { - for (const collisionChance of FUZZ_SETTINGS.collisionChances) { - for (const oom of FUZZ_SETTINGS.ooms) { - const size = 
5 * Math.pow(10, oom); - for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { - const fractionB = 1 - fractionA; - const collisionLabel = collisionChance.toFixed(2); - - test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { - const treeA = new BTreeEx([], compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1, collisionChance, rng); - const sorted = Array.from(new Set(keys)).sort(compare); - - for (const value of keys) { - if (rng.random() < fractionA) { - treeA.set(value, value); - } else { - treeB.set(value, value); - } - } - - const aArray = treeA.toArray(); - const bArray = treeB.toArray(); - - const merged = treeA.union(treeB, unionFn); - merged.checkValid(); - - expect(merged.toArray()).toEqual(sorted.map(k => [k, k])); - - // Union should not have mutated inputs - expect(treeA.toArray()).toEqual(aArray); - expect(treeB.toArray()).toEqual(bArray); - - for (let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { - const key = 1 + randomInt(rng, size); - const action = rng.random(); - if (action < 0.33) { - merged.set(key, key); - } else if (action < 0.66) { - merged.set(key, -key); - } else { - merged.delete(key); - } - } - - // Check for shared mutability issues - expect(treeA.toArray()).toEqual(aArray); - expect(treeB.toArray()).toEqual(bArray); - }); - } - } - } - }); - } -}); diff --git a/test/diffAgainst.test.ts b/test/diffAgainst.test.ts new file mode 100644 index 0000000..d89fbd8 --- /dev/null +++ b/test/diffAgainst.test.ts @@ -0,0 +1,218 @@ +import BTree from '../b+tree'; +import BTreeEx from '../extended'; +import diffAgainst from '../extended/diffAgainst'; + +var test: (name: string, f: () => void) => void = it; + +const FANOUTS = [32, 10, 4] as const; + +for (const fanout of FANOUTS) { + describe(`BTree diffAgainst tests with fanout ${fanout}`, () => { + runDiffAgainstSuite(fanout); + }); +} + +function 
runDiffAgainstSuite(maxNodeSize: number): void { + describe('Diff computation', () => { + let onlyThis: Map; + let onlyOther: Map; + let different: Map; + function reset(): void { + onlyOther = new Map(); + onlyThis = new Map(); + different = new Map(); + } + + beforeEach(() => reset()); + + const OnlyThis = (k: number, v: number) => { onlyThis.set(k, v); }; + const OnlyOther = (k: number, v: number) => { onlyOther.set(k, v); }; + const Different = (k: number, vThis: number, vOther: number) => { + different.set(k, `vThis: ${vThis}, vOther: ${vOther}`); + }; + const compare = (a: number, b: number) => a - b; + + function expectMapsEquals(mapA: Map, mapB: Map) { + const onlyA = []; + const onlyB = []; + const different = []; + mapA.forEach((valueA, keyA) => { + const valueB = mapB.get(keyA); + if (valueB === undefined) { + onlyA.push([keyA, valueA]); + } else if (!Object.is(valueB, valueB)) { + different.push([keyA, valueA, valueB]); + } + }); + mapB.forEach((valueB, keyB) => { + const valueA = mapA.get(keyB); + if (valueA === undefined) { + onlyA.push([keyB, valueB]); + } + }); + expect(onlyA.length).toEqual(0); + expect(onlyB.length).toEqual(0); + expect(different.length).toEqual(0); + } + + function expectDiffCorrect(treeThis: BTreeEx, treeOther: BTreeEx): void { + reset(); + treeThis.diffAgainst(treeOther, OnlyThis, OnlyOther, Different); + const onlyThisT: Map = new Map(); + const onlyOtherT: Map = new Map(); + const differentT: Map = new Map(); + treeThis.forEachPair((kThis, vThis) => { + if (!treeOther.has(kThis)) { + onlyThisT.set(kThis, vThis); + } else { + const vOther = treeOther.get(kThis); + if (!Object.is(vThis, vOther)) + differentT.set(kThis, `vThis: ${vThis}, vOther: ${vOther}`); + } + }); + treeOther.forEachPair((kOther, vOther) => { + if (!treeThis.has(kOther)) { + onlyOtherT.set(kOther, vOther); + } + }); + expectMapsEquals(onlyThis, onlyThisT); + expectMapsEquals(onlyOther, onlyOtherT); + expectMapsEquals(different, differentT); + } + + 
test('Diff of trees with different comparators is an error', () => { + const treeA = new BTreeEx([], compare); + const treeB = new BTreeEx([], (a, b) => b - a); + expect(() => treeA.diffAgainst(treeB, OnlyThis, OnlyOther, Different)).toThrow('comparators'); + }); + + test('Standalone diffAgainst works with core trees', () => { + const treeA = new BTree([[1, 1], [2, 2], [4, 4]], compare, maxNodeSize); + const treeB = new BTree([[1, 1], [2, 22], [3, 3]], compare, maxNodeSize); + const onlyThisKeys: number[] = []; + const onlyOtherKeys: number[] = []; + const differentKeys: number[] = []; + diffAgainst( + treeA, + treeB, + (k) => { onlyThisKeys.push(k); }, + (k) => { onlyOtherKeys.push(k); }, + (k) => { differentKeys.push(k); } + ); + expect(onlyThisKeys).toEqual([4]); + expect(onlyOtherKeys).toEqual([3]); + expect(differentKeys).toEqual([2]); + }); + + const entriesGroup: [number, number][][] = [[], [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]]; + entriesGroup.forEach(entries => { + test(`Diff of the same tree ${entries.length > 0 ? '(non-empty)' : '(empty)'}`, () => { + const tree = new BTreeEx(entries, compare, maxNodeSize); + expectDiffCorrect(tree, tree); + expect(onlyOther.size).toEqual(0); + expect(onlyThis.size).toEqual(0); + expect(different.size).toEqual(0); + }); + }); + + test('Diff of identical trees', () => { + const treeA = new BTreeEx(entriesGroup[1], compare, maxNodeSize); + const treeB = new BTreeEx(entriesGroup[1], compare, maxNodeSize); + expectDiffCorrect(treeA, treeB); + }); + + [entriesGroup, [...entriesGroup].reverse()].forEach(doubleEntries => { + test(`Diff of an ${doubleEntries[0].length === 0 ? 'empty' : 'non-empty'} tree and a ${doubleEntries[1].length === 0 ? 
'empty' : 'non-empty'} one`, () => { + const treeA = new BTreeEx(doubleEntries[0], compare, maxNodeSize); + const treeB = new BTreeEx(doubleEntries[1], compare, maxNodeSize); + expectDiffCorrect(treeA, treeB); + }); + }); + + test('Diff of different trees', () => { + const treeA = new BTreeEx(entriesGroup[1], compare, maxNodeSize); + const treeB = new BTreeEx(entriesGroup[1], compare, maxNodeSize); + treeB.set(-1, -1); + treeB.delete(2); + treeB.set(3, 4); + treeB.set(10, 10); + expectDiffCorrect(treeA, treeB); + }); + + test('Diff of odds and evens', () => { + const treeA = new BTreeEx([[1, 1], [3, 3], [5, 5], [7, 7]], compare, maxNodeSize); + const treeB = new BTreeEx([[2, 2], [4, 4], [6, 6], [8, 8]], compare, maxNodeSize); + expectDiffCorrect(treeA, treeB); + expectDiffCorrect(treeB, treeA); + }); + + function applyChanges(treeA: BTreeEx, duplicate: (tree: BTreeEx) => BTreeEx): void { + const treeB = duplicate(treeA); + const maxKey: number = treeA.maxKey()!; + const onlyInA = -10; + treeA.set(onlyInA, onlyInA); + const onlyInBSmall = -1; + treeB.set(onlyInBSmall, onlyInBSmall); + const onlyInBLarge = maxKey + 1; + treeB.set(onlyInBLarge, onlyInBLarge); + const onlyInAFromDelete = 10; + treeB.delete(onlyInAFromDelete); + const differingValue = -100; + const modifiedInB1 = 3; + const modifiedInB2 = maxKey - 2; + treeB.set(modifiedInB1, differingValue); + treeB.set(modifiedInB2, differingValue); + treeA.diffAgainst(treeB, OnlyThis, OnlyOther, Different); + expectDiffCorrect(treeA, treeB); + } + + function makeLargeTree(size?: number): BTreeEx { + size = size ?? 
Math.pow(maxNodeSize, 3); + const tree = new BTreeEx([], compare, maxNodeSize); + for (let i = 0; i < size; i++) { + tree.set(i, i); + } + return tree; + } + + test('Diff of large trees', () => { + const tree = makeLargeTree(); + applyChanges(tree, tree => tree.greedyClone()); + }); + + test('Diff of cloned trees', () => { + const tree = makeLargeTree(); + applyChanges(tree, tree => tree.clone()); + }); + + test('Diff can early exit', () => { + const tree = makeLargeTree(100); + const tree2 = tree.clone(); + tree2.set(-1, -1); + tree2.delete(10); + tree2.set(20, -1); + tree2.set(110, -1); + const ReturnKey = (key: number) => { return { break: key }; }; + + let val = tree.diffAgainst(tree2, OnlyThis, OnlyOther, ReturnKey); + expect(onlyOther.size).toEqual(1); + expect(onlyThis.size).toEqual(0); + expect(val).toEqual(20); + reset(); + + val = tree.diffAgainst(tree2, OnlyThis, ReturnKey, Different); + expect(different.size).toEqual(0); + expect(onlyThis.size).toEqual(0); + expect(val).toEqual(110); + reset(); + + val = tree.diffAgainst(tree2, ReturnKey, OnlyOther, Different); + expect(different.size).toEqual(1); + expect(onlyOther.size).toEqual(1); + expect(val).toEqual(10); + reset(); + + expectDiffCorrect(tree, tree2); + }); + }); +} diff --git a/test/intersect.test.ts b/test/intersect.test.ts new file mode 100644 index 0000000..c52db67 --- /dev/null +++ b/test/intersect.test.ts @@ -0,0 +1,233 @@ +import BTreeEx from '../extended'; +import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/parallelWalk'; +import MersenneTwister from 'mersenne-twister'; +import { makeArray } from './shared'; + +var test: (name: string, f: () => void) => void = it; + +describe('BTree intersect tests with fanout 32', testIntersect.bind(null, 32)); +describe('BTree intersect tests with fanout 10', testIntersect.bind(null, 10)); +describe('BTree intersect tests with fanout 4', testIntersect.bind(null, 4)); + +function testIntersect(maxNodeSize: number) { + const compare = 
(a: number, b: number) => a - b; + + const buildTree = (entries: Array<[number, number]>) => + new BTreeEx(entries, compare, maxNodeSize); + + const tuples = (...pairs: Array<[number, number]>) => pairs; + + const collectCalls = (left: BTreeEx, right: BTreeEx) => { + const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; + left.intersect(right, (key, leftValue, rightValue) => { + calls.push({ key, leftValue, rightValue }); + }); + return calls; + }; + + test('Intersect two empty trees', () => { + const tree1 = buildTree([]); + const tree2 = buildTree([]); + expect(collectCalls(tree1, tree2)).toEqual([]); + }); + + test('Intersect empty tree with non-empty tree', () => { + const tree1 = buildTree([]); + const tree2 = buildTree(tuples([1, 10], [2, 20], [3, 30])); + expect(collectCalls(tree1, tree2)).toEqual([]); + expect(collectCalls(tree2, tree1)).toEqual([]); + }); + + test('Intersect with no overlapping keys', () => { + const tree1 = buildTree(tuples([1, 10], [3, 30], [5, 50])); + const tree2 = buildTree(tuples([2, 20], [4, 40], [6, 60])); + expect(collectCalls(tree1, tree2)).toEqual([]); + }); + + test('Intersect with single overlapping key', () => { + const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); + const tree2 = buildTree(tuples([0, 100], [2, 200], [4, 400])); + expect(collectCalls(tree1, tree2)).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); + }); + + test('Intersect with multiple overlapping keys maintains tree contents', () => { + const leftEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; + const rightEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400], [6, 600]]; + const tree1 = buildTree(leftEntries); + const tree2 = buildTree(rightEntries); + const leftBefore = tree1.toArray(); + const rightBefore = tree2.toArray(); + expect(collectCalls(tree1, tree2)).toEqual([ + { key: 2, leftValue: 20, rightValue: 200 }, + { key: 4, leftValue: 40, rightValue: 400 }, + ]); + 
expect(tree1.toArray()).toEqual(leftBefore); + expect(tree2.toArray()).toEqual(rightBefore); + tree1.checkValid(); + tree2.checkValid(); + }); + + test('Intersect with contiguous overlap yields sorted keys', () => { + const tree1 = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); + const tree2 = buildTree(tuples([3, 30], [4, 40], [5, 50], [6, 60], [7, 70])); + const calls = collectCalls(tree1, tree2); + expect(calls.map(c => c.key)).toEqual([3, 4, 5, 6]); + expect(calls.map(c => c.leftValue)).toEqual([3, 4, 5, 6]); + expect(calls.map(c => c.rightValue)).toEqual([30, 40, 50, 60]); + }); + + test('Intersect large overlapping range counts each shared key once', () => { + const size = 1000; + const overlapStart = 500; + const leftEntries = Array.from({ length: size }, (_, i) => [i, i * 3] as [number, number]); + const rightEntries = Array.from({ length: size }, (_, i) => { + const key = i + overlapStart; + return [key, key * 7] as [number, number]; + }); + const tree1 = buildTree(leftEntries); + const tree2 = buildTree(rightEntries); + const calls = collectCalls(tree1, tree2); + expect(calls.length).toBe(size - overlapStart); + expect(calls[0]).toEqual({ + key: overlapStart, + leftValue: overlapStart * 3, + rightValue: overlapStart * 7 + }); + const lastCall = calls[calls.length - 1]; + expect(lastCall.key).toBe(size - 1); + expect(lastCall.leftValue).toBe((size - 1) * 3); + expect(lastCall.rightValue).toBe((size - 1) * 7); + }); + + test('Intersect tree with itself visits each key once', () => { + const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); + const tree = buildTree(entries); + const calls = collectCalls(tree, tree); + expect(calls.length).toBe(entries.length); + for (let i = 0; i < entries.length; i++) { + const [key, value] = entries[i]; + expect(calls[i]).toEqual({ key, leftValue: value, rightValue: value }); + } + }); + + test('Intersect arguments determine left/right values', () => { + const tree1 = 
buildTree(tuples([1, 100], [2, 200], [4, 400])); + const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); + const callsLeft = collectCalls(tree1, tree2); + const callsRight = collectCalls(tree2, tree1); + expect(callsLeft).toEqual([ + { key: 2, leftValue: 200, rightValue: 20 }, + { key: 4, leftValue: 400, rightValue: 40 }, + ]); + expect(callsRight).toEqual([ + { key: 2, leftValue: 20, rightValue: 200 }, + { key: 4, leftValue: 40, rightValue: 400 }, + ]); + }); +} + +describe('BTree intersect input/output validation', () => { + test('Intersect throws error when comparators differ', () => { + const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); + const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); + expect(() => tree1.intersect(tree2, () => {})).toThrow(comparatorErrorMsg); + }); + + test('Intersect throws error when max node sizes differ', () => { + const compare = (a: number, b: number) => b - a; + const tree1 = new BTreeEx([[1, 10]], compare, 32); + const tree2 = new BTreeEx([[2, 20]], compare, 33); + expect(() => tree1.intersect(tree2, () => {})).toThrow(branchingFactorErrorMsg); + }); +}); + +describe('BTree intersect fuzz tests', () => { + const compare = (a: number, b: number) => a - b; + const FUZZ_SETTINGS = { + branchingFactors: [4, 5, 32], + ooms: [2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], + collisionChances: [0.05, 0.1, 0.3], + timeoutMs: 30_000 + } as const; + + FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { + if (fraction < 0 || fraction > 1) + throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); + }); + FUZZ_SETTINGS.collisionChances.forEach(chance => { + if (chance < 0 || chance > 1) + throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); + }); + + jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + + const rng = new MersenneTwister(0xC0FFEE); + + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { + describe(`branching factor ${maxNodeSize}`, () => { + for (const 
collisionChance of FUZZ_SETTINGS.collisionChances) { + for (const oom of FUZZ_SETTINGS.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { + const fractionB = 1 - fractionA; + const collisionLabel = collisionChance.toFixed(2); + + test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + + const keys = makeArray(size, true, 1, collisionChance, rng); + + for (const value of keys) { + const assignToA = rng.random() < fractionA; + const assignToB = rng.random() < fractionB; + + if (!assignToA && !assignToB) { + if (rng.random() < 0.5) + treeA.set(value, value); + else + treeB.set(value, value); + continue; + } + + if (assignToA) + treeA.set(value, value); + if (assignToB) + treeB.set(value, value); + } + + const aArray = treeA.toArray(); + const bArray = treeB.toArray(); + const bMap = new Map(bArray); + const expected: Array<[number, number, number]> = []; + for (const [key, leftValue] of aArray) { + const rightValue = bMap.get(key); + if (rightValue !== undefined) + expected.push([key, leftValue, rightValue]); + } + + const actual: Array<[number, number, number]> = []; + treeA.intersect(treeB, (key, leftValue, rightValue) => { + actual.push([key, leftValue, rightValue]); + }); + expect(actual).toEqual(expected); + + const swappedActual: Array<[number, number, number]> = []; + treeB.intersect(treeA, (key, leftValue, rightValue) => { + swappedActual.push([key, leftValue, rightValue]); + }); + const swappedExpected = expected.map(([key, leftValue, rightValue]) => [key, rightValue, leftValue]); + expect(swappedActual).toEqual(swappedExpected); + + expect(treeA.toArray()).toEqual(aArray); + expect(treeB.toArray()).toEqual(bArray); + treeA.checkValid(); + treeB.checkValid(); + }); + } + } + } + }); + } +}); diff --git a/test/shared.ts 
b/test/shared.ts new file mode 100644 index 0000000..99f96dc --- /dev/null +++ b/test/shared.ts @@ -0,0 +1,68 @@ +import BTree, { IMap } from '../b+tree'; +import SortedArray from '../sorted-array'; +import MersenneTwister from 'mersenne-twister'; + +const rand = new MersenneTwister(1234); + +export function randInt(max: number): number { + return rand.random_int() % max; +} + +export function expectTreeEqualTo(tree: BTree, list: SortedArray): void { + tree.checkValid(); + expect(tree.toArray()).toEqual(list.getArray()); +} + +export function addToBoth(a: IMap, b: IMap, k: K, v: V): void { + expect(a.set(k, v)).toEqual(b.set(k, v)); +} + +export function makeArray( + size: number, + randomOrder: boolean, + spacing = 10, + collisionChance = 0, + rng?: MersenneTwister +): number[] { + const randomizer = rng ?? rand; + const useGlobalRand = rng === undefined; + + const randomFloat = () => { + if (typeof randomizer.random === 'function') + return randomizer.random(); + return Math.random(); + }; + + const randomIntWithMax = (max: number) => { + if (max <= 0) + return 0; + if (useGlobalRand) + return randInt(max); + return Math.floor(randomFloat() * max); + }; + + const keys: number[] = []; + let current = 0; + for (let i = 0; i < size; i++) { + if (i > 0 && collisionChance > 0 && randomFloat() < collisionChance) { + keys[i] = keys[i - 1]; + } else { + current += 1 + randomIntWithMax(spacing); + keys[i] = current; + } + } + if (randomOrder) { + for (let i = 0; i < size; i++) + swap(keys, i, randomIntWithMax(size)); + } + return keys; +} + +export const randomInt = (rng: MersenneTwister, maxExclusive: number) => + Math.floor(rng.random() * maxExclusive); + +function swap(keys: any[], i: number, j: number) { + const tmp = keys[i]; + keys[i] = keys[j]; + keys[j] = tmp; +} diff --git a/test/union.test.ts b/test/union.test.ts new file mode 100644 index 0000000..d1a0da4 --- /dev/null +++ b/test/union.test.ts @@ -0,0 +1,794 @@ +import BTree from '../b+tree'; +import BTreeEx 
from '../extended'; +import union from '../extended/union'; +import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/parallelWalk'; +import MersenneTwister from 'mersenne-twister'; +import { makeArray, randomInt } from './shared'; + +var test: (name: string, f: () => void) => void = it; + +describe('BTree union tests with fanout 32', testUnion.bind(null, 32)); +describe('BTree union tests with fanout 10', testUnion.bind(null, 10)); +describe('BTree union tests with fanout 4', testUnion.bind(null, 4)); + +type UnionFn = (key: number, leftValue: number, rightValue: number) => number | undefined; + +function testUnion(maxNodeSize: number) { + const compare = (a: number, b: number) => a - b; + const sharesNode = (root: any, targetNode: any): boolean => { + if (root === targetNode) + return true; + if (root.isLeaf) + return false; + const children = (root as any).children as any[]; + for (let i = 0; i < children.length; i++) { + if (sharesNode(children[i], targetNode)) + return true; + } + return false; + }; + + const buildTree = (keys: number[], valueScale = 1, valueOffset = 0) => { + const tree = new BTreeEx([], compare, maxNodeSize); + for (const key of keys) { + tree.set(key, key * valueScale + valueOffset); + } + return tree; + }; + + const expectRootLeafState = (tree: BTreeEx, expectedIsLeaf: boolean) => { + const root = tree['_root'] as any; + expect(root.isLeaf).toBe(expectedIsLeaf); + }; + + const range = (start: number, endExclusive: number, step = 1): number[] => { + const result: number[] = []; + for (let i = start; i < endExclusive; i += step) + result.push(i); + return result; + }; + + type UnionExpectationOptions = { + expectedUnionFn?: UnionFn; + }; + + const naiveUnion = ( + left: BTreeEx, + right: BTreeEx, + unionFn: UnionFn + ) => { + const expected = left.clone(); + right.forEachPair((key, rightValue) => { + if (expected.has(key)) { + const leftValue = expected.get(key)!; + const unionedValue = unionFn(key, leftValue, rightValue); + 
if (unionedValue === undefined) { + expected.delete(key); + } else { + expected.set(key, unionedValue); + } + } else { + expected.set(key, rightValue); + } + }); + return expected; + }; + + const expectUnionMatchesBaseline = ( + left: BTreeEx, + right: BTreeEx, + unionFn: UnionFn, + after?: (ctx: { result: BTreeEx, expected: BTreeEx }) => void, + options: UnionExpectationOptions = {} + ) => { + const expectedUnionFn = options.expectedUnionFn ?? unionFn; + const expected = naiveUnion(left, right, expectedUnionFn); + const result = left.union(right, unionFn); + expect(result.toArray()).toEqual(expected.toArray()); + result.checkValid(); + expected.checkValid(); + after?.({ result, expected }); + return { result, expected }; + }; + + test('Union disjoint roots reuses appended subtree', () => { + const size = maxNodeSize * 3; + const tree1 = buildTree(range(0, size), 1, 0); + const offset = size * 5; + const tree2 = buildTree(range(offset, offset + size), 2, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let unionCalls = 0; + const unionFn: UnionFn = () => { + unionCalls++; + return 0; + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, ({ result }) => { + const resultRoot = result['_root'] as any; + expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); + expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); + }); + + expect(unionCalls).toBe(0); + }); + + test('Union leaf roots with intersecting keys uses union callback', () => { + const tree1 = buildTree([1, 2, 4], 10, 0); + const tree2 = buildTree([2, 3, 5], 100, 0); + + expectRootLeafState(tree1, true); + expectRootLeafState(tree2, true); + + const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; + const unionFn: UnionFn = (key, leftValue, rightValue) => { + calls.push({ key, leftValue, rightValue }); + return leftValue + rightValue; + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: 
(_k, leftValue, rightValue) => leftValue + rightValue + }); + expect(calls).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); + }); + + test('Union leaf roots with disjoint keys', () => { + const tree1 = buildTree([1, 3, 5], 1, 0); + const tree2 = buildTree([2, 4, 6], 1, 1000); + + expectRootLeafState(tree1, true); + expectRootLeafState(tree2, true); + + let unionCalls = 0; + const unionFn: UnionFn = () => { + unionCalls++; + return 0; + }; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue + }); + expect(unionCalls).toBe(0); + expect(result.toArray()).toEqual([ + [1, 1], + [2, 1002], + [3, 3], + [4, 1004], + [5, 5], + [6, 1006] + ]); + }); + + test('Union trees disjoint except for shared maximum key', () => { + const size = maxNodeSize * 2; + const tree1 = buildTree(range(0, size), 1, 0); + const tree2 = buildTree(range(size - 1, size - 1 + size), 3, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let unionCalls = 0; + const unionFn: UnionFn = (_key, leftValue, rightValue) => { + unionCalls++; + return leftValue + rightValue; + }; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue + }); + expect(unionCalls).toBe(1); + expect(result.get(size - 1)).toBe((size - 1) + (size - 1) * 3); + expect(result.size).toBe(tree1.size + tree2.size - 1); + }); + + test('Union trees where all leaves are disjoint and one tree straddles the other', () => { + const straddleLength = 3 * 2 * maxNodeSize; // creates multiple leaves on both trees + const tree1 = buildTree( + range(0, straddleLength / 3).concat(range((straddleLength / 3) * 2, straddleLength)), + 1 + ); + const tree2 = buildTree(range(straddleLength / 3, (straddleLength / 3) * 2), 3); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let 
unionCalls = 0; + const unionFn: UnionFn = (_key, leftValue, rightValue) => { + unionCalls++; + return leftValue + rightValue; + }; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + expect(unionCalls).toBe(0); + expect(result.size).toBe(tree1.size + tree2.size); + }); + + test('Union where two-leaf tree intersects leaf-root tree across both leaves', () => { + const size = maxNodeSize + Math.max(3, Math.floor(maxNodeSize / 2)); + const tree1 = buildTree(range(0, size), 2, 0); + const tree2 = buildTree([1, Math.floor(size / 2), size - 1], 5, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, true); + + const seenKeys: number[] = []; + const unionFn: UnionFn = (key, _leftValue, rightValue) => { + seenKeys.push(key); + return rightValue; + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, _leftValue, rightValue) => rightValue + }); + expect(seenKeys.sort((a, b) => a - b)).toEqual([1, Math.floor(size / 2), size - 1]); + }); + + test('Union where max key equals min key of other tree', () => { + const size = maxNodeSize * 2; + const tree1 = buildTree(range(0, size), 1, 0); + const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let unionCalls = 0; + const unionFn: UnionFn = (_key, _leftValue, rightValue) => { + unionCalls++; + return rightValue; + }; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, _leftValue, rightValue) => rightValue + }); + expect(unionCalls).toBe(1); + expect(result.get(size - 1)).toBe((size - 1) * 10); + expect(result.size).toBe(tree1.size + tree2.size - 1); + }); + + test('Union odd and even keyed trees', () => { + const limit = maxNodeSize * 3; + const treeOdd = buildTree(range(1, limit * 2, 2), 1, 0); + const treeEven = buildTree(range(0, limit * 2, 2), 1, 100); + + expectRootLeafState(treeOdd, false); + 
expectRootLeafState(treeEven, false); + + let unionCalls = 0; + const unionFn: UnionFn = () => { + unionCalls++; + return 0; + }; + + const { result } = expectUnionMatchesBaseline(treeOdd, treeEven, unionFn); + expect(unionCalls).toBe(0); + expect(result.size).toBe(treeOdd.size + treeEven.size); + }); + + test('Union with single boundary overlap prefers right value', () => { + const size = maxNodeSize * 2; + const tree1 = buildTree(range(0, size), 1, 0); + const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let unionCalls = 0; + const unionFn: UnionFn = (_key, _leftValue, rightValue) => { + unionCalls++; + return rightValue; + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, _leftValue, rightValue) => rightValue + }); + expect(unionCalls).toBe(1); + }); + + test('Union overlapping prefix equal to branching factor', () => { + const shared = maxNodeSize; + const tree1Keys = [ + ...range(0, shared), + ...range(shared, shared + maxNodeSize) + ]; + const tree2Keys = [ + ...range(0, shared), + ...range(shared + maxNodeSize, shared + maxNodeSize * 2) + ]; + + const tree1 = buildTree(tree1Keys, 1, 0); + const tree2 = buildTree(tree2Keys, 2, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + const unionedKeys: number[] = []; + const unionFn: UnionFn = (key, leftValue, rightValue) => { + unionedKeys.push(key); + return leftValue + rightValue; + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue + }); + expect(unionedKeys.sort((a, b) => a - b)).toEqual(range(0, shared)); + }); + + test('Union two empty trees', () => { + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; + + const { result } = 
expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 + }); + expect(result.size).toBe(0); + }); + + test('Union empty tree with non-empty tree', () => { + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; + + const { result: leftUnion } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + expect(leftUnion.toArray()).toEqual(tree2.toArray()); + + const { result: rightUnion } = expectUnionMatchesBaseline(tree2, tree1, unionFn); + expect(rightUnion.toArray()).toEqual(tree2.toArray()); + expect(tree1.toArray()).toEqual([]); + expect(tree2.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + tree1.checkValid(); + tree2.checkValid(); + }); + + test('Union with no overlapping keys', () => { + const tree1 = new BTreeEx([[1, 10], [3, 30], [5, 50]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 20], [4, 40], [6, 60]], compare, maxNodeSize); + const unionFn: UnionFn = () => { + throw new Error('Should not be called for non-overlapping keys'); + }; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: unionFn + }); + + expect(result.size).toBe(6); + expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60]]); + }); + + test('Union with completely overlapping keys - sum values', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTreeEx([[1, 5], [2, 15], [3, 25]], compare, maxNodeSize); + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 + }); + expect(result.size).toBe(tree1.size); + }); + + test('Union with completely overlapping keys - prefer left', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], 
compare, maxNodeSize); + const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); + const unionFn: UnionFn = (_k, v1, _v2) => v1; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, _v2) => v1 + }); + expect(result.toArray()).toEqual(tree1.toArray()); + }); + + test('Union with completely overlapping keys - prefer right', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); + const unionFn: UnionFn = (_k, _v1, v2) => v2; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + expect(result.toArray()).toEqual(tree2.toArray()); + }); + + test('Union with partially overlapping keys', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); + const tree2 = new BTreeEx([[3, 300], [4, 400], [5, 500], [6, 600]], compare, maxNodeSize); + + const unionedKeys: number[] = []; + const unionFn: UnionFn = (key, v1, v2) => { + unionedKeys.push(key); + return v1 + v2; + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 + }); + expect(unionedKeys.sort((a, b) => a - b)).toEqual([3, 4]); + }); + + test('Union with overlapping keys can delete entries', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400], [5, 500]], compare, maxNodeSize); + const unionFn: UnionFn = (k, v1, v2) => { + if (k === 3) return undefined; + return v1 + v2; + }; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + expect(result.has(3)).toBe(false); + }); + + test('Union is called even when values are equal', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 20], [3, 30]], compare, maxNodeSize); + + const 
unionCallLog: Array<{k: number, v1: number, v2: number}> = []; + const unionFn: UnionFn = (k, v1, v2) => { + unionCallLog.push({k, v1, v2}); + return v1; + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + }); + expect(unionCallLog).toEqual([{k: 2, v1: 20, v2: 20}]); + }); + + test('Union does not mutate input trees', () => { + const entries1: [number, number][] = [[1, 10], [2, 20], [3, 30]]; + const entries2: [number, number][] = [[2, 200], [3, 300], [4, 400]]; + const tree1 = new BTreeEx(entries1, compare, maxNodeSize); + const tree2 = new BTreeEx(entries2, compare, maxNodeSize); + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; + + const snapshot1 = tree1.toArray(); + const snapshot2 = tree2.toArray(); + + expectUnionMatchesBaseline(tree1, tree2, unionFn); + + expect(tree1.toArray()).toEqual(snapshot1); + expect(tree2.toArray()).toEqual(snapshot2); + tree1.checkValid(); + tree2.checkValid(); + }); + + test('Union large trees with some overlaps', () => { + const entries1: [number, number][] = []; + for (let i = 0; i < 1000; i++) entries1.push([i, i]); + + const entries2: [number, number][] = []; + for (let i = 500; i < 1500; i++) entries2.push([i, i * 10]); + + const tree1 = new BTreeEx(entries1, compare, maxNodeSize); + const tree2 = new BTreeEx(entries2, compare, maxNodeSize); + + let unionCount = 0; + const unionFn: UnionFn = (k, v1, v2) => { + unionCount++; + return v1 + v2; + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 + }); + expect(unionCount).toBe(500); + }); + + test('Union with overlaps at boundaries', () => { + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); + + for (let i = 0; i < 100; i++) { + tree1.set(i * 2, i * 2); + } + + for (let i = 50; i < 150; i++) { + tree2.set(i, i * 10); + } + + const unionedKeys: number[] = []; + const unionFn: UnionFn = (key, v1, v2) 
=> { + unionedKeys.push(key); + return v1 + v2; + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: (_k, v1, v2) => v1 + v2 + }); + + const expectedUnionedKeys = range(50, 150).filter(k => k % 2 === 0); + expect(unionedKeys.sort((a, b) => a - b)).toEqual(expectedUnionedKeys); + }); + + test('Union result can be modified without affecting inputs', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); + const tree2 = new BTreeEx([[3, 30], [4, 40]], compare, maxNodeSize); + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + + result.set(1, 100); + result.set(5, 50); + result.delete(2); + + expect(tree1.get(1)).toBe(10); + expect(tree1.get(2)).toBe(20); + expect(tree1.has(5)).toBe(false); + expect(tree2.get(3)).toBe(30); + expect(tree2.get(4)).toBe(40); + tree1.checkValid(); + tree2.checkValid(); + result.checkValid(); + }); + + test('Union with disjoint ranges', () => { + const entries1: [number, number][] = []; + for (let i = 1; i <= 100; i++) entries1.push([i, i]); + for (let i = 201; i <= 300; i++) entries1.push([i, i]); + + const entries2: [number, number][] = []; + for (let i = 101; i <= 200; i++) entries2.push([i, i]); + + const tree1 = new BTreeEx(entries1, compare, maxNodeSize); + const tree2 = new BTreeEx(entries2, compare, maxNodeSize); + const unionFn: UnionFn = () => { + throw new Error('Should not be called - no overlaps'); + }; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: unionFn + }); + + expect(result.size).toBe(300); + expect(result.get(1)).toBe(1); + expect(result.get(100)).toBe(100); + expect(result.get(101)).toBe(101); + expect(result.get(200)).toBe(200); + expect(result.get(201)).toBe(201); + expect(result.get(300)).toBe(300); + }); + + test('Union with single element trees', () => { + const tree1 = new BTreeEx([[5, 50]], compare, maxNodeSize); + const 
tree2 = new BTreeEx([[5, 500]], compare, maxNodeSize); + const unionFn: UnionFn = (_k, v1, v2) => Math.max(v1, v2); + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + expect(result.toArray()).toEqual([[5, 500]]); + }); + + test('Union interleaved keys', () => { + const tree1 = new BTreeEx([], compare, maxNodeSize); + for (let i = 1; i <= 100; i += 2) + tree1.set(i, i); + + const tree2 = new BTreeEx([], compare, maxNodeSize); + for (let i = 2; i <= 100; i += 2) + tree2.set(i, i); + + const unionFn: UnionFn = () => { + throw new Error('Should not be called - no overlapping keys'); + }; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: unionFn + }); + expect(result.size).toBe(100); + for (let i = 1; i <= 100; i++) + expect(result.get(i)).toBe(i); + }); + + test('Union excluding all overlapping keys', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400]], compare, maxNodeSize); + const unionFn: UnionFn = () => undefined; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + expect(result.toArray()).toEqual([[1, 10], [4, 400]]); + }); + + test('Union reuses appended subtree with minimum fanout', () => { + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); + + for (let i = 0; i < 400; i++) { + tree1.set(i, i); + } + for (let i = 400; i < 800; i++) { + tree2.set(i, i * 2); + } + + const unionFn: UnionFn = () => { + throw new Error('Should not be called for disjoint ranges'); + }; + + expectUnionMatchesBaseline(tree1, tree2, unionFn, ({ result }) => { + const resultRoot = result['_root'] as any; + const tree2Root = tree2['_root'] as any; + expect(sharesNode(resultRoot, tree2Root)).toBe(true); + }); + }); + + test('Union with large disjoint ranges', () => { + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = 
new BTreeEx([], compare, maxNodeSize); + + for (let i = 0; i <= 10000; i++) + tree1.set(i, i); + for (let i = 10001; i <= 20000; i++) + tree2.set(i, i); + + let unionCalls = 0; + const unionFn: UnionFn = (_k, v1, v2) => { + unionCalls++; + return v1 + v2; + }; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { + expectedUnionFn: unionFn + }); + + expect(unionCalls).toBe(0); + expect(result.size).toBe(tree1.size + tree2.size); + expect(result.get(0)).toBe(0); + expect(result.get(20000)).toBe(20000); + }); + + test('Union trees with random overlap', () => { + const size = 10000; + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); + + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + for (let k of keys1) + tree1.set(k, k); + for (let k of keys2) + tree2.set(k, k * 10); + + const preferLeft: UnionFn = (_key, leftValue) => leftValue; + expectUnionMatchesBaseline(tree1, tree2, preferLeft, undefined, { + expectedUnionFn: preferLeft + }); + }); + + test('Union trees with ~10% overlap', () => { + const size = 200; + const offset = Math.floor(size * 0.9); + const overlap = size - offset; + + const tree1 = new BTreeEx([], compare, maxNodeSize); + const tree2 = new BTreeEx([], compare, maxNodeSize); + + for (let i = 0; i < size; i++) + tree1.set(i, i); + + for (let i = 0; i < size; i++) { + const key = offset + i; + tree2.set(key, key * 10); + } + + const preferLeft: UnionFn = (_key, leftValue) => leftValue; + + const { result } = expectUnionMatchesBaseline(tree1, tree2, preferLeft, undefined, { + expectedUnionFn: preferLeft + }); + + expect(result.size).toBe(size + size - overlap); + for (let i = 0; i < offset; i++) + expect(result.get(i)).toBe(i); + for (let i = offset; i < size; i++) + expect(result.get(i)).toBe(i); + const upperBound = offset + size; + for (let i = size; i < upperBound; i++) + expect(result.get(i)).toBe(i * 10); + }); +} + +describe('BTree union input/output validation', () => { + 
test('Union throws error when comparators differ', () => { + const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); + const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; + + expect(() => tree1.union(tree2, unionFn)).toThrow(comparatorErrorMsg); + }); + + test('Union throws error when max node sizes differ', () => { + const compare = (a: number, b: number) => b - a; + const tree1 = new BTreeEx([[1, 10]], compare, 32); + const tree2 = new BTreeEx([[2, 20]], compare, 33); + const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; + + expect(() => tree1.union(tree2, unionFn)).toThrow(branchingFactorErrorMsg); + }); + + test('Union returns a tree of the same class', () => { + expect(union(new BTreeEx(), new BTreeEx(), (_k, v1, v2) => v1)).toBeInstanceOf(BTreeEx); + expect(union(new BTree(), new BTree(), (_k, v1, v2) => v1)).toBeInstanceOf(BTree); + expect(union(new BTree(), new BTree(), (_k, v1, v2) => v1) instanceof BTreeEx).toBeFalsy(); + }); +}); + +describe('BTree union fuzz tests', () => { + const compare = (a: number, b: number) => a - b; + const unionFn = (_k: number, left: number, _right: number) => left; + const FUZZ_SETTINGS = { + branchingFactors: [4, 5, 32], + ooms: [0, 1, 2], // [0, 1, 2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], // [0.0001, 0.01, 0.1, 0.25, 0.5], + collisionChances: [0.1, 0.5], // [0, 0.01, 0.1, 0.5] + } as const; + const RANDOM_EDITS_PER_TEST = 20; + const TIMEOUT_MS = 30_000; + + FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { + if (fraction < 0 || fraction > 1) + throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); + }); + FUZZ_SETTINGS.collisionChances.forEach(chance => { + if (chance < 0 || chance > 1) + throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); + }); + + jest.setTimeout(TIMEOUT_MS); + + const rng = new MersenneTwister(0xBEEFCAFE); + + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { + 
describe(`branching factor ${maxNodeSize}`, () => { + for (const collisionChance of FUZZ_SETTINGS.collisionChances) { + for (const oom of FUZZ_SETTINGS.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { + const fractionB = 1 - fractionA; + const collisionLabel = collisionChance.toFixed(2); + + test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + + const keys = makeArray(size, true, 1, collisionChance, rng); + const sorted = Array.from(new Set(keys)).sort(compare); + + for (const value of keys) { + if (rng.random() < fractionA) { + treeA.set(value, value); + } else { + treeB.set(value, value); + } + } + + const aArray = treeA.toArray(); + const bArray = treeB.toArray(); + + const unioned = treeA.union(treeB, unionFn); + unioned.checkValid(); + + expect(unioned.toArray()).toEqual(sorted.map(k => [k, k])); + + // Union should not have mutated inputs + expect(treeA.toArray()).toEqual(aArray); + expect(treeB.toArray()).toEqual(bArray); + + for (let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { + const key = 1 + randomInt(rng, size); + const action = rng.random(); + if (action < 0.33) { + unioned.set(key, key); + } else if (action < 0.66) { + unioned.set(key, -key); + } else { + unioned.delete(key); + } + } + + // Check for shared mutability issues + expect(treeA.toArray()).toEqual(aArray); + expect(treeB.toArray()).toEqual(bArray); + }); + } + } + } + }); + } +}); diff --git a/tsconfig.json b/tsconfig.json index 09f7275..e3758ba 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -17,5 +17,5 @@ "stripInternal": true }, "include": ["**/*.ts"], - "exclude": ["node_modules", "tests", "b+tree.test.ts"], + "exclude": ["node_modules", "tests", "test"], } From 937286812195b17ecd0f4e9dd11a2d638d3e0025 Mon Sep 17 00:00:00 2001 From: Taylor 
Williams Date: Tue, 11 Nov 2025 19:31:26 -0800 Subject: [PATCH 066/143] support ignore right in decompose --- extended/decompose.d.ts | 2 +- extended/decompose.js | 12 ++++++++++-- extended/decompose.ts | 12 ++++++++++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/extended/decompose.d.ts b/extended/decompose.d.ts index 143984c..daf2d5f 100644 --- a/extended/decompose.d.ts +++ b/extended/decompose.d.ts @@ -13,5 +13,5 @@ export declare type DecomposeResult = { * the first key at or after the trailing cursor's previous position. * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. */ -export declare function decompose(left: BTreeWithInternals, right: BTreeWithInternals, mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined): DecomposeResult; +export declare function decompose(left: BTreeWithInternals, right: BTreeWithInternals, mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined, ignoreRight?: boolean): DecomposeResult; export declare function buildFromDecomposition, K, V>(constructor: new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, branchingFactor: number, decomposed: DecomposeResult, cmp: (a: K, b: K) => number, maxNodeSize: number): TBTree; diff --git a/extended/decompose.js b/extended/decompose.js index 9997eb0..3305467 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -12,7 +12,8 @@ var parallelWalk_1 = require("./parallelWalk"); * the first key at or after the trailing cursor's previous position. * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. 
*/ -function decompose(left, right, mergeValues) { +function decompose(left, right, mergeValues, ignoreRight) { + if (ignoreRight === void 0) { ignoreRight = false; } var cmp = left._compare; (0, b_tree_1.check)(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); // Holds the disjoint nodes that result from decomposition. @@ -184,7 +185,14 @@ function decompose(left, right, mergeValues) { var maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; // Initialize cursors at minimum keys. var curA = (0, parallelWalk_1.createCursor)(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); - var curB = (0, parallelWalk_1.createCursor)(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + var curB; + if (ignoreRight) { + var dummyPayload_1 = { disqualified: true }; + curB = (0, parallelWalk_1.createCursor)(right, function () { return dummyPayload_1; }, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + } + else { + curB = (0, parallelWalk_1.createCursor)(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + } // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) // the key of cursorB to the first key >= cursorB. Call this transition a "crossover point." 
All interior nodes that diff --git a/extended/decompose.ts b/extended/decompose.ts index bbe5a12..f293577 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -16,7 +16,8 @@ export type DecomposeResult = { disjoint: (number | BNode)[], tallestI export function decompose( left: BTreeWithInternals, right: BTreeWithInternals, - mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined + mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined, + ignoreRight: boolean = false ): DecomposeResult { const cmp = left._compare; check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); @@ -234,7 +235,14 @@ export function decompose( // Initialize cursors at minimum keys. const curA = createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); - const curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + + let curB: typeof curA; + if (ignoreRight) { + const dummyPayload: MergeCursorPayload = { disqualified: true }; + curB = createCursor(right, () => dummyPayload, noop, noop, noop, noop, noop); + } else { + curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + } // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) From 7a36fa91d4128df76fc35e5422bd502cfa19868c Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 19:42:50 -0800 Subject: [PATCH 067/143] default export --- extended/index.js | 4 ++-- extended/index.ts | 2 +- extended/intersect.d.ts | 2 +- extended/intersect.js | 3 +-- extended/intersect.ts | 3 +-- 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/extended/index.js b/extended/index.js index f5adb0a..8204ab5 100644 --- a/extended/index.js +++ b/extended/index.js @@ 
-21,7 +21,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.BTreeEx = void 0; var b_tree_1 = __importDefault(require("../b+tree")); var diffAgainst_1 = require("./diffAgainst"); -var intersect_1 = require("./intersect"); +var intersect_1 = __importDefault(require("./intersect")); var union_1 = __importDefault(require("./union")); var BTreeEx = /** @class */ (function (_super) { __extends(BTreeEx, _super); @@ -74,7 +74,7 @@ var BTreeEx = /** @class */ (function (_super) { * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ BTreeEx.prototype.intersect = function (other, intersection) { - (0, intersect_1.intersect)(this, other, intersection); + (0, intersect_1.default)(this, other, intersection); }; /** * Efficiently unions this tree with `other`, reusing subtrees wherever possible. diff --git a/extended/index.ts b/extended/index.ts index 278f175..e2c8712 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -1,7 +1,7 @@ import BTree from '../b+tree'; import type { BTreeWithInternals } from './shared'; import { diffAgainst as diffAgainstAlgorithm } from './diffAgainst'; -import { intersect } from './intersect'; +import intersect from './intersect'; import union from './union'; export class BTreeEx extends BTree { diff --git a/extended/intersect.d.ts b/extended/intersect.d.ts index c84b1d8..74e3f64 100644 --- a/extended/intersect.d.ts +++ b/extended/intersect.d.ts @@ -13,4 +13,4 @@ import BTree from '../b+tree'; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ -export declare function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void; +export default function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void; diff --git a/extended/intersect.js b/extended/intersect.js index 3d2fb82..b2b410f 100644 --- a/extended/intersect.js +++ b/extended/intersect.js @@ -1,6 +1,5 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.intersect = void 0; var parallelWalk_1 = require("./parallelWalk"); /** * Intersects the two trees, calling the supplied `intersection` callback for each intersecting key/value pair. @@ -66,4 +65,4 @@ function intersect(treeA, treeB, intersection) { } } } -exports.intersect = intersect; +exports.default = intersect; diff --git a/extended/intersect.ts b/extended/intersect.ts index e920fdf..6968282 100644 --- a/extended/intersect.ts +++ b/extended/intersect.ts @@ -1,5 +1,4 @@ import BTree from '../b+tree'; -import { BNode, BNodeInternal, check } from '../b+tree'; import type { BTreeWithInternals } from './shared'; import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperation } from "./parallelWalk" @@ -17,7 +16,7 @@ import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperat * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ -export function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { +export default function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { const _treeA = treeA as unknown as BTreeWithInternals; const _treeB = treeB as unknown as BTreeWithInternals; checkCanDoSetOperation(_treeA, _treeB); From a1384b64d9202f050f84cc92b5f2dc0f8bf6a49d Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 19:57:19 -0800 Subject: [PATCH 068/143] intersect -> forEachKeyInBoth --- benchmarks.ts | 30 ++++++------- .../{intersect.d.ts => forEachKeyInBoth.d.ts} | 12 ++--- .../{intersect.js => forEachKeyInBoth.js} | 18 ++++---- .../{intersect.ts => forEachKeyInBoth.ts} | 18 ++++---- extended/index.d.ts | 12 ++--- extended/index.js | 16 +++---- extended/index.ts | 16 +++---- readme.md | 2 +- ...rsect.test.ts => forEachKeyInBoth.test.ts} | 44 +++++++++---------- 9 files changed, 84 insertions(+), 84 deletions(-) rename extended/{intersect.d.ts => forEachKeyInBoth.d.ts} (59%) rename extended/{intersect.js => forEachKeyInBoth.js} (84%) rename extended/{intersect.ts => forEachKeyInBoth.ts} (80%) rename test/{intersect.test.ts => forEachKeyInBoth.test.ts} (83%) diff --git a/benchmarks.ts b/benchmarks.ts index 7110239..1175cd0 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -608,17 +608,17 @@ console.log("### Merge between B+ trees"); } console.log(); -console.log("### Intersect between B+ trees"); +console.log("### forEachKeyInBoth"); { const sizes = [100, 1000, 10000, 100000]; - const runIntersect = ( + const runForEachKeyInBoth = ( tree1: BTreeEx, tree2: BTreeEx ) => { let count = 0; let checksum = 0; - tree1.intersect(tree2, (_k, leftValue, rightValue) => { + tree1.forEachKeyInBoth(tree2, (_k, leftValue, rightValue) => { count++; checksum += leftValue + rightValue; }); @@ -638,16 +638,16 @@ console.log("### Intersect between B+ trees"); return { count, 
checksum }; }; - const timeIntersectVsBaseline = ( + const timeForEachKeyInBothVsBaseline = ( baseTitle: string, tree1: BTreeEx, tree2: BTreeEx, - intersectLabel = 'intersect()', + forEachKeyInBothLabel = 'forEachKeyInBoth()', baselineLabel = 'sort baseline' ) => { measure( - result => `${baseTitle} using ${intersectLabel} [count=${result.count}, checksum=${result.checksum}]`, - () => runIntersect(tree1, tree2) + result => `${baseTitle} using ${forEachKeyInBothLabel} [count=${result.count}, checksum=${result.checksum}]`, + () => runForEachKeyInBoth(tree1, tree2) ); measure( result => `${baseTitle} using ${baselineLabel} [count=${result.count}, checksum=${result.checksum}]`, @@ -666,8 +666,8 @@ console.log("### Intersect between B+ trees"); tree2.set(offset + i, offset + i); } - const baseTitle = `Intersect ${size}+${size} disjoint trees`; - timeIntersectVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `forEachKeyInBoth ${size}+${size} disjoint trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -681,8 +681,8 @@ console.log("### Intersect between B+ trees"); tree2.set(i + offset, (i + offset) * 2); } - const baseTitle = `Intersect ${size}+${size} half-overlapping trees`; - timeIntersectVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `forEachKeyInBoth ${size}+${size} half-overlapping trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -695,8 +695,8 @@ console.log("### Intersect between B+ trees"); tree2.set(i, i * 3); } - const baseTitle = `Intersect ${size}+${size} identical-key trees`; - timeIntersectVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `forEachKeyInBoth ${size}+${size} identical-key trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); }); console.log(); @@ -721,7 +721,7 @@ console.log("### Intersect between B+ trees"); tree2.set(key, key * 7); } - const baseTitle = `Intersect ${tree1.size}+${tree2.size} random trees`; - 
timeIntersectVsBaseline(baseTitle, tree1, tree2); + const baseTitle = `forEachKeyInBoth ${tree1.size}+${tree2.size} random trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); }); } diff --git a/extended/intersect.d.ts b/extended/forEachKeyInBoth.d.ts similarity index 59% rename from extended/intersect.d.ts rename to extended/forEachKeyInBoth.d.ts index 74e3f64..c5eea92 100644 --- a/extended/intersect.d.ts +++ b/extended/forEachKeyInBoth.d.ts @@ -1,11 +1,11 @@ import BTree from '../b+tree'; /** - * Intersects the two trees, calling the supplied `intersection` callback for each intersecting key/value pair. + * Calls the supplied `callback` for each key/value pair shared by both trees. * Neither tree is modified. - * @param treeA First tree to intersect. - * @param treeB Second tree to intersect. - * @param intersection Called for keys that appear in both trees. - * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * @param treeA First tree to compare. + * @param treeB Second tree to compare. + * @param callback Invoked for keys that appear in both trees. + * @description Complexity is bounded by O(N + M) for time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. @@ -13,4 +13,4 @@ import BTree from '../b+tree'; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ -export default function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void; +export default function forEachKeyInBoth(treeA: BTree, treeB: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void; diff --git a/extended/intersect.js b/extended/forEachKeyInBoth.js similarity index 84% rename from extended/intersect.js rename to extended/forEachKeyInBoth.js index b2b410f..e9e7f05 100644 --- a/extended/intersect.js +++ b/extended/forEachKeyInBoth.js @@ -2,12 +2,12 @@ Object.defineProperty(exports, "__esModule", { value: true }); var parallelWalk_1 = require("./parallelWalk"); /** - * Intersects the two trees, calling the supplied `intersection` callback for each intersecting key/value pair. + * Calls the supplied `callback` for each key/value pair shared by both trees. * Neither tree is modified. - * @param treeA First tree to intersect. - * @param treeB Second tree to intersect. - * @param intersection Called for keys that appear in both trees. - * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * @param treeA First tree to compare. + * @param treeB Second tree to compare. + * @param callback Invoked for keys that appear in both trees. + * @description Complexity is bounded by O(N + M) for time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. @@ -15,7 +15,7 @@ var parallelWalk_1 = require("./parallelWalk"); * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ -function intersect(treeA, treeB, intersection) { +function forEachKeyInBoth(treeA, treeB, callback) { var _treeA = treeA; var _treeB = treeB; (0, parallelWalk_1.checkCanDoSetOperation)(_treeA, _treeB); @@ -28,7 +28,7 @@ function intersect(treeA, treeB, intersection) { var leading = cursorA; var trailing = cursorB; var order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); - // The intersect walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. + // This walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. // However, the only thing we care about is when the two cursors are equal (equality is intersection). // When they are not equal we just advance the trailing cursor. while (true) { @@ -37,7 +37,7 @@ function intersect(treeA, treeB, intersection) { var key = (0, parallelWalk_1.getKey)(leading); var vA = cursorA.leaf.values[cursorA.leafIndex]; var vB = cursorB.leaf.values[cursorB.leafIndex]; - intersection(key, vA, vB); + callback(key, vA, vB); var outT = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); var outL = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); if (outT && outL) @@ -65,4 +65,4 @@ function intersect(treeA, treeB, intersection) { } } } -exports.default = intersect; +exports.default = forEachKeyInBoth; diff --git a/extended/intersect.ts b/extended/forEachKeyInBoth.ts similarity index 80% rename from extended/intersect.ts rename to extended/forEachKeyInBoth.ts index 6968282..27bfaa9 100644 --- a/extended/intersect.ts +++ b/extended/forEachKeyInBoth.ts @@ -3,12 +3,12 @@ import type { BTreeWithInternals } from './shared'; import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperation } from "./parallelWalk" /** - * Intersects the two trees, calling the supplied `intersection` callback for each intersecting key/value pair. 
+ * Calls the supplied `callback` for each key/value pair shared by both trees. * Neither tree is modified. - * @param treeA First tree to intersect. - * @param treeB Second tree to intersect. - * @param intersection Called for keys that appear in both trees. - * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * @param treeA First tree to compare. + * @param treeB Second tree to compare. + * @param callback Invoked for keys that appear in both trees. + * @description Complexity is bounded by O(N + M) for time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. @@ -16,7 +16,7 @@ import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperat * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ -export default function intersect(treeA: BTree, treeB: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { +export default function forEachKeyInBoth(treeA: BTree, treeB: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void { const _treeA = treeA as unknown as BTreeWithInternals; const _treeB = treeB as unknown as BTreeWithInternals; checkCanDoSetOperation(_treeA, _treeB); @@ -31,7 +31,7 @@ export default function intersect(treeA: BTree, treeB: BTree, int let trailing = cursorB; let order = cmp(getKey(leading), getKey(trailing)); - // The intersect walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. 
+ // This walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. // However, the only thing we care about is when the two cursors are equal (equality is intersection). // When they are not equal we just advance the trailing cursor. while (true) { @@ -40,7 +40,7 @@ export default function intersect(treeA: BTree, treeB: BTree, int const key = getKey(leading); const vA = cursorA.leaf.values[cursorA.leafIndex]; const vB = cursorB.leaf.values[cursorB.leafIndex]; - intersection(key, vA, vB); + callback(key, vA, vB); const outT = moveForwardOne(trailing, leading, key, cmp); const outL = moveForwardOne(leading, trailing, key, cmp); if (outT && outL) @@ -63,4 +63,4 @@ export default function intersect(treeA: BTree, treeB: BTree, int } } } -} \ No newline at end of file +} diff --git a/extended/index.d.ts b/extended/index.d.ts index e2d6443..18209e5 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -22,11 +22,11 @@ export declare class BTreeEx extends BTree { break?: R; } | void): R | undefined; /** - * Intersects this tree with `other`, calling the supplied `intersection` callback for each intersecting key/value pair. + * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. * Neither tree is modified. - * @param other The other tree to intersect with this one. - * @param intersection Called for keys that appear in both trees. - * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * @param other The other tree to compare with this one. + * @param callback Called for keys that appear in both trees. + * @description Complexity is bounded by O(N + M) time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. @@ -34,7 +34,7 @@ export declare class BTreeEx extends BTree { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ - intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void; + forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void; /** * Efficiently unions this tree with `other`, reusing subtrees wherever possible. * Neither input tree is modified. @@ -42,7 +42,7 @@ export declare class BTreeEx extends BTree { * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. * @returns A new BTree that contains the unioned key/value pairs. - * @description Complexity is bounded O(N + M) for both time and allocations. + * @description Complexity is bounded by O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
diff --git a/extended/index.js b/extended/index.js index 8204ab5..e8e0753 100644 --- a/extended/index.js +++ b/extended/index.js @@ -21,7 +21,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.BTreeEx = void 0; var b_tree_1 = __importDefault(require("../b+tree")); var diffAgainst_1 = require("./diffAgainst"); -var intersect_1 = __importDefault(require("./intersect")); +var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); var union_1 = __importDefault(require("./union")); var BTreeEx = /** @class */ (function (_super) { __extends(BTreeEx, _super); @@ -61,11 +61,11 @@ var BTreeEx = /** @class */ (function (_super) { return (0, diffAgainst_1.diffAgainst)(this, other, onlyThis, onlyOther, different); }; /** - * Intersects this tree with `other`, calling the supplied `intersection` callback for each intersecting key/value pair. + * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. * Neither tree is modified. - * @param other The other tree to intersect with this one. - * @param intersection Called for keys that appear in both trees. - * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * @param other The other tree to compare with this one. + * @param callback Called for keys that appear in both trees. + * @description Complexity is bounded by O(N + M) time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
@@ -73,8 +73,8 @@ var BTreeEx = /** @class */ (function (_super) { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ - BTreeEx.prototype.intersect = function (other, intersection) { - (0, intersect_1.default)(this, other, intersection); + BTreeEx.prototype.forEachKeyInBoth = function (other, callback) { + (0, forEachKeyInBoth_1.default)(this, other, callback); }; /** * Efficiently unions this tree with `other`, reusing subtrees wherever possible. @@ -83,7 +83,7 @@ var BTreeEx = /** @class */ (function (_super) { * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. * @returns A new BTree that contains the unioned key/value pairs. - * @description Complexity is bounded O(N + M) for both time and allocations. + * @description Complexity is bounded by O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
diff --git a/extended/index.ts b/extended/index.ts index e2c8712..04a5b31 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -1,7 +1,7 @@ import BTree from '../b+tree'; import type { BTreeWithInternals } from './shared'; import { diffAgainst as diffAgainstAlgorithm } from './diffAgainst'; -import intersect from './intersect'; +import forEachKeyInBoth from './forEachKeyInBoth'; import union from './union'; export class BTreeEx extends BTree { @@ -46,11 +46,11 @@ export class BTreeEx extends BTree { } /** - * Intersects this tree with `other`, calling the supplied `intersection` callback for each intersecting key/value pair. + * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. * Neither tree is modified. - * @param other The other tree to intersect with this one. - * @param intersection Called for keys that appear in both trees. - * @description Complexity is bounded O(N + M) time and O(log(N + M)) for allocations. + * @param other The other tree to compare with this one. + * @param callback Called for keys that appear in both trees. + * @description Complexity is bounded by O(N + M) time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. @@ -58,8 +58,8 @@ export class BTreeEx extends BTree { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ - intersect(other: BTree, intersection: (key: K, leftValue: V, rightValue: V) => void): void { - intersect(this, other, intersection); + forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void { + forEachKeyInBoth(this, other, callback); } /** @@ -69,7 +69,7 @@ export class BTreeEx extends BTree { * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. * @returns A new BTree that contains the unioned key/value pairs. - * @description Complexity is bounded O(N + M) for both time and allocations. + * @description Complexity is bounded by O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. diff --git a/readme.md b/readme.md index 1e1b91e..997d855 100644 --- a/readme.md +++ b/readme.md @@ -92,7 +92,7 @@ Features - Includes a lattice of interfaces for TypeScript users (see below) - Supports diffing computation between two trees that is highly optimized for the case in which a majority of nodes are shared (such as when persistent methods are used). -- Supports fast union & intersection operations with asymptotic speedups when large disjoint ranges of keys are present. +- Supports fast union & shared-key iteration via `forEachKeyInBoth` with asymptotic speedups when large disjoint ranges of keys are present. The union operation generates a new tree that shares nodes with the original trees when possible. 
### Additional operations supported on this B+ tree ### diff --git a/test/intersect.test.ts b/test/forEachKeyInBoth.test.ts similarity index 83% rename from test/intersect.test.ts rename to test/forEachKeyInBoth.test.ts index c52db67..b15e90f 100644 --- a/test/intersect.test.ts +++ b/test/forEachKeyInBoth.test.ts @@ -5,11 +5,11 @@ import { makeArray } from './shared'; var test: (name: string, f: () => void) => void = it; -describe('BTree intersect tests with fanout 32', testIntersect.bind(null, 32)); -describe('BTree intersect tests with fanout 10', testIntersect.bind(null, 10)); -describe('BTree intersect tests with fanout 4', testIntersect.bind(null, 4)); +describe('BTree forEachKeyInBoth tests with fanout 32', testForEachKeyInBoth.bind(null, 32)); +describe('BTree forEachKeyInBoth tests with fanout 10', testForEachKeyInBoth.bind(null, 10)); +describe('BTree forEachKeyInBoth tests with fanout 4', testForEachKeyInBoth.bind(null, 4)); -function testIntersect(maxNodeSize: number) { +function testForEachKeyInBoth(maxNodeSize: number) { const compare = (a: number, b: number) => a - b; const buildTree = (entries: Array<[number, number]>) => @@ -19,38 +19,38 @@ function testIntersect(maxNodeSize: number) { const collectCalls = (left: BTreeEx, right: BTreeEx) => { const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; - left.intersect(right, (key, leftValue, rightValue) => { + left.forEachKeyInBoth(right, (key, leftValue, rightValue) => { calls.push({ key, leftValue, rightValue }); }); return calls; }; - test('Intersect two empty trees', () => { + test('forEachKeyInBoth two empty trees', () => { const tree1 = buildTree([]); const tree2 = buildTree([]); expect(collectCalls(tree1, tree2)).toEqual([]); }); - test('Intersect empty tree with non-empty tree', () => { + test('forEachKeyInBoth empty tree with non-empty tree', () => { const tree1 = buildTree([]); const tree2 = buildTree(tuples([1, 10], [2, 20], [3, 30])); expect(collectCalls(tree1, 
tree2)).toEqual([]); expect(collectCalls(tree2, tree1)).toEqual([]); }); - test('Intersect with no overlapping keys', () => { + test('forEachKeyInBoth with no overlapping keys', () => { const tree1 = buildTree(tuples([1, 10], [3, 30], [5, 50])); const tree2 = buildTree(tuples([2, 20], [4, 40], [6, 60])); expect(collectCalls(tree1, tree2)).toEqual([]); }); - test('Intersect with single overlapping key', () => { + test('forEachKeyInBoth with single overlapping key', () => { const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); const tree2 = buildTree(tuples([0, 100], [2, 200], [4, 400])); expect(collectCalls(tree1, tree2)).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); }); - test('Intersect with multiple overlapping keys maintains tree contents', () => { + test('forEachKeyInBoth with multiple overlapping keys maintains tree contents', () => { const leftEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; const rightEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400], [6, 600]]; const tree1 = buildTree(leftEntries); @@ -67,7 +67,7 @@ function testIntersect(maxNodeSize: number) { tree2.checkValid(); }); - test('Intersect with contiguous overlap yields sorted keys', () => { + test('forEachKeyInBoth with contiguous overlap yields sorted keys', () => { const tree1 = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); const tree2 = buildTree(tuples([3, 30], [4, 40], [5, 50], [6, 60], [7, 70])); const calls = collectCalls(tree1, tree2); @@ -76,7 +76,7 @@ function testIntersect(maxNodeSize: number) { expect(calls.map(c => c.rightValue)).toEqual([30, 40, 50, 60]); }); - test('Intersect large overlapping range counts each shared key once', () => { + test('forEachKeyInBoth large overlapping range counts each shared key once', () => { const size = 1000; const overlapStart = 500; const leftEntries = Array.from({ length: size }, (_, i) => [i, i * 3] as [number, number]); @@ -99,7 +99,7 @@ function 
testIntersect(maxNodeSize: number) { expect(lastCall.rightValue).toBe((size - 1) * 7); }); - test('Intersect tree with itself visits each key once', () => { + test('forEachKeyInBoth tree with itself visits each key once', () => { const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); const tree = buildTree(entries); const calls = collectCalls(tree, tree); @@ -110,7 +110,7 @@ function testIntersect(maxNodeSize: number) { } }); - test('Intersect arguments determine left/right values', () => { + test('forEachKeyInBoth arguments determine left/right values', () => { const tree1 = buildTree(tuples([1, 100], [2, 200], [4, 400])); const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); const callsLeft = collectCalls(tree1, tree2); @@ -126,22 +126,22 @@ function testIntersect(maxNodeSize: number) { }); } -describe('BTree intersect input/output validation', () => { - test('Intersect throws error when comparators differ', () => { +describe('BTree forEachKeyInBoth input/output validation', () => { + test('forEachKeyInBoth throws error when comparators differ', () => { const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); - expect(() => tree1.intersect(tree2, () => {})).toThrow(comparatorErrorMsg); + expect(() => tree1.forEachKeyInBoth(tree2, () => {})).toThrow(comparatorErrorMsg); }); - test('Intersect throws error when max node sizes differ', () => { + test('forEachKeyInBoth throws error when max node sizes differ', () => { const compare = (a: number, b: number) => b - a; const tree1 = new BTreeEx([[1, 10]], compare, 32); const tree2 = new BTreeEx([[2, 20]], compare, 33); - expect(() => tree1.intersect(tree2, () => {})).toThrow(branchingFactorErrorMsg); + expect(() => tree1.forEachKeyInBoth(tree2, () => {})).toThrow(branchingFactorErrorMsg); }); }); -describe('BTree intersect fuzz tests', () => { +describe('BTree forEachKeyInBoth fuzz tests', () => { const compare = (a: number, b: 
number) => a - b; const FUZZ_SETTINGS = { branchingFactors: [4, 5, 32], @@ -208,13 +208,13 @@ describe('BTree intersect fuzz tests', () => { } const actual: Array<[number, number, number]> = []; - treeA.intersect(treeB, (key, leftValue, rightValue) => { + treeA.forEachKeyInBoth(treeB, (key, leftValue, rightValue) => { actual.push([key, leftValue, rightValue]); }); expect(actual).toEqual(expected); const swappedActual: Array<[number, number, number]> = []; - treeB.intersect(treeA, (key, leftValue, rightValue) => { + treeB.forEachKeyInBoth(treeA, (key, leftValue, rightValue) => { swappedActual.push([key, leftValue, rightValue]); }); const swappedExpected = expected.map(([key, leftValue, rightValue]) => [key, rightValue, leftValue]); From eca6c4ab5bf6693913639ac9eea2b585d5154626 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 11 Nov 2025 19:59:07 -0800 Subject: [PATCH 069/143] verbose tests --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index c19b51c..3a70b55 100644 --- a/package.json +++ b/package.json @@ -83,6 +83,7 @@ "jsx", "json" ], + "verbose": true, "bail": true, "testEnvironment": "node" }, From f7d3cc73adee19a12babd489197b94a1cb8329ad Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 08:29:18 -0800 Subject: [PATCH 070/143] wip --- extended/bulkLoad.d.ts | 2 + extended/bulkLoad.js | 34 +++++++ extended/bulkLoad.ts | 32 ++++++ extended/decompose.d.ts | 4 + extended/decompose.js | 38 ++------ extended/decompose.ts | 209 ++++++++++++++++++---------------------- extended/intersect.d.ts | 17 ++++ extended/intersect.js | 44 +++++++++ extended/intersect.ts | 47 +++++++++ 9 files changed, 283 insertions(+), 144 deletions(-) create mode 100644 extended/bulkLoad.d.ts create mode 100644 extended/bulkLoad.js create mode 100644 extended/bulkLoad.ts create mode 100644 extended/intersect.d.ts create mode 100644 extended/intersect.js create mode 100644 extended/intersect.ts diff --git 
a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts new file mode 100644 index 0000000..1069c0e --- /dev/null +++ b/extended/bulkLoad.d.ts @@ -0,0 +1,2 @@ +import { BNode } from '../b+tree'; +export declare function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, toFlushTo: (number | BNode)[]): number; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js new file mode 100644 index 0000000..53e4a25 --- /dev/null +++ b/extended/bulkLoad.js @@ -0,0 +1,34 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.flushToLeaves = void 0; +var b_tree_1 = require("../b+tree"); +var decompose_1 = require("./decompose"); +function flushToLeaves(alternatingList, maxNodeSize, toFlushTo) { + var totalPairs = (0, decompose_1.alternatingCount)(alternatingList); + if (totalPairs === 0) + return 0; + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. 
+ var leafCount = Math.ceil(totalPairs / maxNodeSize); + var remainingLeaves = leafCount; + var remaining = totalPairs; + var pairIndex = 0; + while (remainingLeaves > 0) { + var chunkSize = Math.ceil(remaining / remainingLeaves); + var keys = new Array(chunkSize); + var vals = new Array(chunkSize); + for (var i = 0; i < chunkSize; i++) { + keys[i] = (0, decompose_1.alternatingGetFirst)(alternatingList, pairIndex); + vals[i] = (0, decompose_1.alternatingGetSecond)(alternatingList, pairIndex); + pairIndex++; + } + remaining -= chunkSize; + remainingLeaves--; + var leaf = new b_tree_1.BNode(keys, vals); + (0, decompose_1.alternatingPush)(toFlushTo, 0, leaf); + } + alternatingList.length = 0; + return leafCount; +} +exports.flushToLeaves = flushToLeaves; +; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts new file mode 100644 index 0000000..4970f86 --- /dev/null +++ b/extended/bulkLoad.ts @@ -0,0 +1,32 @@ +import { BNode, BNodeInternal } from '../b+tree'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush } from './decompose'; +import type { BTreeWithInternals } from './shared'; + +export function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, toFlushTo: (number | BNode)[]): number { + const totalPairs = alternatingCount(alternatingList); + if (totalPairs === 0) + return 0; + + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. 
+ const leafCount = Math.ceil(totalPairs / maxNodeSize); + let remainingLeaves = leafCount; + let remaining = totalPairs; + let pairIndex = 0; + while (remainingLeaves > 0) { + const chunkSize = Math.ceil(remaining / remainingLeaves); + const keys = new Array(chunkSize); + const vals = new Array(chunkSize); + for (let i = 0; i < chunkSize; i++) { + keys[i] = alternatingGetFirst(alternatingList, pairIndex); + vals[i] = alternatingGetSecond(alternatingList, pairIndex); + pairIndex++; + } + remaining -= chunkSize; + remainingLeaves--; + const leaf = new BNode(keys, vals); + alternatingPush(toFlushTo, 0, leaf); + } + alternatingList.length = 0; + return leafCount; +}; \ No newline at end of file diff --git a/extended/decompose.d.ts b/extended/decompose.d.ts index daf2d5f..7d50152 100644 --- a/extended/decompose.d.ts +++ b/extended/decompose.d.ts @@ -15,3 +15,7 @@ export declare type DecomposeResult = { */ export declare function decompose(left: BTreeWithInternals, right: BTreeWithInternals, mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined, ignoreRight?: boolean): DecomposeResult; export declare function buildFromDecomposition, K, V>(constructor: new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, branchingFactor: number, decomposed: DecomposeResult, cmp: (a: K, b: K) => number, maxNodeSize: number): TBTree; +export declare function alternatingCount(list: unknown[]): number; +export declare function alternatingGetFirst(list: Array, index: number): TFirst; +export declare function alternatingGetSecond(list: Array, index: number): TSecond; +export declare function alternatingPush(list: Array, first: TFirst, second: TSecond): void; diff --git a/extended/decompose.js b/extended/decompose.js index 3305467..9540ffe 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -1,8 +1,9 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.buildFromDecomposition = 
exports.decompose = void 0; +exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.buildFromDecomposition = exports.decompose = void 0; var b_tree_1 = require("../b+tree"); var parallelWalk_1 = require("./parallelWalk"); +var bulkLoad_1 = require("./bulkLoad"); /** * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. @@ -33,34 +34,11 @@ function decompose(left, right, mergeValues, ignoreRight) { // because its ancestor may also be disjoint and should be reused instead. var highestDisjoint = undefined; var flushPendingEntries = function () { - var totalPairs = alternatingCount(pending); - if (totalPairs === 0) - return; - // This method creates as many evenly filled leaves as possible from - // the pending entries. All will be > 50% full if we are creating more than one leaf. 
- var max = left._maxNodeSize; - var leafCount = Math.ceil(totalPairs / max); - var remaining = totalPairs; - var pairIndex = 0; - while (leafCount > 0) { - var chunkSize = Math.ceil(remaining / leafCount); - var keys = new Array(chunkSize); - var vals = new Array(chunkSize); - for (var i = 0; i < chunkSize; i++) { - keys[i] = alternatingGetFirst(pending, pairIndex); - vals[i] = alternatingGetSecond(pending, pairIndex); - pairIndex++; - } - remaining -= chunkSize; - leafCount--; - var leaf = new b_tree_1.BNode(keys, vals); - alternatingPush(disjoint, 0, leaf); - if (tallestHeight < 0) { - tallestIndex = alternatingCount(disjoint) - 1; - tallestHeight = 0; - } + var createdLeaves = (0, bulkLoad_1.flushToLeaves)(pending, left._maxNodeSize, disjoint); + if (createdLeaves > 0) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = 0; } - pending.length = 0; }; var addSharedNodeToDisjointSet = function (node, height) { flushPendingEntries(); @@ -537,13 +515,17 @@ function updateRightMax(node, maxBelow) { function alternatingCount(list) { return list.length >> 1; } +exports.alternatingCount = alternatingCount; function alternatingGetFirst(list, index) { return list[index << 1]; } +exports.alternatingGetFirst = alternatingGetFirst; function alternatingGetSecond(list, index) { return list[(index << 1) + 1]; } +exports.alternatingGetSecond = alternatingGetSecond; function alternatingPush(list, first, second) { // Micro benchmarks show this is the fastest way to do this list.push(first, second); } +exports.alternatingPush = alternatingPush; diff --git a/extended/decompose.ts b/extended/decompose.ts index f293577..7886c48 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -1,8 +1,9 @@ import BTree, { areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; import type { BTreeWithInternals } from './shared'; import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne, moveTo, noop } from "./parallelWalk"; +import { 
flushToLeaves } from './bulkLoad'; -export type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; +export type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; /** * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes @@ -13,17 +14,17 @@ export type DecomposeResult = { disjoint: (number | BNode)[], tallestI * the first key at or after the trailing cursor's previous position. * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. */ -export function decompose( - left: BTreeWithInternals, - right: BTreeWithInternals, +export function decompose( + left: BTreeWithInternals, + right: BTreeWithInternals, mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined, ignoreRight: boolean = false -): DecomposeResult { +): DecomposeResult { const cmp = left._compare; check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); // Holds the disjoint nodes that result from decomposition. // Alternating entries of (height, node) to avoid creating small tuples - const disjoint: (number | BNode)[] = []; + const disjoint: (number | BNode)[] = []; // During the decomposition, leaves that are not disjoint are decomposed into individual entries // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused // disjoint subtree is added to the disjoint set. @@ -37,46 +38,22 @@ export function decompose( // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. // This is done because we cannot know immediately whether we can add the node to the disjoint set // because its ancestor may also be disjoint and should be reused instead. 
- let highestDisjoint: { node: BNode, height: number } | undefined + let highestDisjoint: { node: BNode, height: number } | undefined // Have to do this as cast to convince TS it's ever assigned - = undefined as { node: BNode, height: number } | undefined; + = undefined as { node: BNode, height: number } | undefined; const flushPendingEntries = () => { - const totalPairs = alternatingCount(pending); - if (totalPairs === 0) - return; - - // This method creates as many evenly filled leaves as possible from - // the pending entries. All will be > 50% full if we are creating more than one leaf. - const max = left._maxNodeSize; - let leafCount = Math.ceil(totalPairs / max); - let remaining = totalPairs; - let pairIndex = 0; - while (leafCount > 0) { - const chunkSize = Math.ceil(remaining / leafCount); - const keys = new Array(chunkSize); - const vals = new Array(chunkSize); - for (let i = 0; i < chunkSize; i++) { - keys[i] = alternatingGetFirst(pending, pairIndex); - vals[i] = alternatingGetSecond(pending, pairIndex); - pairIndex++; - } - remaining -= chunkSize; - leafCount--; - const leaf = new BNode(keys, vals); - alternatingPush>(disjoint, 0, leaf); - if (tallestHeight < 0) { - tallestIndex = alternatingCount(disjoint) - 1; - tallestHeight = 0; - } + const createdLeaves = flushToLeaves(pending, left._maxNodeSize, disjoint); + if (createdLeaves > 0) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = 0; } - pending.length = 0; }; - const addSharedNodeToDisjointSet = (node: BNode, height: number) => { + const addSharedNodeToDisjointSet = (node: BNode, height: number) => { flushPendingEntries(); node.isShared = true; - alternatingPush>(disjoint, height, node); + alternatingPush>(disjoint, height, node); if (height > tallestHeight) { tallestIndex = alternatingCount(disjoint) - 1; tallestHeight = height; @@ -91,7 +68,7 @@ export function decompose( }; // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) - const 
disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { + const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { const spine = cursor.spine; for (let i = depthFrom; i >= 0; --i) { const payload = spine[i].payload; @@ -107,15 +84,15 @@ export function decompose( // Cursor payload factory const makePayload = (): MergeCursorPayload => ({ disqualified: false }); - const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { + const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { const keys = leaf.keys; const values = leaf.values; for (let i = from; i < toExclusive; ++i) - alternatingPush(pending, keys[i], values[i]); + alternatingPush(pending, keys[i], values[i]); }; const onMoveInLeaf = ( - leaf: BNode, + leaf: BNode, payload: MergeCursorPayload, fromIndex: number, toIndex: number, @@ -124,15 +101,15 @@ export function decompose( check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); const start = startedEqual ? 
fromIndex + 1 : fromIndex; if (start < toIndex) - pushLeafRange(leaf, start, toIndex); + pushLeafRange(leaf, start, toIndex); }; const onExitLeaf = ( - leaf: BNode, + leaf: BNode, payload: MergeCursorPayload, startingIndex: number, startedEqual: boolean, - cursorThis: MergeCursor, + cursorThis: MergeCursor, ) => { highestDisjoint = undefined; if (!payload.disqualified) { @@ -151,13 +128,13 @@ export function decompose( }; const onStepUp = ( - parent: BNodeInternal, + parent: BNodeInternal, height: number, payload: MergeCursorPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, - cursorThis: MergeCursor + cursorThis: MergeCursor ) => { const children = parent.children; const nextHeight = height - 1; @@ -189,11 +166,11 @@ export function decompose( }; const onStepDown = ( - node: BNodeInternal, + node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, - cursorThis: MergeCursor + cursorThis: MergeCursor ) => { if (stepDownIndex > 0) { // When we step down into a node, we know that we have walked from a key that is less than our target. @@ -211,10 +188,10 @@ export function decompose( }; const onEnterLeaf = ( - leaf: BNode, + leaf: BNode, destIndex: number, - cursorThis: MergeCursor, - cursorOther: MergeCursor + cursorThis: MergeCursor, + cursorOther: MergeCursor ) => { if (destIndex > 0 || areOverlapping(leaf.minKey()!, leaf.maxKey(), getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { @@ -234,14 +211,14 @@ export function decompose( const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; // Initialize cursors at minimum keys. 
- const curA = createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + const curA = createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); let curB: typeof curA; if (ignoreRight) { const dummyPayload: MergeCursorPayload = { disqualified: true }; - curB = createCursor(right, () => dummyPayload, noop, noop, noop, noop, noop); + curB = createCursor(right, () => dummyPayload, noop, noop, noop, noop, noop); } else { - curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); } // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful @@ -254,7 +231,7 @@ export function decompose( // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. // Initialize disqualification w.r.t. opposite leaf. - const initDisqualify = (cur: MergeCursor, other: MergeCursor) => { + const initDisqualify = (cur: MergeCursor, other: MergeCursor) => { const minKey = getKey(cur); const otherMin = getKey(other); const otherMax = other.leaf.maxKey(); @@ -287,7 +264,7 @@ export function decompose( // to pending because they respect the areEqual flag during their moves. 
const merged = mergeValues(key, vA, vB); if (merged !== undefined) - alternatingPush(pending, key, merged); + alternatingPush(pending, key, merged); const outTrailing = moveForwardOne(trailing, leading, key, cmp); const outLeading = moveForwardOne(leading, trailing, key, cmp); if (outTrailing || outLeading) { @@ -327,10 +304,10 @@ export function decompose( return { disjoint, tallestIndex }; } -export function buildFromDecomposition, K,V>( - constructor: new (entries?: [K,V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, +export function buildFromDecomposition, K, V>( + constructor: new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, branchingFactor: number, - decomposed: DecomposeResult, + decomposed: DecomposeResult, cmp: (a: K, b: K) => number, maxNodeSize: number ): TBTree { @@ -345,8 +322,8 @@ export function buildFromDecomposition, K,V>( // the leaf level on that side of the tree. Each appended subtree is appended to the node at the // same height as itself on the frontier. Each tree is guaranteed to be at most as tall as the // current frontier because we start from the tallest subtree and work outward. - const initialRoot = alternatingGetSecond>(disjoint, tallestIndex); - const frontier: BNode[] = [initialRoot]; + const initialRoot = alternatingGetSecond>(disjoint, tallestIndex); + const frontier: BNode[] = [initialRoot]; // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjointEntryCount - 1) { @@ -383,7 +360,7 @@ export function buildFromDecomposition, K,V>( } const merged = new constructor(undefined, cmp, maxNodeSize); - (merged as unknown as BTreeWithInternals)._root = frontier[0]; + (merged as unknown as BTreeWithInternals)._root = frontier[0]; // Return the resulting tree return merged; @@ -393,17 +370,17 @@ export function buildFromDecomposition, K,V>( * Processes one side (left or right) of the disjoint subtree set during a merge operation. 
* Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. */ -function processSide( +function processSide( branchingFactor: number, - disjoint: (number | BNode)[], - spine: BNode[], + disjoint: (number | BNode)[], + spine: BNode[], start: number, end: number, step: number, - sideIndex: (node: BNodeInternal) => number, - sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal, - updateMax: (node: BNodeInternal, maxBelow: K) => void + sideIndex: (node: BNodeInternal) => number, + sideInsertionIndex: (node: BNodeInternal) => number, + splitOffSide: (node: BNodeInternal) => BNodeInternal, + updateMax: (node: BNodeInternal, maxBelow: K) => void ): void { // Determine the depth of the first shared node on the frontier. // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning @@ -414,7 +391,7 @@ function processSide( // Find the first shared node on the frontier while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { isSharedFrontierDepth++; - cur = (cur as BNodeInternal).children[sideIndex(cur as BNodeInternal)]; + cur = (cur as BNodeInternal).children[sideIndex(cur as BNodeInternal)]; } // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. 
@@ -427,8 +404,8 @@ function processSide( for (let i = start; i != end; i += step) { const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf - const subtree = alternatingGetSecond>(disjoint, i); - const subtreeHeight = alternatingGetFirst>(disjoint, i); + const subtree = alternatingGetSecond>(disjoint, i); + const subtreeHeight = alternatingGetFirst>(disjoint, i); const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation @@ -472,15 +449,15 @@ function processSide( * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. * Returns a new root if the root was split, otherwise undefined. */ -function appendAndCascade( - spine: BNode[], +function appendAndCascade( + spine: BNode[], insertionDepth: number, branchingFactor: number, - subtree: BNode, - sideIndex: (node: BNodeInternal) => number, - sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal -): BNodeInternal | undefined { + subtree: BNode, + sideIndex: (node: BNodeInternal) => number, + sideInsertionIndex: (node: BNodeInternal) => number, + splitOffSide: (node: BNodeInternal) => BNodeInternal +): BNodeInternal | undefined { // We must take care to avoid accidental propagation upward of the size of the inserted su // To do this, we first split nodes upward from the insertion point until we find a node with capacity // or create a new root. 
Since all un-propagated sizes have already been applied to the spine up to this point, @@ -488,16 +465,16 @@ function appendAndCascade( // Depth is -1 if the subtree is the same height as the current tree if (insertionDepth >= 0) { - let carry: BNode | undefined = undefined; + let carry: BNode | undefined = undefined; // Determine initially where to insert after any splits - let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; + let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; if (insertTarget.keys.length >= branchingFactor) { insertTarget = carry = splitOffSide(insertTarget); } let d = insertionDepth - 1; while (carry && d >= 0) { - const parent = spine[d] as BNodeInternal; + const parent = spine[d] as BNodeInternal; const idx = sideIndex(parent); // Refresh last key since child was split parent.keys[idx] = parent.children[idx].maxKey(); @@ -518,11 +495,11 @@ function appendAndCascade( d--; } - let newRoot: BNodeInternal | undefined = undefined; + let newRoot: BNodeInternal | undefined = undefined; if (carry !== undefined) { // Expansion reached the root, need a new root to hold carry - const oldRoot = spine[0] as BNodeInternal; - newRoot = new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); + const oldRoot = spine[0] as BNodeInternal; + newRoot = new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); } @@ -531,8 +508,8 @@ function appendAndCascade( return newRoot; } else { // Insertion of subtree with equal height to current tree - const oldRoot = spine[0] as BNodeInternal; - const newRoot = new BNodeInternal([oldRoot], oldRoot.size()); + const oldRoot = spine[0] as BNodeInternal; + const newRoot = new BNodeInternal([oldRoot], oldRoot.size()); insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); return newRoot; } @@ -542,39 +519,39 @@ function appendAndCascade( * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive 
so path is safe to mutate. * Short-circuits if first shared node is deeper than depthTo (the insertion depth). */ -function ensureNotShared( - spine: BNode[], +function ensureNotShared( + spine: BNode[], isSharedFrontierDepth: number, depthToInclusive: number, - sideIndex: (node: BNodeInternal) => number) { + sideIndex: (node: BNodeInternal) => number) { if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this // Clone root if needed first (depth 0) if (isSharedFrontierDepth === 0) { const root = spine[0]; - spine[0] = root.clone() as BNodeInternal; + spine[0] = root.clone() as BNodeInternal; } // Clone downward along the frontier to 'depthToInclusive' for (let depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { - const parent = spine[depth - 1] as BNodeInternal; + const parent = spine[depth - 1] as BNodeInternal; const childIndex = sideIndex(parent); const clone = parent.children[childIndex].clone(); parent.children[childIndex] = clone; - spine[depth] = clone as BNodeInternal; + spine[depth] = clone as BNodeInternal; } }; /** * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ -function updateSizeAndMax( - spine: BNode[], +function updateSizeAndMax( + spine: BNode[], unflushedSizes: number[], isSharedFrontierDepth: number, depthUpToInclusive: number, - updateMax: (node: BNodeInternal, maxBelow: K) => void) { + updateMax: (node: BNodeInternal, maxBelow: K) => void) { // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because // the insertion point is inside a shared node which will always have correct sizes const maxKey = spine[isSharedFrontierDepth].maxKey(); @@ -587,7 +564,7 @@ function updateSizeAndMax( // at the end of processing the entire side unflushedSizes[depth - 1] += sizeAtLevel; } - const node = spine[depth] as BNodeInternal; + const node = 
spine[depth] as BNodeInternal; node._size += sizeAtLevel; // No-op if left side, as max keys in parents are unchanged by appending to the beginning of a node updateMax(node, maxKey); @@ -598,16 +575,16 @@ function updateSizeAndMax( * Update a spine (frontier) from a specific depth down, inclusive. * Extends the frontier array if it is not already as long as the frontier. */ -function updateFrontier(frontier: BNode[], depthLastValid: number, sideIndex: (node: BNodeInternal) => number): void { +function updateFrontier(frontier: BNode[], depthLastValid: number, sideIndex: (node: BNodeInternal) => number): void { check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); const startingAncestor = frontier[depthLastValid]; if (startingAncestor.isLeaf) return; - const internal = startingAncestor as BNodeInternal; - let cur: BNode = internal.children[sideIndex(internal)]; + const internal = startingAncestor as BNodeInternal; + let cur: BNode = internal.children[sideIndex(internal)]; let depth = depthLastValid + 1; while (!cur.isLeaf) { - const ni = cur as BNodeInternal; + const ni = cur as BNodeInternal; frontier[depth] = ni; cur = ni.children[sideIndex(ni)]; depth++; @@ -618,7 +595,7 @@ function updateFrontier(frontier: BNode[], depthLastValid: number, si /** * Find the first ancestor (starting at insertionDepth) with capacity. */ -function findCascadeEndDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { +function findCascadeEndDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { for (let depth = insertionDepth; depth >= 0; depth--) { if (spine[depth].keys.length < branchingFactor) return depth; @@ -629,10 +606,10 @@ function findCascadeEndDepth(spine: BNode[], insertionDepth: number, b /** * Inserts the child without updating cached size counts. 
*/ -function insertNoCount( - parent: BNodeInternal, +function insertNoCount( + parent: BNodeInternal, index: number, - child: BNode + child: BNode ): void { parent.children.splice(index, 0, child); parent.keys.splice(index, 0, child.maxKey()); @@ -640,27 +617,27 @@ function insertNoCount( // ---- Side-specific delegates for merging subtrees into a frontier ---- -function getLeftmostIndex(): number { +function getLeftmostIndex(): number { return 0; } -function getRightmostIndex(node: BNodeInternal): number { +function getRightmostIndex(node: BNodeInternal): number { return node.children.length - 1; } -function getRightInsertionIndex(node: BNodeInternal): number { +function getRightInsertionIndex(node: BNodeInternal): number { return node.children.length; } -function splitOffRightSide(node: BNodeInternal): BNodeInternal { +function splitOffRightSide(node: BNodeInternal): BNodeInternal { return node.splitOffRightSide(); } -function splitOffLeftSide(node: BNodeInternal): BNodeInternal { +function splitOffLeftSide(node: BNodeInternal): BNodeInternal { return node.splitOffLeftSide(); } -function updateRightMax(node: BNodeInternal, maxBelow: K): void { +function updateRightMax(node: BNodeInternal, maxBelow: K): void { node.keys[node.keys.length - 1] = maxBelow; } @@ -669,19 +646,19 @@ function updateRightMax(node: BNodeInternal, maxBelow: K): void { // Storing data this way avoids small tuple allocations and shows major improvements // in GC time in benchmarks. 
-function alternatingCount(list: unknown[]): number { +export function alternatingCount(list: unknown[]): number { return list.length >> 1; } -function alternatingGetFirst(list: Array, index: number): TFirst { +export function alternatingGetFirst(list: Array, index: number): TFirst { return list[index << 1] as TFirst; } -function alternatingGetSecond(list: Array, index: number): TSecond { +export function alternatingGetSecond(list: Array, index: number): TSecond { return list[(index << 1) + 1] as TSecond; } -function alternatingPush(list: Array, first: TFirst, second: TSecond): void { +export function alternatingPush(list: Array, first: TFirst, second: TSecond): void { // Micro benchmarks show this is the fastest way to do this list.push(first, second); } \ No newline at end of file diff --git a/extended/intersect.d.ts b/extended/intersect.d.ts new file mode 100644 index 0000000..e298bc6 --- /dev/null +++ b/extended/intersect.d.ts @@ -0,0 +1,17 @@ +import BTree from '../b+tree'; +/** + * Returns a new tree containing only keys present in both input trees. + * Neither tree is modified. + * @param treeA First tree to intersect. + * @param treeB Second tree to intersect. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
+ * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ +export default function intersect, K, V>(treeA: TBTree, treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V): TBTree; diff --git a/extended/intersect.js b/extended/intersect.js new file mode 100644 index 0000000..883e85d --- /dev/null +++ b/extended/intersect.js @@ -0,0 +1,44 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +var parallelWalk_1 = require("./parallelWalk"); +var decompose_1 = require("./decompose"); +var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); +/** + * Returns a new tree containing only keys present in both input trees. + * Neither tree is modified. + * @param treeA First tree to intersect. + * @param treeB Second tree to intersect. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @description Complexity is bounded O(N + M) for both time and allocations. + * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
+ */ +function intersect(treeA, treeB, combineFn) { + var _treeA = treeA; + var _treeB = treeB; + var branchingFactor = (0, parallelWalk_1.checkCanDoSetOperation)(_treeA, _treeB); + if (_treeA._root.size() === 0) + return treeB.clone(); + if (_treeB._root.size() === 0) + return treeA.clone(); + var intersected = []; + (0, forEachKeyInBoth_1.default)(treeA, treeB, function (key, leftValue, rightValue) { + var mergedValue = combineFn(key, leftValue, rightValue); + (0, decompose_1.alternatingPush)(intersected, key, mergedValue); + }); + // Decompose both trees into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are the result of merging intersecting leaves. + var decomposed = (0, decompose_1.decompose)(_treeA, _treeB, combineFn); + var constructor = treeA.constructor; + return (0, decompose_1.buildFromDecomposition)(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); +} +exports.default = intersect; diff --git a/extended/intersect.ts b/extended/intersect.ts new file mode 100644 index 0000000..5840454 --- /dev/null +++ b/extended/intersect.ts @@ -0,0 +1,47 @@ +import BTree from '../b+tree'; +import type { BTreeWithInternals } from './shared'; +import { checkCanDoSetOperation } from "./parallelWalk" +import { alternatingPush, buildFromDecomposition, decompose } from './decompose'; +import forEachKeyInBoth from './forEachKeyInBoth'; + +/** + * Returns a new tree containing only keys present in both input trees. + * Neither tree is modified. + * @param treeA First tree to intersect. + * @param treeB Second tree to intersect. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. + * @description Complexity is bounded O(N + M) for both time and allocations. 
+ * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ +export default function intersect, K,V>( + treeA: TBTree, + treeB: TBTree, + combineFn: (key: K, leftValue: V, rightValue: V) => V +): TBTree { + const _treeA = treeA as unknown as BTreeWithInternals; + const _treeB = treeB as unknown as BTreeWithInternals; + const branchingFactor = checkCanDoSetOperation(_treeA, _treeB); + if (_treeA._root.size() === 0) + return treeB.clone(); + if (_treeB._root.size() === 0) + return treeA.clone(); + + const intersected: (K | V)[] = []; + forEachKeyInBoth(treeA, treeB, (key, leftValue, rightValue) => { + const mergedValue = combineFn(key, leftValue, rightValue); + alternatingPush(intersected, key, mergedValue); + }); + + // Decompose both trees into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are the result of merging intersecting leaves. 
+ const decomposed = decompose(_treeA, _treeB, combineFn); + const constructor = treeA.constructor as new (entries?: [K,V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; + return buildFromDecomposition(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); +} \ No newline at end of file From 7d1bef9f363ab6d35cdeafc307043ddbf7f74efa Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 08:33:02 -0800 Subject: [PATCH 071/143] formatting and stub --- extended/bulkLoad.d.ts | 1 + extended/bulkLoad.js | 6 +++- extended/bulkLoad.ts | 4 +++ extended/forEachKeyInBoth.ts | 12 ++++---- extended/index.ts | 6 ++-- extended/intersect.ts | 8 ++--- extended/parallelWalk.ts | 58 ++++++++++++++++++------------------ extended/union.ts | 8 ++--- 8 files changed, 56 insertions(+), 47 deletions(-) diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index 1069c0e..4b66467 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -1,2 +1,3 @@ import { BNode } from '../b+tree'; +export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode; export declare function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, toFlushTo: (number | BNode)[]): number; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 53e4a25..299ec01 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -1,8 +1,12 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.flushToLeaves = void 0; +exports.flushToLeaves = exports.bulkLoad = void 0; var b_tree_1 = require("../b+tree"); var decompose_1 = require("./decompose"); +function bulkLoad(entries, maxNodeSize) { + throw new Error('Not implemented'); +} +exports.bulkLoad = bulkLoad; function flushToLeaves(alternatingList, maxNodeSize, toFlushTo) { var totalPairs = (0, decompose_1.alternatingCount)(alternatingList); if (totalPairs === 0) diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 
4970f86..7e5f26f 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -2,6 +2,10 @@ import { BNode, BNodeInternal } from '../b+tree'; import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush } from './decompose'; import type { BTreeWithInternals } from './shared'; +export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode { + throw new Error('Not implemented'); +} + export function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, toFlushTo: (number | BNode)[]): number { const totalPairs = alternatingCount(alternatingList); if (totalPairs === 0) diff --git a/extended/forEachKeyInBoth.ts b/extended/forEachKeyInBoth.ts index 27bfaa9..5210094 100644 --- a/extended/forEachKeyInBoth.ts +++ b/extended/forEachKeyInBoth.ts @@ -16,21 +16,21 @@ import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperat * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ -export default function forEachKeyInBoth(treeA: BTree, treeB: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void { - const _treeA = treeA as unknown as BTreeWithInternals; - const _treeB = treeB as unknown as BTreeWithInternals; +export default function forEachKeyInBoth(treeA: BTree, treeB: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void { + const _treeA = treeA as unknown as BTreeWithInternals; + const _treeB = treeB as unknown as BTreeWithInternals; checkCanDoSetOperation(_treeA, _treeB); if (treeB.size === 0 || treeA.size === 0) return; const cmp = treeA._compare; const makePayload = (): undefined => undefined; - let cursorA = createCursor(_treeA, makePayload, noop, noop, noop, noop, noop); - let cursorB = createCursor(_treeB, makePayload, noop, noop, noop, noop, noop); + let cursorA = createCursor(_treeA, makePayload, noop, noop, noop, noop, noop); + let cursorB = createCursor(_treeB, makePayload, noop, noop, noop, noop, noop); let leading = cursorA; let trailing = cursorB; let order = cmp(getKey(leading), getKey(trailing)); - + // This walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. // However, the only thing we care about is when the two cursors are equal (equality is intersection). // When they are not equal we just advance the trailing cursor. diff --git a/extended/index.ts b/extended/index.ts index 04a5b31..770d100 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -58,7 +58,7 @@ export class BTreeEx extends BTree { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ - forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void { + forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void { forEachKeyInBoth(this, other, callback); } @@ -77,8 +77,8 @@ export class BTreeEx extends BTree { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. */ - union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx { - return union, K, V>(this, other, combineFn); + union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx { + return union, K, V>(this, other, combineFn); } } diff --git a/extended/intersect.ts b/extended/intersect.ts index 5840454..bdcb996 100644 --- a/extended/intersect.ts +++ b/extended/intersect.ts @@ -19,13 +19,13 @@ import forEachKeyInBoth from './forEachKeyInBoth'; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ -export default function intersect, K,V>( +export default function intersect, K, V>( treeA: TBTree, treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V ): TBTree { - const _treeA = treeA as unknown as BTreeWithInternals; - const _treeB = treeB as unknown as BTreeWithInternals; + const _treeA = treeA as unknown as BTreeWithInternals; + const _treeB = treeB as unknown as BTreeWithInternals; const branchingFactor = checkCanDoSetOperation(_treeA, _treeB); if (_treeA._root.size() === 0) return treeB.clone(); @@ -42,6 +42,6 @@ export default function intersect, K,V>( // As many of these as possible will be reused from the original trees, and the remaining // will be leaves that are the result of merging intersecting leaves. 
const decomposed = decompose(_treeA, _treeB, combineFn); - const constructor = treeA.constructor as new (entries?: [K,V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; + const constructor = treeA.constructor as new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; return buildFromDecomposition(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); } \ No newline at end of file diff --git a/extended/parallelWalk.ts b/extended/parallelWalk.ts index 47a176c..d4941e2 100644 --- a/extended/parallelWalk.ts +++ b/extended/parallelWalk.ts @@ -11,8 +11,8 @@ export interface MergeCursor { leafPayload: TPayload; makePayload: () => TPayload; onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; - onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; - onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; + onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; + onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; } @@ -22,11 +22,11 @@ export interface MergeCursor { * Should only be called to advance cursors that started equal. * Returns true if end-of-tree was reached (cursor not structurally mutated). 
*/ -export function moveForwardOne( - cur: MergeCursor, - other: MergeCursor, +export function moveForwardOne( + cur: MergeCursor, + other: MergeCursor, currentKey: K, - cmp: (a:K,b:K)=>number + cmp: (a: K, b: K) => number ): boolean { const leaf = cur.leaf; const nextIndex = cur.leafIndex + 1; @@ -45,32 +45,32 @@ export function moveForwardOne( /** * Create a cursor pointing to the leftmost key of the supplied tree. */ -export function createCursor( - tree: BTreeWithInternals, - makePayload: MergeCursor["makePayload"], - onEnterLeaf: MergeCursor["onEnterLeaf"], - onMoveInLeaf: MergeCursor["onMoveInLeaf"], - onExitLeaf: MergeCursor["onExitLeaf"], - onStepUp: MergeCursor["onStepUp"], - onStepDown: MergeCursor["onStepDown"], -): MergeCursor { - const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; - let n: BNode = tree._root; +export function createCursor( + tree: BTreeWithInternals, + makePayload: MergeCursor["makePayload"], + onEnterLeaf: MergeCursor["onEnterLeaf"], + onMoveInLeaf: MergeCursor["onMoveInLeaf"], + onExitLeaf: MergeCursor["onExitLeaf"], + onStepUp: MergeCursor["onStepUp"], + onStepDown: MergeCursor["onStepDown"], +): MergeCursor { + const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; + let n: BNode = tree._root; while (!n.isLeaf) { - const ni = n as BNodeInternal; + const ni = n as BNodeInternal; const payload = makePayload(); spine.push({ node: ni, childIndex: 0, payload }); n = ni.children[0]; } const leafPayload = makePayload(); - const cur: MergeCursor = { + const cur: MergeCursor = { tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown }; return cur; } -export function getKey(c: MergeCursor): K { +export function getKey(c: MergeCursor): K { return c.leaf.keys[c.leafIndex]; } @@ -79,13 +79,13 @@ export function getKey(c: MergeCursor): K { * Returns a boolean indicating if end-of-tree was reached (cursor not 
structurally mutated). * Also returns a boolean indicating if the target key was landed on exactly. */ -export function moveTo( - cur: MergeCursor, - other: MergeCursor, +export function moveTo( + cur: MergeCursor, + other: MergeCursor, targetKey: K, isInclusive: boolean, startedEqual: boolean, - cmp: (a:K,b:K)=>number + cmp: (a: K, b: K) => number ): [outOfTree: boolean, targetExactlyReached: boolean] { // Cache callbacks for perf const onMoveInLeaf = cur.onMoveInLeaf; @@ -169,10 +169,10 @@ export function moveTo( // Descend, invoking onStepDown and creating payloads let height = initialSpineLength - descentLevel - 1; // calculate height before changing length spine.length = descentLevel + 1; - let node: BNode = spine[descentLevel].node.children[descentIndex]; + let node: BNode = spine[descentLevel].node.children[descentIndex]; while (!node.isLeaf) { - const ni = node as BNodeInternal; + const ni = node as BNodeInternal; const keys = ni.keys; let stepDownIndex = ni.indexOf(targetKey, 0, cmp); if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) @@ -207,14 +207,14 @@ export function moveTo( return [false, targetExactlyReached]; } -export function noop(): void {} +export function noop(): void { } export const comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; export const branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; -export function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals): number { - if (treeA._compare !== treeB._compare) +export function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals): number { + if (treeA._compare !== treeB._compare) throw new Error(comparatorErrorMsg); const branchingFactor = treeA._maxNodeSize; diff --git a/extended/union.ts b/extended/union.ts index bc45a0c..1e49fd0 100644 --- a/extended/union.ts +++ b/extended/union.ts @@ -19,13 +19,13 @@ 
import { checkCanDoSetOperation } from "./parallelWalk"; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` * and inserting the contents of `other` into the clone. */ -export default function union, K,V>( +export default function union, K, V>( treeA: TBTree, treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined ): TBTree { - const _treeA = treeA as unknown as BTreeWithInternals; - const _treeB = treeB as unknown as BTreeWithInternals; + const _treeA = treeA as unknown as BTreeWithInternals; + const _treeB = treeB as unknown as BTreeWithInternals; const branchingFactor = checkCanDoSetOperation(_treeA, _treeB); if (_treeA._root.size() === 0) return treeB.clone(); @@ -36,6 +36,6 @@ export default function union, K,V>( // As many of these as possible will be reused from the original trees, and the remaining // will be leaves that are the result of merging intersecting leaves. const decomposed = decompose(_treeA, _treeB, combineFn); - const constructor = treeA.constructor as new (entries?: [K,V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; + const constructor = treeA.constructor as new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; return buildFromDecomposition(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); } From 3444fd8961dcfdf6027682f134ee75f78865411f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 08:41:35 -0800 Subject: [PATCH 072/143] wip --- extended/bulkLoad.d.ts | 2 +- extended/bulkLoad.js | 9 ++++++++- extended/bulkLoad.ts | 11 +++++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index 4b66467..c35218c 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -1,3 +1,3 @@ import { BNode } from '../b+tree'; -export declare function bulkLoad(entries: (K | V)[], maxNodeSize: 
number): BNode; +export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode | undefined; export declare function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, toFlushTo: (number | BNode)[]): number; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 299ec01..effccf1 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -4,7 +4,14 @@ exports.flushToLeaves = exports.bulkLoad = void 0; var b_tree_1 = require("../b+tree"); var decompose_1 = require("./decompose"); function bulkLoad(entries, maxNodeSize) { - throw new Error('Not implemented'); + var leaves = []; + flushToLeaves(entries, maxNodeSize, leaves); + var leafCount = (0, decompose_1.alternatingCount)(leaves); + if (leafCount === 0) + return undefined; + if (leafCount === 1) + return (0, decompose_1.alternatingGetFirst)(leaves, 0); + throw new Error("bulkLoad: multiple leaves not yet supported"); } exports.bulkLoad = bulkLoad; function flushToLeaves(alternatingList, maxNodeSize, toFlushTo) { diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 7e5f26f..61b31d4 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -2,8 +2,15 @@ import { BNode, BNodeInternal } from '../b+tree'; import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush } from './decompose'; import type { BTreeWithInternals } from './shared'; -export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode { - throw new Error('Not implemented'); +export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode | undefined { + const leaves: (number | BNode)[] = []; + flushToLeaves(entries, maxNodeSize, leaves); + const leafCount = alternatingCount(leaves); + if (leafCount === 0) + return undefined; + if (leafCount === 1) + return alternatingGetFirst, number>(leaves, 0); + throw new Error("bulkLoad: multiple leaves not yet supported"); } export function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, toFlushTo: 
(number | BNode)[]): number { From ce97fb7624e01370262ce3b674ef80efdde468bf Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 09:15:09 -0800 Subject: [PATCH 073/143] refactor for bulkLoad --- extended/bulkLoad.d.ts | 2 +- extended/bulkLoad.js | 10 +++++----- extended/bulkLoad.ts | 19 +++++++++++-------- extended/decompose.js | 5 ++++- extended/decompose.ts | 6 +++++- 5 files changed, 26 insertions(+), 16 deletions(-) diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index c35218c..3c05b12 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -1,3 +1,3 @@ import { BNode } from '../b+tree'; export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode | undefined; -export declare function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, toFlushTo: (number | BNode)[]): number; +export declare function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, onLeafCreation: (node: BNode) => void): number; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index effccf1..78fdaa6 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -5,16 +5,16 @@ var b_tree_1 = require("../b+tree"); var decompose_1 = require("./decompose"); function bulkLoad(entries, maxNodeSize) { var leaves = []; - flushToLeaves(entries, maxNodeSize, leaves); - var leafCount = (0, decompose_1.alternatingCount)(leaves); + flushToLeaves(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }); + var leafCount = leaves.length; if (leafCount === 0) return undefined; if (leafCount === 1) - return (0, decompose_1.alternatingGetFirst)(leaves, 0); + return leaves[0]; throw new Error("bulkLoad: multiple leaves not yet supported"); } exports.bulkLoad = bulkLoad; -function flushToLeaves(alternatingList, maxNodeSize, toFlushTo) { +function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation) { var totalPairs = (0, decompose_1.alternatingCount)(alternatingList); if (totalPairs === 0) return 0; @@ 
-36,7 +36,7 @@ function flushToLeaves(alternatingList, maxNodeSize, toFlushTo) { remaining -= chunkSize; remainingLeaves--; var leaf = new b_tree_1.BNode(keys, vals); - (0, decompose_1.alternatingPush)(toFlushTo, 0, leaf); + onLeafCreation(leaf); } alternatingList.length = 0; return leafCount; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 61b31d4..8bdd4d1 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,19 +1,22 @@ import { BNode, BNodeInternal } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush } from './decompose'; -import type { BTreeWithInternals } from './shared'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond } from './decompose'; export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode | undefined { - const leaves: (number | BNode)[] = []; - flushToLeaves(entries, maxNodeSize, leaves); - const leafCount = alternatingCount(leaves); + const leaves: BNode[] = []; + flushToLeaves(entries, maxNodeSize, (leaf) => leaves.push(leaf)); + const leafCount = leaves.length; if (leafCount === 0) return undefined; if (leafCount === 1) - return alternatingGetFirst, number>(leaves, 0); + return leaves[0]; throw new Error("bulkLoad: multiple leaves not yet supported"); } -export function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, toFlushTo: (number | BNode)[]): number { +export function flushToLeaves( + alternatingList: (K | V)[], + maxNodeSize: number, + onLeafCreation: (node: BNode) => void +): number { const totalPairs = alternatingCount(alternatingList); if (totalPairs === 0) return 0; @@ -36,7 +39,7 @@ export function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: num remaining -= chunkSize; remainingLeaves--; const leaf = new BNode(keys, vals); - alternatingPush(toFlushTo, 0, leaf); + onLeafCreation(leaf); } alternatingList.length = 0; return leafCount; diff --git a/extended/decompose.js b/extended/decompose.js 
index 9540ffe..4d448c2 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -33,8 +33,11 @@ function decompose(left, right, mergeValues, ignoreRight) { // This is done because we cannot know immediately whether we can add the node to the disjoint set // because its ancestor may also be disjoint and should be reused instead. var highestDisjoint = undefined; + var onLeafCreation = function (leaf) { + alternatingPush(disjoint, 0, leaf); + }; var flushPendingEntries = function () { - var createdLeaves = (0, bulkLoad_1.flushToLeaves)(pending, left._maxNodeSize, disjoint); + var createdLeaves = (0, bulkLoad_1.flushToLeaves)(pending, left._maxNodeSize, onLeafCreation); if (createdLeaves > 0) { tallestIndex = alternatingCount(disjoint) - 1; tallestHeight = 0; diff --git a/extended/decompose.ts b/extended/decompose.ts index 7886c48..f3d15ab 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -42,8 +42,12 @@ export function decompose( // Have to do this as cast to convince TS it's ever assigned = undefined as { node: BNode, height: number } | undefined; + const onLeafCreation = (leaf: BNode) => { + alternatingPush(disjoint, 0, leaf); + } + const flushPendingEntries = () => { - const createdLeaves = flushToLeaves(pending, left._maxNodeSize, disjoint); + const createdLeaves = flushToLeaves(pending, left._maxNodeSize, onLeafCreation); if (createdLeaves > 0) { tallestIndex = alternatingCount(disjoint) - 1; tallestHeight = 0; From 9c2478d8f8f5c6538c4c070b3329cdb29563cab2 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 09:29:10 -0800 Subject: [PATCH 074/143] bulk load impl --- b+tree.d.ts | 6 ------ b+tree.js | 10 +++++++++- b+tree.ts | 9 ++++++++- extended/bulkLoad.js | 31 ++++++++++++++++++++++++++++--- extended/bulkLoad.ts | 38 ++++++++++++++++++++++++++++++++++---- 5 files changed, 79 insertions(+), 15 deletions(-) diff --git a/b+tree.d.ts b/b+tree.d.ts index f0cd1a5..57fddd7 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -425,11 
+425,5 @@ export default class BTree implements ISortedMapF, ISort * was the intention, but TypeScript is acting weird and may return `ISortedSet` * even if `V` can't be `undefined` (discussion: btree-typescript issue #14) */ export declare function asSet(btree: BTree): undefined extends V ? ISortedSet : unknown; -/** - * Determines whether two nodes are overlapping in key range. - * Takes the leftmost known key of each node to avoid a log(n) min calculation. - * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. - */ -export declare function areOverlapping(aMin: K, aMax: K, bMin: K, bMax: K, cmp: (x: K, y: K) => number): boolean; /** A BTree frozen in the empty state. */ export declare const EmptyBTree: BTree; diff --git a/b+tree.js b/b+tree.js index 7d8061c..25111bb 100644 --- a/b+tree.js +++ b/b+tree.js @@ -15,7 +15,7 @@ var __extends = (this && this.__extends) || (function () { }; })(); Object.defineProperty(exports, "__esModule", { value: true }); -exports.EmptyBTree = exports.check = exports.areOverlapping = exports.BNodeInternal = exports.BNode = exports.asSet = exports.simpleComparator = exports.defaultComparator = void 0; +exports.EmptyBTree = exports.check = exports.areOverlapping = exports.sumChildSizes = exports.BNodeInternal = exports.BNode = exports.asSet = exports.simpleComparator = exports.defaultComparator = void 0; /** * Compares DefaultComparables to form a strict partial ordering. * @@ -1490,16 +1490,24 @@ exports.BNodeInternal = BNodeInternal; // Reading outside the bounds of an array is relatively slow because it // has the side effect of scanning the prototype chain. var undefVals = []; +/** + * Sums the sizes of the given child nodes. 
+ * @param children the child nodes + * @returns the total size + * @internal + */ function sumChildSizes(children) { var total = 0; for (var i = 0; i < children.length; i++) total += children[i].size(); return total; } +exports.sumChildSizes = sumChildSizes; /** * Determines whether two nodes are overlapping in key range. * Takes the leftmost known key of each node to avoid a log(n) min calculation. * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. + * @internal */ function areOverlapping(aMin, aMax, bMin, bMax, cmp) { // There are 4 possibilities: diff --git a/b+tree.ts b/b+tree.ts index 9dae422..2694ba9 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1639,7 +1639,13 @@ export class BNodeInternal extends BNode { // has the side effect of scanning the prototype chain. var undefVals: any[] = []; -function sumChildSizes(children: BNode[]): number { +/** + * Sums the sizes of the given child nodes. + * @param children the child nodes + * @returns the total size + * @internal + */ +export function sumChildSizes(children: BNode[]): number { var total = 0; for (var i = 0; i < children.length; i++) total += children[i].size(); @@ -1650,6 +1656,7 @@ function sumChildSizes(children: BNode[]): number { * Determines whether two nodes are overlapping in key range. * Takes the leftmost known key of each node to avoid a log(n) min calculation. * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. 
+ * @internal */ export function areOverlapping( aMin: K, diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 78fdaa6..4bf67b7 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -9,9 +9,34 @@ function bulkLoad(entries, maxNodeSize) { var leafCount = leaves.length; if (leafCount === 0) return undefined; - if (leafCount === 1) - return leaves[0]; - throw new Error("bulkLoad: multiple leaves not yet supported"); + var currentLevel = leaves; + while (true) { + var nodeCount = currentLevel.length; + if (nodeCount === 1) + return currentLevel[0]; + if (nodeCount <= maxNodeSize) { + return new b_tree_1.BNodeInternal(currentLevel, (0, b_tree_1.sumChildSizes)(currentLevel)); + } + var nextLevelCount = Math.ceil(nodeCount / maxNodeSize); + var nextLevel = new Array(nextLevelCount); + var remainingNodes = nodeCount; + var remainingParents = nextLevelCount; + var childIndex = 0; + for (var i = 0; i < nextLevelCount; i++) { + var chunkSize = Math.ceil(remainingNodes / remainingParents); + var children = new Array(chunkSize); + var size = 0; + for (var j = 0; j < chunkSize; j++) { + var child = currentLevel[childIndex++]; + children[j] = child; + size += child.size(); + } + remainingNodes -= chunkSize; + remainingParents--; + nextLevel[i] = new b_tree_1.BNodeInternal(children, size); + } + currentLevel = nextLevel; + } } exports.bulkLoad = bulkLoad; function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation) { diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 8bdd4d1..fbc2ce2 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,4 +1,4 @@ -import { BNode, BNodeInternal } from '../b+tree'; +import { BNode, BNodeInternal, sumChildSizes } from '../b+tree'; import { alternatingCount, alternatingGetFirst, alternatingGetSecond } from './decompose'; export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode | undefined { @@ -7,9 +7,39 @@ export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode[] = 
leaves; + while (true) { + const nodeCount = currentLevel.length; + if (nodeCount === 1) + return currentLevel[0]; + + if (nodeCount <= maxNodeSize) { + return new BNodeInternal(currentLevel, sumChildSizes(currentLevel)); + } + + const nextLevelCount = Math.ceil(nodeCount / maxNodeSize); + const nextLevel = new Array>(nextLevelCount); + let remainingNodes = nodeCount; + let remainingParents = nextLevelCount; + let childIndex = 0; + + for (let i = 0; i < nextLevelCount; i++) { + const chunkSize = Math.ceil(remainingNodes / remainingParents); + const children = new Array>(chunkSize); + let size = 0; + for (let j = 0; j < chunkSize; j++) { + const child = currentLevel[childIndex++]; + children[j] = child; + size += child.size(); + } + remainingNodes -= chunkSize; + remainingParents--; + nextLevel[i] = new BNodeInternal(children, size); + } + + currentLevel = nextLevel; + } } export function flushToLeaves( From e5f92d3bdc3cd0126189f5fe8ea1b7dfec21f58f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 09:56:55 -0800 Subject: [PATCH 075/143] wip bulkLoad --- extended/bulkLoad.js | 7 +++++++ extended/bulkLoad.ts | 12 ++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 4bf67b7..eb5aaee 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -18,6 +18,7 @@ function bulkLoad(entries, maxNodeSize) { return new b_tree_1.BNodeInternal(currentLevel, (0, b_tree_1.sumChildSizes)(currentLevel)); } var nextLevelCount = Math.ceil(nodeCount / maxNodeSize); + (0, b_tree_1.check)(nextLevelCount > 1); var nextLevel = new Array(nextLevelCount); var remainingNodes = nodeCount; var remainingParents = nextLevelCount; @@ -35,6 +36,12 @@ function bulkLoad(entries, maxNodeSize) { remainingParents--; nextLevel[i] = new b_tree_1.BNodeInternal(children, size); } + var minSize = Math.floor(maxNodeSize / 2); + var secondLastNode = nextLevel[nextLevelCount - 2]; + var lastNode = 
nextLevel[nextLevelCount - 1]; + while (lastNode.children.length < minSize) { + lastNode.takeFromLeft(secondLastNode); + } currentLevel = nextLevel; } } diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index fbc2ce2..67c05cb 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,4 +1,4 @@ -import { BNode, BNodeInternal, sumChildSizes } from '../b+tree'; +import { BNode, BNodeInternal, check, sumChildSizes } from '../b+tree'; import { alternatingCount, alternatingGetFirst, alternatingGetSecond } from './decompose'; export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode | undefined { @@ -19,7 +19,8 @@ export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode>(nextLevelCount); + check(nextLevelCount > 1); + const nextLevel = new Array>(nextLevelCount); let remainingNodes = nodeCount; let remainingParents = nextLevelCount; let childIndex = 0; @@ -38,6 +39,13 @@ export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode(children, size); } + const minSize = Math.floor(maxNodeSize / 2); + const secondLastNode = nextLevel[nextLevelCount - 2]; + const lastNode = nextLevel[nextLevelCount - 1]; + while (lastNode.children.length < minSize) { + lastNode.takeFromLeft(secondLastNode); + } + currentLevel = nextLevel; } } From 7a92cb39e9a25e88e19925a6fd53f0b91e1f8fdb Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 10:29:25 -0800 Subject: [PATCH 076/143] bulk load tests --- extended/bulkLoad.d.ts | 2 +- extended/bulkLoad.js | 13 ++++- extended/bulkLoad.ts | 22 ++++++- test/bulkLoad.test.ts | 128 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 5 deletions(-) create mode 100644 test/bulkLoad.test.ts diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index 3c05b12..e764c48 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -1,3 +1,3 @@ import { BNode } from '../b+tree'; -export declare function bulkLoad(entries: (K | V)[], maxNodeSize: 
number): BNode | undefined; +export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number): BNode | undefined; export declare function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, onLeafCreation: (node: BNode) => void): number; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index eb5aaee..65625a6 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -3,7 +3,18 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.flushToLeaves = exports.bulkLoad = void 0; var b_tree_1 = require("../b+tree"); var decompose_1 = require("./decompose"); -function bulkLoad(entries, maxNodeSize) { +function bulkLoad(entries, maxNodeSize, compare) { + var totalPairs = (0, decompose_1.alternatingCount)(entries); + if (totalPairs > 1) { + var cmp = compare !== null && compare !== void 0 ? compare : b_tree_1.defaultComparator; + var previousKey = (0, decompose_1.alternatingGetFirst)(entries, 0); + for (var i = 1; i < totalPairs; i++) { + var key = (0, decompose_1.alternatingGetFirst)(entries, i); + if (cmp(previousKey, key) >= 0) + throw new Error("bulkLoad: entries must be sorted by key in strictly ascending order"); + previousKey = key; + } + } var leaves = []; flushToLeaves(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }); var leafCount = leaves.length; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 67c05cb..0891251 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,7 +1,23 @@ -import { BNode, BNodeInternal, check, sumChildSizes } from '../b+tree'; +import { BNode, BNodeInternal, check, defaultComparator, sumChildSizes } from '../b+tree'; import { alternatingCount, alternatingGetFirst, alternatingGetSecond } from './decompose'; -export function bulkLoad(entries: (K | V)[], maxNodeSize: number): BNode | undefined { +export function bulkLoad( + entries: (K | V)[], + maxNodeSize: number, + compare?: (a: K, b: K) => number +): BNode | 
undefined { + const totalPairs = alternatingCount(entries); + if (totalPairs > 1) { + const cmp = compare ?? (defaultComparator as unknown as (a: K, b: K) => number); + let previousKey = alternatingGetFirst(entries, 0); + for (let i = 1; i < totalPairs; i++) { + const key = alternatingGetFirst(entries, i); + if (cmp(previousKey, key) >= 0) + throw new Error("bulkLoad: entries must be sorted by key in strictly ascending order"); + previousKey = key; + } + } + const leaves: BNode[] = []; flushToLeaves(entries, maxNodeSize, (leaf) => leaves.push(leaf)); const leafCount = leaves.length; @@ -81,4 +97,4 @@ export function flushToLeaves( } alternatingList.length = 0; return leafCount; -}; \ No newline at end of file +}; diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts new file mode 100644 index 0000000..244b80c --- /dev/null +++ b/test/bulkLoad.test.ts @@ -0,0 +1,128 @@ +import BTree, { BNode, BNodeInternal } from '../b+tree'; +import { bulkLoad } from '../extended/bulkLoad'; +import { makeArray } from './shared'; + +type Pair = [number, number]; +const compareNumbers = (a: number, b: number) => a - b; +const branchingFactors = [4, 10, 32, 128]; + +function sequentialPairs(count: number, start = 0, step = 1): Pair[] { + const pairs: Pair[] = []; + let key = start; + for (let i = 0; i < count; i++) { + pairs.push([key, key * 2]); + key += step; + } + return pairs; +} + +function pairsFromKeys(keys: number[]): Pair[] { + return keys.map((key, index) => [key, index - key]); +} + +function buildTreeFromPairs(maxNodeSize: number, pairs: Pair[]) { + const alternating: number[] = []; + for (const [key, value] of pairs) { + alternating.push(key, value); + } + const root = bulkLoad(alternating, maxNodeSize, compareNumbers); + const tree = new BTree(undefined, compareNumbers, maxNodeSize); + if (root !== undefined) { + (tree as any)._root = root; + } + return { tree, root }; +} + +function expectTreeMatches(tree: BTree, expected: Pair[]) { + tree.checkValid(); + 
expect(tree.size).toBe(expected.length); + expect(tree.toArray()).toEqual(expected); +} + +function collectLeaves(node: BNode): BNode[] { + if (node.isLeaf) + return [node]; + const internal = node as unknown as BNodeInternal; + const leaves: BNode[] = []; + for (const child of internal.children) + leaves.push(...collectLeaves(child as BNode)); + return leaves; +} + +function assertInternalNodeFanout(node: BNode, maxNodeSize: number, isRoot = true) { + if (node.isLeaf) + return; + const internal = node as unknown as BNodeInternal; + if (isRoot) { + expect(internal.children.length).toBeGreaterThanOrEqual(2); + } else { + expect(internal.children.length).toBeGreaterThanOrEqual(Math.floor(maxNodeSize / 2)); + } + expect(internal.children.length).toBeLessThanOrEqual(maxNodeSize); + for (const child of internal.children) + assertInternalNodeFanout(child as BNode, maxNodeSize, false); +} + +describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { + test('throws when keys are not strictly ascending', () => { + const alternating = [3, 30, 2, 20]; + expect(() => bulkLoad(alternating.slice(), maxNodeSize, compareNumbers)) + .toThrow('bulkLoad: entries must be sorted by key in strictly ascending order'); + }); + + test('empty input produces empty tree', () => { + const { tree, root } = buildTreeFromPairs(maxNodeSize, []); + expect(root).toBeUndefined(); + expectTreeMatches(tree, []); + }); + + test('single entry stays in one leaf', () => { + const pairs = sequentialPairs(1, 5); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + expectTreeMatches(tree, pairs); + const root = tree['_root'] as BNode; + expect(root.isLeaf).toBe(true); + expect(root.keys).toEqual([5]); + }); + + test('fills a single leaf up to capacity', () => { + const pairs = sequentialPairs(maxNodeSize, 0, 2); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + expectTreeMatches(tree, pairs); + const root = tree['_root'] as BNode; + expect(root.isLeaf).toBe(true); + 
expect(root.keys.length).toBe(maxNodeSize); + }); + + test('distributes keys nearly evenly across leaves when not divisible by fanout', () => { + const inputSize = maxNodeSize * 3 + Math.floor(maxNodeSize / 2) + 1; + const pairs = sequentialPairs(inputSize, 10, 3); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + expectTreeMatches(tree, pairs); + const leaves = collectLeaves(tree['_root'] as BNode); + const leafSizes = leaves.map((leaf) => leaf.keys.length); + const min = Math.min.apply(Math, leafSizes); + const max = Math.max.apply(Math, leafSizes); + expect(max - min).toBeLessThanOrEqual(1); + }); + + test('creates multiple internal layers when leaf count exceeds branching factor', () => { + const inputSize = maxNodeSize * maxNodeSize + Math.floor(maxNodeSize / 2) + 1; + const pairs = sequentialPairs(inputSize, 0, 1); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + expectTreeMatches(tree, pairs); + const root = tree['_root'] as BNode; + expect(root.isLeaf).toBe(false); + assertInternalNodeFanout(root, maxNodeSize); + }); + + test('loads 10000 entries and preserves all data', () => { + const keys = makeArray(10000, false, 3); + const pairs = pairsFromKeys(keys); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + expectTreeMatches(tree, pairs); + const leaves = collectLeaves(tree['_root'] as BNode); + expect(leaves.length).toBe(Math.ceil(pairs.length / maxNodeSize)); + assertInternalNodeFanout(tree['_root'] as BNode, maxNodeSize); + }); +}); From 1bb07b339a334dd4d89d26b68808e98ff43172dc Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 10:32:44 -0800 Subject: [PATCH 077/143] move alternating helpers --- extended/bulkLoad.js | 14 ++++++------- extended/bulkLoad.ts | 2 +- extended/decompose.d.ts | 6 +----- extended/decompose.js | 44 +++++++++++------------------------------ extended/decompose.ts | 24 +--------------------- extended/intersect.js | 3 ++- extended/intersect.ts | 4 ++-- extended/shared.d.ts | 4 ++++ 
extended/shared.js | 22 +++++++++++++++++++++ extended/shared.ts | 22 +++++++++++++++++++++ 10 files changed, 74 insertions(+), 71 deletions(-) diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 65625a6..7d8939b 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -2,14 +2,14 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.flushToLeaves = exports.bulkLoad = void 0; var b_tree_1 = require("../b+tree"); -var decompose_1 = require("./decompose"); +var shared_1 = require("./shared"); function bulkLoad(entries, maxNodeSize, compare) { - var totalPairs = (0, decompose_1.alternatingCount)(entries); + var totalPairs = (0, shared_1.alternatingCount)(entries); if (totalPairs > 1) { var cmp = compare !== null && compare !== void 0 ? compare : b_tree_1.defaultComparator; - var previousKey = (0, decompose_1.alternatingGetFirst)(entries, 0); + var previousKey = (0, shared_1.alternatingGetFirst)(entries, 0); for (var i = 1; i < totalPairs; i++) { - var key = (0, decompose_1.alternatingGetFirst)(entries, i); + var key = (0, shared_1.alternatingGetFirst)(entries, i); if (cmp(previousKey, key) >= 0) throw new Error("bulkLoad: entries must be sorted by key in strictly ascending order"); previousKey = key; @@ -58,7 +58,7 @@ function bulkLoad(entries, maxNodeSize, compare) { } exports.bulkLoad = bulkLoad; function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation) { - var totalPairs = (0, decompose_1.alternatingCount)(alternatingList); + var totalPairs = (0, shared_1.alternatingCount)(alternatingList); if (totalPairs === 0) return 0; // This method creates as many evenly filled leaves as possible from @@ -72,8 +72,8 @@ function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation) { var keys = new Array(chunkSize); var vals = new Array(chunkSize); for (var i = 0; i < chunkSize; i++) { - keys[i] = (0, decompose_1.alternatingGetFirst)(alternatingList, pairIndex); - vals[i] = (0, 
decompose_1.alternatingGetSecond)(alternatingList, pairIndex); + keys[i] = (0, shared_1.alternatingGetFirst)(alternatingList, pairIndex); + vals[i] = (0, shared_1.alternatingGetSecond)(alternatingList, pairIndex); pairIndex++; } remaining -= chunkSize; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 0891251..1cd6c9b 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,5 +1,5 @@ import { BNode, BNodeInternal, check, defaultComparator, sumChildSizes } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, alternatingGetSecond } from './decompose'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond } from './shared'; export function bulkLoad( entries: (K | V)[], diff --git a/extended/decompose.d.ts b/extended/decompose.d.ts index 7d50152..0e235ac 100644 --- a/extended/decompose.d.ts +++ b/extended/decompose.d.ts @@ -1,5 +1,5 @@ import BTree, { BNode } from '../b+tree'; -import type { BTreeWithInternals } from './shared'; +import { type BTreeWithInternals } from './shared'; export declare type DecomposeResult = { disjoint: (number | BNode)[]; tallestIndex: number; @@ -15,7 +15,3 @@ export declare type DecomposeResult = { */ export declare function decompose(left: BTreeWithInternals, right: BTreeWithInternals, mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined, ignoreRight?: boolean): DecomposeResult; export declare function buildFromDecomposition, K, V>(constructor: new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, branchingFactor: number, decomposed: DecomposeResult, cmp: (a: K, b: K) => number, maxNodeSize: number): TBTree; -export declare function alternatingCount(list: unknown[]): number; -export declare function alternatingGetFirst(list: Array, index: number): TFirst; -export declare function alternatingGetSecond(list: Array, index: number): TSecond; -export declare function alternatingPush(list: Array, first: TFirst, second: TSecond): void; diff 
--git a/extended/decompose.js b/extended/decompose.js index 4d448c2..2fa4be5 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -1,7 +1,8 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.buildFromDecomposition = exports.decompose = void 0; +exports.buildFromDecomposition = exports.decompose = void 0; var b_tree_1 = require("../b+tree"); +var shared_1 = require("./shared"); var parallelWalk_1 = require("./parallelWalk"); var bulkLoad_1 = require("./bulkLoad"); /** @@ -34,21 +35,21 @@ function decompose(left, right, mergeValues, ignoreRight) { // because its ancestor may also be disjoint and should be reused instead. var highestDisjoint = undefined; var onLeafCreation = function (leaf) { - alternatingPush(disjoint, 0, leaf); + (0, shared_1.alternatingPush)(disjoint, 0, leaf); }; var flushPendingEntries = function () { var createdLeaves = (0, bulkLoad_1.flushToLeaves)(pending, left._maxNodeSize, onLeafCreation); if (createdLeaves > 0) { - tallestIndex = alternatingCount(disjoint) - 1; + tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; tallestHeight = 0; } }; var addSharedNodeToDisjointSet = function (node, height) { flushPendingEntries(); node.isShared = true; - alternatingPush(disjoint, height, node); + (0, shared_1.alternatingPush)(disjoint, height, node); if (height > tallestHeight) { - tallestIndex = alternatingCount(disjoint) - 1; + tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; tallestHeight = height; } }; @@ -77,7 +78,7 @@ function decompose(left, right, mergeValues, ignoreRight) { var keys = leaf.keys; var values = leaf.values; for (var i = from; i < toExclusive; ++i) - alternatingPush(pending, keys[i], values[i]); + (0, shared_1.alternatingPush)(pending, keys[i], values[i]); }; var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual) { (0, 
b_tree_1.check)(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); @@ -213,7 +214,7 @@ function decompose(left, right, mergeValues, ignoreRight) { // to pending because they respect the areEqual flag during their moves. var merged = mergeValues(key, vA, vB); if (merged !== undefined) - alternatingPush(pending, key, merged); + (0, shared_1.alternatingPush)(pending, key, merged); var outTrailing = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); var outLeading = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); if (outTrailing || outLeading) { @@ -258,7 +259,7 @@ function decompose(left, right, mergeValues, ignoreRight) { exports.decompose = decompose; function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, maxNodeSize) { var disjoint = decomposed.disjoint, tallestIndex = decomposed.tallestIndex; - var disjointEntryCount = alternatingCount(disjoint); + var disjointEntryCount = (0, shared_1.alternatingCount)(disjoint); // Now we have a set of disjoint subtrees and we need to merge them into a single tree. // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees // to the "right" and "left" of it in sorted order, we append them onto the appropriate side @@ -267,7 +268,7 @@ function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, m // the leaf level on that side of the tree. Each appended subtree is appended to the node at the // same height as itself on the frontier. Each tree is guaranteed to be at most as tall as the // current frontier because we start from the tallest subtree and work outward. 
- var initialRoot = alternatingGetSecond(disjoint, tallestIndex); + var initialRoot = (0, shared_1.alternatingGetSecond)(disjoint, tallestIndex); var frontier = [initialRoot]; // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjointEntryCount - 1) { @@ -312,8 +313,8 @@ function processSide(branchingFactor, disjoint, spine, start, end, step, sideInd var unflushedSizes = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array for (var i = start; i != end; i += step) { var currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf - var subtree = alternatingGetSecond(disjoint, i); - var subtreeHeight = alternatingGetFirst(disjoint, i); + var subtree = (0, shared_1.alternatingGetSecond)(disjoint, i); + var subtreeHeight = (0, shared_1.alternatingGetFirst)(disjoint, i); var insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); @@ -511,24 +512,3 @@ function splitOffLeftSide(node) { function updateRightMax(node, maxBelow) { node.keys[node.keys.length - 1] = maxBelow; } -// ------- Alternating list helpers ------- -// These helpers manage a list that alternates between two types of entries. -// Storing data this way avoids small tuple allocations and shows major improvements -// in GC time in benchmarks. 
-function alternatingCount(list) { - return list.length >> 1; -} -exports.alternatingCount = alternatingCount; -function alternatingGetFirst(list, index) { - return list[index << 1]; -} -exports.alternatingGetFirst = alternatingGetFirst; -function alternatingGetSecond(list, index) { - return list[(index << 1) + 1]; -} -exports.alternatingGetSecond = alternatingGetSecond; -function alternatingPush(list, first, second) { - // Micro benchmarks show this is the fastest way to do this - list.push(first, second); -} -exports.alternatingPush = alternatingPush; diff --git a/extended/decompose.ts b/extended/decompose.ts index f3d15ab..7f460b7 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -1,5 +1,5 @@ import BTree, { areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; -import type { BTreeWithInternals } from './shared'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, type BTreeWithInternals } from './shared'; import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne, moveTo, noop } from "./parallelWalk"; import { flushToLeaves } from './bulkLoad'; @@ -643,26 +643,4 @@ function splitOffLeftSide(node: BNodeInternal): BNodeInternal function updateRightMax(node: BNodeInternal, maxBelow: K): void { node.keys[node.keys.length - 1] = maxBelow; -} - -// ------- Alternating list helpers ------- -// These helpers manage a list that alternates between two types of entries. -// Storing data this way avoids small tuple allocations and shows major improvements -// in GC time in benchmarks. 
- -export function alternatingCount(list: unknown[]): number { - return list.length >> 1; -} - -export function alternatingGetFirst(list: Array, index: number): TFirst { - return list[index << 1] as TFirst; -} - -export function alternatingGetSecond(list: Array, index: number): TSecond { - return list[(index << 1) + 1] as TSecond; -} - -export function alternatingPush(list: Array, first: TFirst, second: TSecond): void { - // Micro benchmarks show this is the fastest way to do this - list.push(first, second); } \ No newline at end of file diff --git a/extended/intersect.js b/extended/intersect.js index 883e85d..eaae693 100644 --- a/extended/intersect.js +++ b/extended/intersect.js @@ -3,6 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); +var shared_1 = require("./shared"); var parallelWalk_1 = require("./parallelWalk"); var decompose_1 = require("./decompose"); var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); @@ -32,7 +33,7 @@ function intersect(treeA, treeB, combineFn) { var intersected = []; (0, forEachKeyInBoth_1.default)(treeA, treeB, function (key, leftValue, rightValue) { var mergedValue = combineFn(key, leftValue, rightValue); - (0, decompose_1.alternatingPush)(intersected, key, mergedValue); + (0, shared_1.alternatingPush)(intersected, key, mergedValue); }); // Decompose both trees into disjoint subtrees leaves. 
// As many of these as possible will be reused from the original trees, and the remaining diff --git a/extended/intersect.ts b/extended/intersect.ts index bdcb996..49eff87 100644 --- a/extended/intersect.ts +++ b/extended/intersect.ts @@ -1,7 +1,7 @@ import BTree from '../b+tree'; -import type { BTreeWithInternals } from './shared'; +import { alternatingPush, type BTreeWithInternals } from './shared'; import { checkCanDoSetOperation } from "./parallelWalk" -import { alternatingPush, buildFromDecomposition, decompose } from './decompose'; +import { buildFromDecomposition, decompose } from './decompose'; import forEachKeyInBoth from './forEachKeyInBoth'; /** diff --git a/extended/shared.d.ts b/extended/shared.d.ts index 4527a4e..c4db16e 100644 --- a/extended/shared.d.ts +++ b/extended/shared.d.ts @@ -6,3 +6,7 @@ export declare type BTreeWithInternals = { _maxNodeSize: number; _compare: (a: K, b: K) => number; } & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; +export declare function alternatingCount(list: unknown[]): number; +export declare function alternatingGetFirst(list: Array, index: number): TFirst; +export declare function alternatingGetSecond(list: Array, index: number): TSecond; +export declare function alternatingPush(list: Array, first: TFirst, second: TSecond): void; diff --git a/extended/shared.js b/extended/shared.js index c8ad2e5..764f662 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -1,2 +1,24 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = void 0; +// ------- Alternating list helpers ------- +// These helpers manage a list that alternates between two types of entries. +// Storing data this way avoids small tuple allocations and shows major improvements +// in GC time in benchmarks. 
+function alternatingCount(list) { + return list.length >> 1; +} +exports.alternatingCount = alternatingCount; +function alternatingGetFirst(list, index) { + return list[index << 1]; +} +exports.alternatingGetFirst = alternatingGetFirst; +function alternatingGetSecond(list, index) { + return list[(index << 1) + 1]; +} +exports.alternatingGetSecond = alternatingGetSecond; +function alternatingPush(list, first, second) { + // Micro benchmarks show this is the fastest way to do this + list.push(first, second); +} +exports.alternatingPush = alternatingPush; diff --git a/extended/shared.ts b/extended/shared.ts index 58c7982..79a00ba 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -7,3 +7,25 @@ export type BTreeWithInternals = { _maxNodeSize: number; _compare: (a: K, b: K) => number; } & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; + +// ------- Alternating list helpers ------- +// These helpers manage a list that alternates between two types of entries. +// Storing data this way avoids small tuple allocations and shows major improvements +// in GC time in benchmarks. 
+ +export function alternatingCount(list: unknown[]): number { + return list.length >> 1; +} + +export function alternatingGetFirst(list: Array, index: number): TFirst { + return list[index << 1] as TFirst; +} + +export function alternatingGetSecond(list: Array, index: number): TSecond { + return list[(index << 1) + 1] as TSecond; +} + +export function alternatingPush(list: Array, first: TFirst, second: TSecond): void { + // Micro benchmarks show this is the fastest way to do this + list.push(first, second); +} \ No newline at end of file From 465cc81a85d880bba42af61ab48d666b4ffdcb42 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 11:28:38 -0800 Subject: [PATCH 078/143] refactor bulkLoad --- extended/bulkLoad.d.ts | 5 ++- extended/bulkLoad.js | 70 +++++++++++++++++++----------------------- extended/bulkLoad.ts | 56 +++++++++------------------------ extended/decompose.js | 3 +- extended/decompose.ts | 3 +- extended/shared.d.ts | 3 +- extended/shared.js | 32 ++++++++++++++++++- extended/shared.ts | 35 ++++++++++++++++++++- test/bulkLoad.test.ts | 10 +++--- 9 files changed, 121 insertions(+), 96 deletions(-) diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index e764c48..bc68f5e 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -1,3 +1,2 @@ -import { BNode } from '../b+tree'; -export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number): BNode | undefined; -export declare function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, onLeafCreation: (node: BNode) => void): number; +import BTree from '../b+tree'; +export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number): BTree; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 7d8939b..92d7005 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -1,12 +1,31 @@ "use strict"; +var __createBinding = (this && this.__createBinding) || 
(Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; Object.defineProperty(exports, "__esModule", { value: true }); -exports.flushToLeaves = exports.bulkLoad = void 0; -var b_tree_1 = require("../b+tree"); +exports.bulkLoad = void 0; +var b_tree_1 = __importStar(require("../b+tree")); var shared_1 = require("./shared"); function bulkLoad(entries, maxNodeSize, compare) { var totalPairs = (0, shared_1.alternatingCount)(entries); + var cmp = compare !== null && compare !== void 0 ? compare : b_tree_1.defaultComparator; if (totalPairs > 1) { - var cmp = compare !== null && compare !== void 0 ? 
compare : b_tree_1.defaultComparator; var previousKey = (0, shared_1.alternatingGetFirst)(entries, 0); for (var i = 1; i < totalPairs; i++) { var key = (0, shared_1.alternatingGetFirst)(entries, i); @@ -15,18 +34,18 @@ function bulkLoad(entries, maxNodeSize, compare) { previousKey = key; } } + var tree = new b_tree_1.default(undefined, cmp, maxNodeSize); var leaves = []; - flushToLeaves(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }); + (0, shared_1.flushToLeaves)(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }); var leafCount = leaves.length; if (leafCount === 0) - return undefined; + return tree; var currentLevel = leaves; - while (true) { + while (currentLevel.length > 1) { var nodeCount = currentLevel.length; - if (nodeCount === 1) - return currentLevel[0]; if (nodeCount <= maxNodeSize) { - return new b_tree_1.BNodeInternal(currentLevel, (0, b_tree_1.sumChildSizes)(currentLevel)); + currentLevel = [new b_tree_1.BNodeInternal(currentLevel, (0, b_tree_1.sumChildSizes)(currentLevel))]; + break; } var nextLevelCount = Math.ceil(nodeCount / maxNodeSize); (0, b_tree_1.check)(nextLevelCount > 1); @@ -55,34 +74,9 @@ function bulkLoad(entries, maxNodeSize, compare) { } currentLevel = nextLevel; } + var target = tree; + target._root = currentLevel[0]; + target._size = totalPairs; + return tree; } exports.bulkLoad = bulkLoad; -function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation) { - var totalPairs = (0, shared_1.alternatingCount)(alternatingList); - if (totalPairs === 0) - return 0; - // This method creates as many evenly filled leaves as possible from - // the pending entries. All will be > 50% full if we are creating more than one leaf. 
- var leafCount = Math.ceil(totalPairs / maxNodeSize); - var remainingLeaves = leafCount; - var remaining = totalPairs; - var pairIndex = 0; - while (remainingLeaves > 0) { - var chunkSize = Math.ceil(remaining / remainingLeaves); - var keys = new Array(chunkSize); - var vals = new Array(chunkSize); - for (var i = 0; i < chunkSize; i++) { - keys[i] = (0, shared_1.alternatingGetFirst)(alternatingList, pairIndex); - vals[i] = (0, shared_1.alternatingGetSecond)(alternatingList, pairIndex); - pairIndex++; - } - remaining -= chunkSize; - remainingLeaves--; - var leaf = new b_tree_1.BNode(keys, vals); - onLeafCreation(leaf); - } - alternatingList.length = 0; - return leafCount; -} -exports.flushToLeaves = flushToLeaves; -; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 1cd6c9b..f93fbcc 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,14 +1,14 @@ -import { BNode, BNodeInternal, check, defaultComparator, sumChildSizes } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, alternatingGetSecond } from './shared'; +import BTree, { BNode, BNodeInternal, check, defaultComparator, sumChildSizes } from '../b+tree'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond, flushToLeaves, type BTreeWithInternals } from './shared'; export function bulkLoad( entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number -): BNode | undefined { +): BTree { const totalPairs = alternatingCount(entries); + const cmp = compare ?? (defaultComparator as unknown as (a: K, b: K) => number); if (totalPairs > 1) { - const cmp = compare ?? 
(defaultComparator as unknown as (a: K, b: K) => number); let previousKey = alternatingGetFirst(entries, 0); for (let i = 1; i < totalPairs; i++) { const key = alternatingGetFirst(entries, i); @@ -18,20 +18,19 @@ export function bulkLoad( } } + const tree = new BTree(undefined, cmp, maxNodeSize); const leaves: BNode[] = []; flushToLeaves(entries, maxNodeSize, (leaf) => leaves.push(leaf)); const leafCount = leaves.length; if (leafCount === 0) - return undefined; + return tree; let currentLevel: BNode[] = leaves; - while (true) { + while (currentLevel.length > 1) { const nodeCount = currentLevel.length; - if (nodeCount === 1) - return currentLevel[0]; - if (nodeCount <= maxNodeSize) { - return new BNodeInternal(currentLevel, sumChildSizes(currentLevel)); + currentLevel = [new BNodeInternal(currentLevel, sumChildSizes(currentLevel))]; + break; } const nextLevelCount = Math.ceil(nodeCount / maxNodeSize); @@ -64,37 +63,10 @@ export function bulkLoad( currentLevel = nextLevel; } -} -export function flushToLeaves( - alternatingList: (K | V)[], - maxNodeSize: number, - onLeafCreation: (node: BNode) => void -): number { - const totalPairs = alternatingCount(alternatingList); - if (totalPairs === 0) - return 0; + const target = tree as unknown as BTreeWithInternals; + target._root = currentLevel[0]; + target._size = totalPairs; + return tree; +} - // This method creates as many evenly filled leaves as possible from - // the pending entries. All will be > 50% full if we are creating more than one leaf. 
- const leafCount = Math.ceil(totalPairs / maxNodeSize); - let remainingLeaves = leafCount; - let remaining = totalPairs; - let pairIndex = 0; - while (remainingLeaves > 0) { - const chunkSize = Math.ceil(remaining / remainingLeaves); - const keys = new Array(chunkSize); - const vals = new Array(chunkSize); - for (let i = 0; i < chunkSize; i++) { - keys[i] = alternatingGetFirst(alternatingList, pairIndex); - vals[i] = alternatingGetSecond(alternatingList, pairIndex); - pairIndex++; - } - remaining -= chunkSize; - remainingLeaves--; - const leaf = new BNode(keys, vals); - onLeafCreation(leaf); - } - alternatingList.length = 0; - return leafCount; -}; diff --git a/extended/decompose.js b/extended/decompose.js index 2fa4be5..5ac21e3 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -4,7 +4,6 @@ exports.buildFromDecomposition = exports.decompose = void 0; var b_tree_1 = require("../b+tree"); var shared_1 = require("./shared"); var parallelWalk_1 = require("./parallelWalk"); -var bulkLoad_1 = require("./bulkLoad"); /** * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. 
@@ -38,7 +37,7 @@ function decompose(left, right, mergeValues, ignoreRight) { (0, shared_1.alternatingPush)(disjoint, 0, leaf); }; var flushPendingEntries = function () { - var createdLeaves = (0, bulkLoad_1.flushToLeaves)(pending, left._maxNodeSize, onLeafCreation); + var createdLeaves = (0, shared_1.flushToLeaves)(pending, left._maxNodeSize, onLeafCreation); if (createdLeaves > 0) { tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; tallestHeight = 0; diff --git a/extended/decompose.ts b/extended/decompose.ts index 7f460b7..4774c48 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -1,7 +1,6 @@ import BTree, { areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, type BTreeWithInternals } from './shared'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, flushToLeaves, type BTreeWithInternals } from './shared'; import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne, moveTo, noop } from "./parallelWalk"; -import { flushToLeaves } from './bulkLoad'; export type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; diff --git a/extended/shared.d.ts b/extended/shared.d.ts index c4db16e..e6f92f9 100644 --- a/extended/shared.d.ts +++ b/extended/shared.d.ts @@ -1,4 +1,4 @@ -import type { BNode } from '../b+tree'; +import { BNode } from '../b+tree'; import BTree from '../b+tree'; export declare type BTreeWithInternals = { _root: BNode; @@ -6,6 +6,7 @@ export declare type BTreeWithInternals = { _maxNodeSize: number; _compare: (a: K, b: K) => number; } & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; +export declare function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, onLeafCreation: (node: BNode) => void): number; export declare function alternatingCount(list: unknown[]): number; export declare function alternatingGetFirst(list: Array, index: 
number): TFirst; export declare function alternatingGetSecond(list: Array, index: number): TSecond; diff --git a/extended/shared.js b/extended/shared.js index 764f662..b739735 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -1,6 +1,36 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = void 0; +exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.flushToLeaves = void 0; +var b_tree_1 = require("../b+tree"); +function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation) { + var totalPairs = alternatingCount(alternatingList); + if (totalPairs === 0) + return 0; + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. + var leafCount = Math.ceil(totalPairs / maxNodeSize); + var remainingLeaves = leafCount; + var remaining = totalPairs; + var pairIndex = 0; + while (remainingLeaves > 0) { + var chunkSize = Math.ceil(remaining / remainingLeaves); + var keys = new Array(chunkSize); + var vals = new Array(chunkSize); + for (var i = 0; i < chunkSize; i++) { + keys[i] = alternatingGetFirst(alternatingList, pairIndex); + vals[i] = alternatingGetSecond(alternatingList, pairIndex); + pairIndex++; + } + remaining -= chunkSize; + remainingLeaves--; + var leaf = new b_tree_1.BNode(keys, vals); + onLeafCreation(leaf); + } + alternatingList.length = 0; + return leafCount; +} +exports.flushToLeaves = flushToLeaves; +; // ------- Alternating list helpers ------- // These helpers manage a list that alternates between two types of entries. 
// Storing data this way avoids small tuple allocations and shows major improvements diff --git a/extended/shared.ts b/extended/shared.ts index 79a00ba..080c53d 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -1,4 +1,4 @@ -import type { BNode } from '../b+tree'; +import { BNode } from '../b+tree'; import BTree from '../b+tree'; export type BTreeWithInternals = { @@ -8,6 +8,39 @@ export type BTreeWithInternals = { _compare: (a: K, b: K) => number; } & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; +export function flushToLeaves( + alternatingList: (K | V)[], + maxNodeSize: number, + onLeafCreation: (node: BNode) => void +): number { + const totalPairs = alternatingCount(alternatingList); + if (totalPairs === 0) + return 0; + + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. + const leafCount = Math.ceil(totalPairs / maxNodeSize); + let remainingLeaves = leafCount; + let remaining = totalPairs; + let pairIndex = 0; + while (remainingLeaves > 0) { + const chunkSize = Math.ceil(remaining / remainingLeaves); + const keys = new Array(chunkSize); + const vals = new Array(chunkSize); + for (let i = 0; i < chunkSize; i++) { + keys[i] = alternatingGetFirst(alternatingList, pairIndex); + vals[i] = alternatingGetSecond(alternatingList, pairIndex); + pairIndex++; + } + remaining -= chunkSize; + remainingLeaves--; + const leaf = new BNode(keys, vals); + onLeafCreation(leaf); + } + alternatingList.length = 0; + return leafCount; +}; + // ------- Alternating list helpers ------- // These helpers manage a list that alternates between two types of entries. 
// Storing data this way avoids small tuple allocations and shows major improvements diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts index 244b80c..9b215ae 100644 --- a/test/bulkLoad.test.ts +++ b/test/bulkLoad.test.ts @@ -25,11 +25,8 @@ function buildTreeFromPairs(maxNodeSize: number, pairs: Pair[]) { for (const [key, value] of pairs) { alternating.push(key, value); } - const root = bulkLoad(alternating, maxNodeSize, compareNumbers); - const tree = new BTree(undefined, compareNumbers, maxNodeSize); - if (root !== undefined) { - (tree as any)._root = root; - } + const tree = bulkLoad(alternating, maxNodeSize, compareNumbers); + const root = tree['_root'] as BNode; return { tree, root }; } @@ -72,7 +69,8 @@ describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { test('empty input produces empty tree', () => { const { tree, root } = buildTreeFromPairs(maxNodeSize, []); - expect(root).toBeUndefined(); + expect(root?.isLeaf).toBe(true); + expect(root?.keys.length ?? 
0).toBe(0); expectTreeMatches(tree, []); }); From 6bf055072b38cb27f34ee26fe683d67abd21d51e Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 11:40:50 -0800 Subject: [PATCH 079/143] add bulk load to btreeex --- extended/bulkLoad.d.ts | 7 +++++-- extended/bulkLoad.js | 30 ++++++++++++++++-------------- extended/bulkLoad.ts | 41 ++++++++++++++++++++++++----------------- extended/index.d.ts | 1 + extended/index.js | 31 ++++++++++++++++++++++++++++++- extended/index.ts | 17 ++++++++++++++++- test/bulkLoad.test.ts | 21 ++++++++++++++++++--- 7 files changed, 110 insertions(+), 38 deletions(-) diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index bc68f5e..9cacd42 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -1,2 +1,5 @@ -import BTree from '../b+tree'; -export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number): BTree; +import BTree, { BNode } from '../b+tree'; +declare type Comparator = (a: K, b: K) => number; +export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare: Comparator): BTree; +export declare function bulkLoadRoot(entries: (K | V)[], maxNodeSize: number, compare: Comparator): BNode; +export {}; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 92d7005..de3f1c9 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -19,27 +19,33 @@ var __importStar = (this && this.__importStar) || function (mod) { return result; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.bulkLoad = void 0; +exports.bulkLoadRoot = exports.bulkLoad = void 0; var b_tree_1 = __importStar(require("../b+tree")); var shared_1 = require("./shared"); function bulkLoad(entries, maxNodeSize, compare) { + var root = bulkLoadRoot(entries, maxNodeSize, compare); + var tree = new b_tree_1.default(undefined, compare, maxNodeSize); + var target = tree; + target._root = root; + target._size = root.size(); + return tree; +} 
+exports.bulkLoad = bulkLoad; +function bulkLoadRoot(entries, maxNodeSize, compare) { var totalPairs = (0, shared_1.alternatingCount)(entries); - var cmp = compare !== null && compare !== void 0 ? compare : b_tree_1.defaultComparator; if (totalPairs > 1) { var previousKey = (0, shared_1.alternatingGetFirst)(entries, 0); for (var i = 1; i < totalPairs; i++) { var key = (0, shared_1.alternatingGetFirst)(entries, i); - if (cmp(previousKey, key) >= 0) + if (compare(previousKey, key) >= 0) throw new Error("bulkLoad: entries must be sorted by key in strictly ascending order"); previousKey = key; } } - var tree = new b_tree_1.default(undefined, cmp, maxNodeSize); var leaves = []; (0, shared_1.flushToLeaves)(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }); - var leafCount = leaves.length; - if (leafCount === 0) - return tree; + if (leaves.length === 0) + return new b_tree_1.BNode(); var currentLevel = leaves; while (currentLevel.length > 1) { var nodeCount = currentLevel.length; @@ -69,14 +75,10 @@ function bulkLoad(entries, maxNodeSize, compare) { var minSize = Math.floor(maxNodeSize / 2); var secondLastNode = nextLevel[nextLevelCount - 2]; var lastNode = nextLevel[nextLevelCount - 1]; - while (lastNode.children.length < minSize) { + while (lastNode.children.length < minSize) lastNode.takeFromLeft(secondLastNode); - } currentLevel = nextLevel; } - var target = tree; - target._root = currentLevel[0]; - target._size = totalPairs; - return tree; + return currentLevel[0]; } -exports.bulkLoad = bulkLoad; +exports.bulkLoadRoot = bulkLoadRoot; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index f93fbcc..307b937 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,29 +1,41 @@ import BTree, { BNode, BNodeInternal, check, defaultComparator, sumChildSizes } from '../b+tree'; import { alternatingCount, alternatingGetFirst, alternatingGetSecond, flushToLeaves, type BTreeWithInternals } from './shared'; +type Comparator = (a: K, b: K) => 
number; + export function bulkLoad( entries: (K | V)[], maxNodeSize: number, - compare?: (a: K, b: K) => number + compare: Comparator ): BTree { + const root = bulkLoadRoot(entries, maxNodeSize, compare); + const tree = new BTree(undefined, compare, maxNodeSize); + const target = tree as unknown as BTreeWithInternals; + target._root = root; + target._size = root.size(); + return tree; +} + +export function bulkLoadRoot( + entries: (K | V)[], + maxNodeSize: number, + compare: Comparator +): BNode { const totalPairs = alternatingCount(entries); - const cmp = compare ?? (defaultComparator as unknown as (a: K, b: K) => number); if (totalPairs > 1) { let previousKey = alternatingGetFirst(entries, 0); for (let i = 1; i < totalPairs; i++) { const key = alternatingGetFirst(entries, i); - if (cmp(previousKey, key) >= 0) + if (compare(previousKey, key) >= 0) throw new Error("bulkLoad: entries must be sorted by key in strictly ascending order"); previousKey = key; } } - const tree = new BTree(undefined, cmp, maxNodeSize); const leaves: BNode[] = []; flushToLeaves(entries, maxNodeSize, (leaf) => leaves.push(leaf)); - const leafCount = leaves.length; - if (leafCount === 0) - return tree; + if (leaves.length === 0) + return new BNode(); let currentLevel: BNode[] = leaves; while (currentLevel.length > 1) { @@ -35,7 +47,7 @@ export function bulkLoad( const nextLevelCount = Math.ceil(nodeCount / maxNodeSize); check(nextLevelCount > 1); - const nextLevel = new Array>(nextLevelCount); + const nextLevel = new Array>(nextLevelCount); let remainingNodes = nodeCount; let remainingParents = nextLevelCount; let childIndex = 0; @@ -55,18 +67,13 @@ export function bulkLoad( } const minSize = Math.floor(maxNodeSize / 2); - const secondLastNode = nextLevel[nextLevelCount - 2]; - const lastNode = nextLevel[nextLevelCount - 1]; - while (lastNode.children.length < minSize) { + const secondLastNode = nextLevel[nextLevelCount - 2] as BNodeInternal; + const lastNode = nextLevel[nextLevelCount - 1] 
as BNodeInternal; + while (lastNode.children.length < minSize) lastNode.takeFromLeft(secondLastNode); - } currentLevel = nextLevel; } - const target = tree as unknown as BTreeWithInternals; - target._root = currentLevel[0]; - target._size = totalPairs; - return tree; + return currentLevel[0]; } - diff --git a/extended/index.d.ts b/extended/index.d.ts index 18209e5..89b7474 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -1,5 +1,6 @@ import BTree from '../b+tree'; export declare class BTreeEx extends BTree { + static bulkLoad(entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number): BTreeEx; clone(): this; greedyClone(force?: boolean): this; /** diff --git a/extended/index.js b/extended/index.js index e8e0753..e2f8f2b 100644 --- a/extended/index.js +++ b/extended/index.js @@ -14,20 +14,49 @@ var __extends = (this && this.__extends) || (function () { d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); }; })(); +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? 
mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.BTreeEx = void 0; -var b_tree_1 = __importDefault(require("../b+tree")); +var b_tree_1 = __importStar(require("../b+tree")); var diffAgainst_1 = require("./diffAgainst"); var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); var union_1 = __importDefault(require("./union")); +var bulkLoad_1 = require("./bulkLoad"); var BTreeEx = /** @class */ (function (_super) { __extends(BTreeEx, _super); function BTreeEx() { return _super !== null && _super.apply(this, arguments) || this; } + BTreeEx.bulkLoad = function (entries, maxNodeSize, compare) { + var cmp = compare !== null && compare !== void 0 ? compare : b_tree_1.defaultComparator; + var root = (0, bulkLoad_1.bulkLoadRoot)(entries, maxNodeSize, cmp); + var tree = new BTreeEx(undefined, cmp, maxNodeSize); + var target = tree; + target._root = root; + target._size = root.size(); + return tree; + }; BTreeEx.prototype.clone = function () { var source = this; source._root.isShared = true; diff --git a/extended/index.ts b/extended/index.ts index 770d100..ea25134 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -1,10 +1,25 @@ -import BTree from '../b+tree'; +import BTree, { defaultComparator } from '../b+tree'; import type { BTreeWithInternals } from './shared'; import { diffAgainst as diffAgainstAlgorithm } from './diffAgainst'; import forEachKeyInBoth from './forEachKeyInBoth'; import union from './union'; +import { bulkLoadRoot } from './bulkLoad'; export class BTreeEx extends BTree { + static bulkLoad( + entries: (K | V)[], + maxNodeSize: number, + compare?: (a: K, b: K) => number + ): BTreeEx { + const cmp = compare ?? 
(defaultComparator as unknown as (a: K, b: K) => number); + const root = bulkLoadRoot(entries, maxNodeSize, cmp); + const tree = new BTreeEx(undefined, cmp, maxNodeSize); + const target = tree as unknown as BTreeWithInternals; + target._root = root; + target._size = root.size(); + return tree; + } + clone(): this { const source = this as unknown as BTreeWithInternals; source._root.isShared = true; diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts index 9b215ae..79d68c3 100644 --- a/test/bulkLoad.test.ts +++ b/test/bulkLoad.test.ts @@ -1,4 +1,5 @@ import BTree, { BNode, BNodeInternal } from '../b+tree'; +import BTreeEx from '../extended'; import { bulkLoad } from '../extended/bulkLoad'; import { makeArray } from './shared'; @@ -20,11 +21,15 @@ function pairsFromKeys(keys: number[]): Pair[] { return keys.map((key, index) => [key, index - key]); } -function buildTreeFromPairs(maxNodeSize: number, pairs: Pair[]) { +function toAlternating(pairs: Pair[]): number[] { const alternating: number[] = []; - for (const [key, value] of pairs) { + for (const [key, value] of pairs) alternating.push(key, value); - } + return alternating; +} + +function buildTreeFromPairs(maxNodeSize: number, pairs: Pair[]) { + const alternating = toAlternating(pairs); const tree = bulkLoad(alternating, maxNodeSize, compareNumbers); const root = tree['_root'] as BNode; return { tree, root }; @@ -124,3 +129,13 @@ describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { assertInternalNodeFanout(tree['_root'] as BNode, maxNodeSize); }); }); + +describe('BTreeEx.bulkLoad', () => { + test.each(branchingFactors)('creates tree for fanout %i', (maxNodeSize) => { + const pairs = sequentialPairs(maxNodeSize * 2 + 3, 7, 1); + const alternating = toAlternating(pairs); + const tree = BTreeEx.bulkLoad(alternating, maxNodeSize, compareNumbers); + expect(tree).toBeInstanceOf(BTreeEx); + expectTreeMatches(tree, pairs); + }); +}); From 83f5e7b6dc1e4b94ba3003b106525bec6324b980 Mon Sep 17 
00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 11:43:48 -0800 Subject: [PATCH 080/143] cleanup --- extended/bulkLoad.d.ts | 16 +++++++++++----- extended/bulkLoad.js | 13 +++++++++++++ extended/bulkLoad.ts | 23 +++++++++++++++++------ 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index 9cacd42..4b0235b 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -1,5 +1,11 @@ -import BTree, { BNode } from '../b+tree'; -declare type Comparator = (a: K, b: K) => number; -export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare: Comparator): BTree; -export declare function bulkLoadRoot(entries: (K | V)[], maxNodeSize: number, compare: Comparator): BNode; -export {}; +import BTree from '../b+tree'; +/** + * Loads a B-Tree from a sorted list of entries in bulk. This is faster than inserting + * entries one at a time, and produces a more optimally balanced tree. + * Time and space complexity: O(n). + * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. + * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. + * @param compare Function to compare keys. + * @returns A new BTree containing the given entries. + */ +export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare: (a: K, b: K) => number): BTree; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index de3f1c9..f5b78a9 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -22,6 +22,15 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.bulkLoadRoot = exports.bulkLoad = void 0; var b_tree_1 = __importStar(require("../b+tree")); var shared_1 = require("./shared"); +/** + * Loads a B-Tree from a sorted list of entries in bulk. This is faster than inserting + * entries one at a time, and produces a more optimally balanced tree. + * Time and space complexity: O(n). 
+ * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. + * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. + * @param compare Function to compare keys. + * @returns A new BTree containing the given entries. + */ function bulkLoad(entries, maxNodeSize, compare) { var root = bulkLoadRoot(entries, maxNodeSize, compare); var tree = new b_tree_1.default(undefined, compare, maxNodeSize); @@ -31,6 +40,10 @@ function bulkLoad(entries, maxNodeSize, compare) { return tree; } exports.bulkLoad = bulkLoad; +/** + * Bulk loads, returns the root node of the resulting tree. + * @internal + */ function bulkLoadRoot(entries, maxNodeSize, compare) { var totalPairs = (0, shared_1.alternatingCount)(entries); if (totalPairs > 1) { diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 307b937..61ee152 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,12 +1,19 @@ -import BTree, { BNode, BNodeInternal, check, defaultComparator, sumChildSizes } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, alternatingGetSecond, flushToLeaves, type BTreeWithInternals } from './shared'; - -type Comparator = (a: K, b: K) => number; +import BTree, { BNode, BNodeInternal, check, sumChildSizes } from '../b+tree'; +import { alternatingCount, alternatingGetFirst, flushToLeaves, type BTreeWithInternals } from './shared'; +/** + * Loads a B-Tree from a sorted list of entries in bulk. This is faster than inserting + * entries one at a time, and produces a more optimally balanced tree. + * Time and space complexity: O(n). + * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. + * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. + * @param compare Function to compare keys. + * @returns A new BTree containing the given entries. 
+ */ export function bulkLoad( entries: (K | V)[], maxNodeSize: number, - compare: Comparator + compare: (a: K, b: K) => number ): BTree { const root = bulkLoadRoot(entries, maxNodeSize, compare); const tree = new BTree(undefined, compare, maxNodeSize); @@ -16,10 +23,14 @@ export function bulkLoad( return tree; } +/** + * Bulk loads, returns the root node of the resulting tree. + * @internal + */ export function bulkLoadRoot( entries: (K | V)[], maxNodeSize: number, - compare: Comparator + compare: (a: K, b: K) => number ): BNode { const totalPairs = alternatingCount(entries); if (totalPairs > 1) { From ad1d0ba5f6e4fc9b69e17235b30e84d2a01cb08b Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 11:46:22 -0800 Subject: [PATCH 081/143] export cleanup for decompose --- extended/decompose.d.ts | 18 +----------------- extended/decompose.js | 6 ++++++ extended/decompose.ts | 10 ++++++++++ 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/extended/decompose.d.ts b/extended/decompose.d.ts index 0e235ac..cb0ff5c 100644 --- a/extended/decompose.d.ts +++ b/extended/decompose.d.ts @@ -1,17 +1 @@ -import BTree, { BNode } from '../b+tree'; -import { type BTreeWithInternals } from './shared'; -export declare type DecomposeResult = { - disjoint: (number | BNode)[]; - tallestIndex: number; -}; -/** - * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes - * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. - * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward - * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to - * be disjoint. 
This is true because the leading cursor was also previously walked in this way, and is thus pointing to - * the first key at or after the trailing cursor's previous position. - * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. - */ -export declare function decompose(left: BTreeWithInternals, right: BTreeWithInternals, mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined, ignoreRight?: boolean): DecomposeResult; -export declare function buildFromDecomposition, K, V>(constructor: new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, branchingFactor: number, decomposed: DecomposeResult, cmp: (a: K, b: K) => number, maxNodeSize: number): TBTree; +export {}; diff --git a/extended/decompose.js b/extended/decompose.js index 5ac21e3..e6cd0d8 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -12,6 +12,7 @@ var parallelWalk_1 = require("./parallelWalk"); * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to * the first key at or after the trailing cursor's previous position. * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. + * @internal */ function decompose(left, right, mergeValues, ignoreRight) { if (ignoreRight === void 0) { ignoreRight = false; } @@ -256,6 +257,10 @@ function decompose(left, right, mergeValues, ignoreRight) { return { disjoint: disjoint, tallestIndex: tallestIndex }; } exports.decompose = decompose; +/** + * Constructs a B-Tree from the result of a decomposition (set of disjoint nodes). 
+ * @internal + */ function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, maxNodeSize) { var disjoint = decomposed.disjoint, tallestIndex = decomposed.tallestIndex; var disjointEntryCount = (0, shared_1.alternatingCount)(disjoint); @@ -290,6 +295,7 @@ exports.buildFromDecomposition = buildFromDecomposition; /** * Processes one side (left or right) of the disjoint subtree set during a merge operation. * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. + * @internal */ function processSide(branchingFactor, disjoint, spine, start, end, step, sideIndex, sideInsertionIndex, splitOffSide, updateMax) { // Determine the depth of the first shared node on the frontier. diff --git a/extended/decompose.ts b/extended/decompose.ts index 4774c48..8ecf62b 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -2,6 +2,10 @@ import BTree, { areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, flushToLeaves, type BTreeWithInternals } from './shared'; import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne, moveTo, noop } from "./parallelWalk"; +/** + * A set of disjoint nodes, their heights, and the index of the tallest node. + * @internal + */ export type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; /** @@ -12,6 +16,7 @@ export type DecomposeResult = { disjoint: (number | BNode)[], talles * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to * the first key at or after the trailing cursor's previous position. * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. 
+ * @internal */ export function decompose( left: BTreeWithInternals, @@ -307,6 +312,10 @@ export function decompose( return { disjoint, tallestIndex }; } +/** + * Constructs a B-Tree from the result of a decomposition (set of disjoint nodes). + * @internal + */ export function buildFromDecomposition, K, V>( constructor: new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, branchingFactor: number, @@ -372,6 +381,7 @@ export function buildFromDecomposition, K, V>( /** * Processes one side (left or right) of the disjoint subtree set during a merge operation. * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. + * @internal */ function processSide( branchingFactor: number, From 0d07f295c477365effed1fcf18c92708199dfafe Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 11:50:35 -0800 Subject: [PATCH 082/143] cleanup --- extended/diffAgainst.d.ts | 3 +- extended/diffAgainst.js | 24 ++++++------- extended/diffAgainst.ts | 71 +++++++++++++++++++-------------------- extended/index.js | 4 +-- extended/index.ts | 4 +-- 5 files changed, 51 insertions(+), 55 deletions(-) diff --git a/extended/diffAgainst.d.ts b/extended/diffAgainst.d.ts index c54cc79..eb54935 100644 --- a/extended/diffAgainst.d.ts +++ b/extended/diffAgainst.d.ts @@ -12,11 +12,10 @@ import BTree from '../b+tree'; * @param onlyB Callback invoked for all keys only present in `treeB`. * @param different Callback invoked for all keys with differing values. 
*/ -export declare function diffAgainst(_treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { +export default function diffAgainst(_treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { break?: R; } | void, onlyB?: (k: K, v: V) => { break?: R; } | void, different?: (k: K, vThis: V, vOther: V) => { break?: R; } | void): R | undefined; -export default diffAgainst; diff --git a/extended/diffAgainst.js b/extended/diffAgainst.js index 9efbad4..db29a34 100644 --- a/extended/diffAgainst.js +++ b/extended/diffAgainst.js @@ -1,6 +1,5 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.diffAgainst = void 0; var b_tree_1 = require("../b+tree"); /** * Computes the differences between `treeA` and `treeB`. @@ -126,11 +125,11 @@ function diffAgainst(_treeA, _treeB, onlyA, onlyB, different) { return finishCursorWalk(otherCursor, thisCursor, compareKeys, onlyB); return undefined; } -exports.diffAgainst = diffAgainst; +exports.default = diffAgainst; /** * Finishes walking `cursor` once the other cursor has already completed its walk. */ -var finishCursorWalk = function (cursor, cursorFinished, compareKeys, callback) { +function finishCursorWalk(cursor, cursorFinished, compareKeys, callback) { var compared = compareDiffCursors(cursor, cursorFinished, compareKeys); if (compared === 0) { if (!stepDiffCursor(cursor)) @@ -140,11 +139,11 @@ var finishCursorWalk = function (cursor, cursorFinished, compareKeys, callback) (0, b_tree_1.check)(false, 'cursor walk terminated early'); } return stepToEnd(cursor, callback); -}; +} /** * Walks the cursor to the end of the tree, invoking the callback for each key/value pair. 
*/ -var stepToEnd = function (cursor, callback) { +function stepToEnd(cursor, callback) { var canStep = true; while (canStep) { var leaf = cursor.leaf, levelIndices = cursor.levelIndices, currentKey = cursor.currentKey; @@ -157,8 +156,8 @@ var stepToEnd = function (cursor, callback) { canStep = stepDiffCursor(cursor); } return undefined; -}; -var makeDiffCursor = function (internal) { +} +function makeDiffCursor(internal) { var root = internal._root; return { height: internal.height, @@ -167,12 +166,12 @@ var makeDiffCursor = function (internal) { leaf: undefined, currentKey: root.maxKey() }; -}; +} /** * Advances the cursor to the next step in the walk of its tree. * Cursors are walked backwards in sort order, as this allows them to leverage maxKey() in order to be compared in O(1). */ -var stepDiffCursor = function (cursor, stepToNode) { +function stepDiffCursor(cursor, stepToNode) { var internalSpine = cursor.internalSpine, levelIndices = cursor.levelIndices, leaf = cursor.leaf; if (stepToNode === true || leaf) { var levelsLength = levelIndices.length; @@ -229,12 +228,12 @@ var stepDiffCursor = function (cursor, stepToNode) { } return true; } -}; +} /** * Compares two cursors and returns which cursor is ahead in the traversal. * Note that cursors advance in reverse sort order. 
*/ -var compareDiffCursors = function (cursorA, cursorB, compareKeys) { +function compareDiffCursors(cursorA, cursorB, compareKeys) { var heightA = cursorA.height, currentKeyA = cursorA.currentKey, levelIndicesA = cursorA.levelIndices; var heightB = cursorB.height, currentKeyB = cursorB.currentKey, levelIndicesB = cursorB.levelIndices; // Reverse the comparison order, as cursors are advanced in reverse sorting order @@ -250,5 +249,4 @@ var compareDiffCursors = function (cursorA, cursorB, compareKeys) { var depthANormalized = levelIndicesA.length - (heightA - heightMin); var depthBNormalized = levelIndicesB.length - (heightB - heightMin); return depthANormalized - depthBNormalized; -}; -exports.default = diffAgainst; +} diff --git a/extended/diffAgainst.ts b/extended/diffAgainst.ts index 154d981..59af293 100644 --- a/extended/diffAgainst.ts +++ b/extended/diffAgainst.ts @@ -2,27 +2,6 @@ import BTree from '../b+tree'; import { BNode, BNodeInternal, check } from '../b+tree'; import type { BTreeWithInternals } from './shared'; -/** - * A walkable pointer into a BTree for computing efficient diffs between trees with shared data. - * - A cursor points to either a key/value pair (KVP) or a node (which can be either a leaf or an internal node). - * As a consequence, a cursor cannot be created for an empty tree. - * - A cursor can be walked forwards using `step`. A cursor can be compared to another cursor to - * determine which is ahead in advancement. - * - A cursor is valid only for the tree it was created from, and only until the first edit made to - * that tree since the cursor's creation. - * - A cursor contains a key for the current location, which is the maxKey when the cursor points to a node - * and a key corresponding to a value when pointing to a leaf. - * - Leaf is only populated if the cursor points to a KVP. If this is the case, levelIndices.length === internalSpine.length + 1 - * and levelIndices[levelIndices.length - 1] is the index of the value. 
- */ -type DiffCursor = { - height: number; - internalSpine: BNode[][]; - levelIndices: number[]; - leaf: BNode | undefined; - currentKey: K; -}; - /** * Computes the differences between `treeA` and `treeB`. * For efficiency, the diff is returned via invocations of supplied handlers. @@ -36,7 +15,7 @@ type DiffCursor = { * @param onlyB Callback invoked for all keys only present in `treeB`. * @param different Callback invoked for all keys with differing values. */ -export function diffAgainst( +export default function diffAgainst( _treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { break?: R } | void, @@ -156,12 +135,12 @@ export function diffAgainst( /** * Finishes walking `cursor` once the other cursor has already completed its walk. */ -const finishCursorWalk = ( +function finishCursorWalk( cursor: DiffCursor, cursorFinished: DiffCursor, compareKeys: (a: K, b: K) => number, callback: (k: K, v: V) => { break?: R } | void -): R | undefined => { +): R | undefined { const compared = compareDiffCursors(cursor, cursorFinished, compareKeys); if (compared === 0) { if (!stepDiffCursor(cursor)) @@ -170,15 +149,15 @@ const finishCursorWalk = ( check(false, 'cursor walk terminated early'); } return stepToEnd(cursor, callback); -}; +} /** * Walks the cursor to the end of the tree, invoking the callback for each key/value pair. 
*/ -const stepToEnd = ( +function stepToEnd( cursor: DiffCursor, callback: (k: K, v: V) => { break?: R } | void -): R | undefined => { +): R | undefined { let canStep = true; while (canStep) { const { leaf, levelIndices, currentKey } = cursor; @@ -191,11 +170,11 @@ const stepToEnd = ( canStep = stepDiffCursor(cursor); } return undefined; -}; +} -const makeDiffCursor = ( +function makeDiffCursor( internal: BTreeWithInternals -): DiffCursor => { +): DiffCursor { const root = internal._root; return { height: internal.height, @@ -204,13 +183,13 @@ const makeDiffCursor = ( leaf: undefined, currentKey: root.maxKey() }; -}; +} /** * Advances the cursor to the next step in the walk of its tree. * Cursors are walked backwards in sort order, as this allows them to leverage maxKey() in order to be compared in O(1). */ -const stepDiffCursor = (cursor: DiffCursor, stepToNode?: boolean): boolean => { +function stepDiffCursor(cursor: DiffCursor, stepToNode?: boolean): boolean { const { internalSpine, levelIndices, leaf } = cursor; if (stepToNode === true || leaf) { const levelsLength = levelIndices.length; @@ -264,17 +243,17 @@ const stepDiffCursor = (cursor: DiffCursor, stepToNode?: boolean): b } return true; } -}; +} /** * Compares two cursors and returns which cursor is ahead in the traversal. * Note that cursors advance in reverse sort order. 
*/ -const compareDiffCursors = ( +function compareDiffCursors( cursorA: DiffCursor, cursorB: DiffCursor, compareKeys: (a: K, b: K) => number -): number => { +): number { const { height: heightA, currentKey: currentKeyA, levelIndices: levelIndicesA } = cursorA; const { height: heightB, currentKey: currentKeyB, levelIndices: levelIndicesB } = cursorB; // Reverse the comparison order, as cursors are advanced in reverse sorting order @@ -291,6 +270,26 @@ const compareDiffCursors = ( const depthANormalized = levelIndicesA.length - (heightA - heightMin); const depthBNormalized = levelIndicesB.length - (heightB - heightMin); return depthANormalized - depthBNormalized; +} + +/** + * A walkable pointer into a BTree for computing efficient diffs between trees with shared data. + * - A cursor points to either a key/value pair (KVP) or a node (which can be either a leaf or an internal node). + * As a consequence, a cursor cannot be created for an empty tree. + * - A cursor can be walked forwards using `step`. A cursor can be compared to another cursor to + * determine which is ahead in advancement. + * - A cursor is valid only for the tree it was created from, and only until the first edit made to + * that tree since the cursor's creation. + * - A cursor contains a key for the current location, which is the maxKey when the cursor points to a node + * and a key corresponding to a value when pointing to a leaf. + * - Leaf is only populated if the cursor points to a KVP. If this is the case, levelIndices.length === internalSpine.length + 1 + * and levelIndices[levelIndices.length - 1] is the index of the value. 
+ */ +type DiffCursor = { + height: number; + internalSpine: BNode[][]; + levelIndices: number[]; + leaf: BNode | undefined; + currentKey: K; }; -export default diffAgainst; diff --git a/extended/index.js b/extended/index.js index e2f8f2b..354a048 100644 --- a/extended/index.js +++ b/extended/index.js @@ -39,7 +39,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) { Object.defineProperty(exports, "__esModule", { value: true }); exports.BTreeEx = void 0; var b_tree_1 = __importStar(require("../b+tree")); -var diffAgainst_1 = require("./diffAgainst"); +var diffAgainst_1 = __importDefault(require("./diffAgainst")); var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); var union_1 = __importDefault(require("./union")); var bulkLoad_1 = require("./bulkLoad"); @@ -87,7 +87,7 @@ var BTreeEx = /** @class */ (function (_super) { * @param different Callback invoked for all keys with differing values. */ BTreeEx.prototype.diffAgainst = function (other, onlyThis, onlyOther, different) { - return (0, diffAgainst_1.diffAgainst)(this, other, onlyThis, onlyOther, different); + return (0, diffAgainst_1.default)(this, other, onlyThis, onlyOther, different); }; /** * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. 
diff --git a/extended/index.ts b/extended/index.ts index ea25134..7b670d8 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -1,6 +1,6 @@ import BTree, { defaultComparator } from '../b+tree'; import type { BTreeWithInternals } from './shared'; -import { diffAgainst as diffAgainstAlgorithm } from './diffAgainst'; +import diffAgainst from './diffAgainst'; import forEachKeyInBoth from './forEachKeyInBoth'; import union from './union'; import { bulkLoadRoot } from './bulkLoad'; @@ -57,7 +57,7 @@ export class BTreeEx extends BTree { onlyOther?: (k: K, v: V) => { break?: R } | void, different?: (k: K, vThis: V, vOther: V) => { break?: R } | void ): R | undefined { - return diffAgainstAlgorithm(this, other, onlyThis, onlyOther, different); + return diffAgainst(this, other, onlyThis, onlyOther, different); } /** From 6b12830bcdc52323fd1165e3625e7d5ff04d9f73 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 11:58:23 -0800 Subject: [PATCH 083/143] renames, prune types --- extended/decompose.js | 16 +++++----- extended/decompose.ts | 53 +++++++++++++++++-------------- extended/index.d.ts | 7 ++++ extended/index.js | 7 ++++ extended/index.ts | 7 ++++ extended/parallelWalk.d.ts | 44 +------------------------- extended/parallelWalk.js | 23 ++++++++++++++ extended/parallelWalk.ts | 65 ++++++++++++++++++++++++++------------ 8 files changed, 127 insertions(+), 95 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index e6cd0d8..ba41e8c 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -14,7 +14,7 @@ var parallelWalk_1 = require("./parallelWalk"); * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. 
* @internal */ -function decompose(left, right, mergeValues, ignoreRight) { +function decompose(left, right, combineFn, ignoreRight) { if (ignoreRight === void 0) { ignoreRight = false; } var cmp = left._compare; (0, b_tree_1.check)(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); @@ -212,9 +212,9 @@ function decompose(left, right, mergeValues, ignoreRight) { var vB = curB.leaf.values[curB.leafIndex]; // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value // to pending because they respect the areEqual flag during their moves. - var merged = mergeValues(key, vA, vB); - if (merged !== undefined) - (0, shared_1.alternatingPush)(pending, key, merged); + var combined = combineFn(key, vA, vB); + if (combined !== undefined) + (0, shared_1.alternatingPush)(pending, key, combined); var outTrailing = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); var outLeading = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); if (outTrailing || outLeading) { @@ -286,14 +286,14 @@ function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, m processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, getLeftmostIndex, getLeftmostIndex, splitOffLeftSide, parallelWalk_1.noop // left side appending doesn't update max keys ); } - var merged = new constructor(undefined, cmp, maxNodeSize); - merged._root = frontier[0]; + var reconstructed = new constructor(undefined, cmp, maxNodeSize); + reconstructed._root = frontier[0]; // Return the resulting tree - return merged; + return reconstructed; } exports.buildFromDecomposition = buildFromDecomposition; /** - * Processes one side (left or right) of the disjoint subtree set during a merge operation. + * Processes one side (left or right) of the disjoint subtree set during a reconstruction operation. * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. 
* @internal */ diff --git a/extended/decompose.ts b/extended/decompose.ts index 8ecf62b..e941c7d 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -1,6 +1,6 @@ import BTree, { areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, flushToLeaves, type BTreeWithInternals } from './shared'; -import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne, moveTo, noop } from "./parallelWalk"; +import { createCursor, getKey, Cursor, moveForwardOne, moveTo, noop } from "./parallelWalk"; /** * A set of disjoint nodes, their heights, and the index of the tallest node. @@ -8,6 +8,11 @@ import { createCursor, getKey, MergeCursor, MergeCursorPayload, moveForwardOne, */ export type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; +/** + * Payload type used by decomposition cursors. + */ +type DecomposePayload = { disqualified: boolean }; + /** * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. 
@@ -21,7 +26,7 @@ export type DecomposeResult = { disjoint: (number | BNode)[], talles export function decompose( left: BTreeWithInternals, right: BTreeWithInternals, - mergeValues: (key: K, leftValue: V, rightValue: V) => V | undefined, + combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined, ignoreRight: boolean = false ): DecomposeResult { const cmp = left._compare; @@ -76,7 +81,7 @@ export function decompose( }; // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) - const disqualifySpine = (cursor: MergeCursor, depthFrom: number) => { + const disqualifySpine = (cursor: Cursor, depthFrom: number) => { const spine = cursor.spine; for (let i = depthFrom; i >= 0; --i) { const payload = spine[i].payload; @@ -90,7 +95,7 @@ export function decompose( }; // Cursor payload factory - const makePayload = (): MergeCursorPayload => ({ disqualified: false }); + const makePayload = (): DecomposePayload => ({ disqualified: false }); const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { const keys = leaf.keys; @@ -101,7 +106,7 @@ export function decompose( const onMoveInLeaf = ( leaf: BNode, - payload: MergeCursorPayload, + payload: DecomposePayload, fromIndex: number, toIndex: number, startedEqual: boolean @@ -114,10 +119,10 @@ export function decompose( const onExitLeaf = ( leaf: BNode, - payload: MergeCursorPayload, + payload: DecomposePayload, startingIndex: number, startedEqual: boolean, - cursorThis: MergeCursor, + cursorThis: Cursor, ) => { highestDisjoint = undefined; if (!payload.disqualified) { @@ -138,11 +143,11 @@ export function decompose( const onStepUp = ( parent: BNodeInternal, height: number, - payload: MergeCursorPayload, + payload: DecomposePayload, fromIndex: number, spineIndex: number, stepDownIndex: number, - cursorThis: MergeCursor + cursorThis: Cursor ) => { const children = parent.children; const nextHeight = height - 1; @@ -178,7 +183,7 @@ export function decompose( height: number, 
spineIndex: number, stepDownIndex: number, - cursorThis: MergeCursor + cursorThis: Cursor ) => { if (stepDownIndex > 0) { // When we step down into a node, we know that we have walked from a key that is less than our target. @@ -198,8 +203,8 @@ export function decompose( const onEnterLeaf = ( leaf: BNode, destIndex: number, - cursorThis: MergeCursor, - cursorOther: MergeCursor + cursorThis: Cursor, + cursorOther: Cursor ) => { if (destIndex > 0 || areOverlapping(leaf.minKey()!, leaf.maxKey(), getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { @@ -219,14 +224,14 @@ export function decompose( const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; // Initialize cursors at minimum keys. - const curA = createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + const curA = createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); let curB: typeof curA; if (ignoreRight) { - const dummyPayload: MergeCursorPayload = { disqualified: true }; - curB = createCursor(right, () => dummyPayload, noop, noop, noop, noop, noop); + const dummyPayload: DecomposePayload = { disqualified: true }; + curB = createCursor(right, () => dummyPayload, noop, noop, noop, noop, noop); } else { - curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); } // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful @@ -239,7 +244,7 @@ export function decompose( // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. // Initialize disqualification w.r.t. opposite leaf. 
- const initDisqualify = (cur: MergeCursor, other: MergeCursor) => { + const initDisqualify = (cur: Cursor, other: Cursor) => { const minKey = getKey(cur); const otherMin = getKey(other); const otherMax = other.leaf.maxKey(); @@ -270,9 +275,9 @@ export function decompose( const vB = curB.leaf.values[curB.leafIndex]; // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value // to pending because they respect the areEqual flag during their moves. - const merged = mergeValues(key, vA, vB); - if (merged !== undefined) - alternatingPush(pending, key, merged); + const combined = combineFn(key, vA, vB); + if (combined !== undefined) + alternatingPush(pending, key, combined); const outTrailing = moveForwardOne(trailing, leading, key, cmp); const outLeading = moveForwardOne(leading, trailing, key, cmp); if (outTrailing || outLeading) { @@ -371,15 +376,15 @@ export function buildFromDecomposition, K, V>( ); } - const merged = new constructor(undefined, cmp, maxNodeSize); - (merged as unknown as BTreeWithInternals)._root = frontier[0]; + const reconstructed = new constructor(undefined, cmp, maxNodeSize); + (reconstructed as unknown as BTreeWithInternals)._root = frontier[0]; // Return the resulting tree - return merged; + return reconstructed; } /** - * Processes one side (left or right) of the disjoint subtree set during a merge operation. + * Processes one side (left or right) of the disjoint subtree set during a reconstruction operation. * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. * @internal */ diff --git a/extended/index.d.ts b/extended/index.d.ts index 89b7474..ec7a2a6 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -1,4 +1,11 @@ import BTree from '../b+tree'; +/** + * An extended version of the `BTree` class that includes additional functionality + * such as bulk loading, set operations, and diffing. 
+ * It is separated to keep the core BTree class small from a bundle size perspective. + * Note: each additional functionality piece is available as a standalone function from the extended folder. + * @extends BTree + */ export declare class BTreeEx extends BTree { static bulkLoad(entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number): BTreeEx; clone(): this; diff --git a/extended/index.js b/extended/index.js index 354a048..2efa71e 100644 --- a/extended/index.js +++ b/extended/index.js @@ -43,6 +43,13 @@ var diffAgainst_1 = __importDefault(require("./diffAgainst")); var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); var union_1 = __importDefault(require("./union")); var bulkLoad_1 = require("./bulkLoad"); +/** + * An extended version of the `BTree` class that includes additional functionality + * such as bulk loading, set operations, and diffing. + * It is separated to keep the core BTree class small from a bundle size perspective. + * Note: each additional functionality piece is available as a standalone function from the extended folder. + * @extends BTree + */ var BTreeEx = /** @class */ (function (_super) { __extends(BTreeEx, _super); function BTreeEx() { diff --git a/extended/index.ts b/extended/index.ts index 7b670d8..215509d 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -5,6 +5,13 @@ import forEachKeyInBoth from './forEachKeyInBoth'; import union from './union'; import { bulkLoadRoot } from './bulkLoad'; +/** + * An extended version of the `BTree` class that includes additional functionality + * such as bulk loading, set operations, and diffing. + * It is separated to keep the core BTree class small from a bundle size perspective. + * Note: each additional functionality piece is available as a standalone function from the extended folder. 
+ * @extends BTree + */ export class BTreeEx extends BTree { static bulkLoad( entries: (K | V)[], diff --git a/extended/parallelWalk.d.ts b/extended/parallelWalk.d.ts index 7e1dac9..cb0ff5c 100644 --- a/extended/parallelWalk.d.ts +++ b/extended/parallelWalk.d.ts @@ -1,43 +1 @@ -import { BNode, BNodeInternal } from '../b+tree'; -import type { BTreeWithInternals } from './shared'; -export declare type MergeCursorPayload = { - disqualified: boolean; -}; -export interface MergeCursor { - tree: BTreeWithInternals; - leaf: BNode; - leafIndex: number; - spine: Array<{ - node: BNodeInternal; - childIndex: number; - payload: TPayload; - }>; - leafPayload: TPayload; - makePayload: () => TPayload; - onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; - onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; - onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; - onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; - onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; -} -/** - * Walks the cursor forward by one key. - * Should only be called to advance cursors that started equal. - * Returns true if end-of-tree was reached (cursor not structurally mutated). - */ -export declare function moveForwardOne(cur: MergeCursor, other: MergeCursor, currentKey: K, cmp: (a: K, b: K) => number): boolean; -/** - * Create a cursor pointing to the leftmost key of the supplied tree. 
- */ -export declare function createCursor(tree: BTreeWithInternals, makePayload: MergeCursor["makePayload"], onEnterLeaf: MergeCursor["onEnterLeaf"], onMoveInLeaf: MergeCursor["onMoveInLeaf"], onExitLeaf: MergeCursor["onExitLeaf"], onStepUp: MergeCursor["onStepUp"], onStepDown: MergeCursor["onStepDown"]): MergeCursor; -export declare function getKey(c: MergeCursor): K; -/** - * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. - * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). - * Also returns a boolean indicating if the target key was landed on exactly. - */ -export declare function moveTo(cur: MergeCursor, other: MergeCursor, targetKey: K, isInclusive: boolean, startedEqual: boolean, cmp: (a: K, b: K) => number): [outOfTree: boolean, targetExactlyReached: boolean]; -export declare function noop(): void; -export declare const comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; -export declare const branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; -export declare function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals): number; +export {}; diff --git a/extended/parallelWalk.js b/extended/parallelWalk.js index 11eeead..3d20a48 100644 --- a/extended/parallelWalk.js +++ b/extended/parallelWalk.js @@ -5,6 +5,7 @@ exports.checkCanDoSetOperation = exports.branchingFactorErrorMsg = exports.compa * Walks the cursor forward by one key. * Should only be called to advance cursors that started equal. * Returns true if end-of-tree was reached (cursor not structurally mutated). + * @internal */ function moveForwardOne(cur, other, currentKey, cmp) { var leaf = cur.leaf; @@ -22,6 +23,7 @@ function moveForwardOne(cur, other, currentKey, cmp) { exports.moveForwardOne = moveForwardOne; /** * Create a cursor pointing to the leftmost key of the supplied tree. 
+ * @internal */ function createCursor(tree, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown) { var spine = []; @@ -48,6 +50,10 @@ function createCursor(tree, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, return cur; } exports.createCursor = createCursor; +/** + * Gets the key at the current cursor position. + * @internal + */ function getKey(c) { return c.leaf.keys[c.leafIndex]; } @@ -56,6 +62,7 @@ exports.getKey = getKey; * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). * Also returns a boolean indicating if the target key was landed on exactly. + * @internal */ function moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp) { // Cache callbacks for perf @@ -172,10 +179,26 @@ function moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp) { return [false, targetExactlyReached]; } exports.moveTo = moveTo; +/** + * A no-operation function. + * @internal + */ function noop() { } exports.noop = noop; +/** + * Error message used when comparators differ between trees. + * @internal + */ exports.comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; +/** + * Error message used when branching factors differ between trees. + * @internal + */ exports.branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; +/** + * Checks that two trees can be used together in a set operation. 
+ * @internal + */ function checkCanDoSetOperation(treeA, treeB) { if (treeA._compare !== treeB._compare) throw new Error(exports.comparatorErrorMsg); diff --git a/extended/parallelWalk.ts b/extended/parallelWalk.ts index d4941e2..748ec3f 100644 --- a/extended/parallelWalk.ts +++ b/extended/parallelWalk.ts @@ -1,9 +1,11 @@ import { BNode, BNodeInternal } from '../b+tree'; import type { BTreeWithInternals } from './shared'; -export type MergeCursorPayload = { disqualified: boolean }; - -export interface MergeCursor { +/** + * A walkable cursor for BTree set operations. + * @internal + */ +export interface Cursor { tree: BTreeWithInternals; leaf: BNode; leafIndex: number; @@ -11,20 +13,21 @@ export interface MergeCursor { leafPayload: TPayload; makePayload: () => TPayload; onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; - onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: MergeCursor) => void; - onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; - onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: MergeCursor) => void; - onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: MergeCursor, cursorOther: MergeCursor) => void; + onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: Cursor) => void; + onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: Cursor) => void; + onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: Cursor) => void; + onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: Cursor, cursorOther: Cursor) => void; } /** * Walks the cursor forward by one key. 
* Should only be called to advance cursors that started equal. * Returns true if end-of-tree was reached (cursor not structurally mutated). + * @internal */ export function moveForwardOne( - cur: MergeCursor, - other: MergeCursor, + cur: Cursor, + other: Cursor, currentKey: K, cmp: (a: K, b: K) => number ): boolean { @@ -44,16 +47,17 @@ export function moveForwardOne( /** * Create a cursor pointing to the leftmost key of the supplied tree. + * @internal */ export function createCursor( tree: BTreeWithInternals, - makePayload: MergeCursor["makePayload"], - onEnterLeaf: MergeCursor["onEnterLeaf"], - onMoveInLeaf: MergeCursor["onMoveInLeaf"], - onExitLeaf: MergeCursor["onExitLeaf"], - onStepUp: MergeCursor["onStepUp"], - onStepDown: MergeCursor["onStepDown"], -): MergeCursor { + makePayload: Cursor["makePayload"], + onEnterLeaf: Cursor["onEnterLeaf"], + onMoveInLeaf: Cursor["onMoveInLeaf"], + onExitLeaf: Cursor["onExitLeaf"], + onStepUp: Cursor["onStepUp"], + onStepDown: Cursor["onStepDown"], +): Cursor { const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; let n: BNode = tree._root; while (!n.isLeaf) { @@ -63,14 +67,18 @@ export function createCursor( n = ni.children[0]; } const leafPayload = makePayload(); - const cur: MergeCursor = { + const cur: Cursor = { tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown }; return cur; } -export function getKey(c: MergeCursor): K { +/** + * Gets the key at the current cursor position. + * @internal + */ +export function getKey(c: Cursor): K { return c.leaf.keys[c.leafIndex]; } @@ -78,10 +86,11 @@ export function getKey(c: MergeCursor): K { * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). * Also returns a boolean indicating if the target key was landed on exactly. 
+ * @internal */ export function moveTo( - cur: MergeCursor, - other: MergeCursor, + cur: Cursor, + other: Cursor, targetKey: K, isInclusive: boolean, startedEqual: boolean, @@ -207,12 +216,28 @@ export function moveTo( return [false, targetExactlyReached]; } +/** + * A no-operation function. + * @internal + */ export function noop(): void { } +/** + * Error message used when comparators differ between trees. + * @internal + */ export const comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; +/** + * Error message used when branching factors differ between trees. + * @internal + */ export const branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; +/** + * Checks that two trees can be used together in a set operation. + * @internal + */ export function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals): number { if (treeA._compare !== treeB._compare) throw new Error(comparatorErrorMsg); From 03cc59c20476af57e1e5ac57257d17b3737a12ec Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 12:00:02 -0800 Subject: [PATCH 084/143] cleanup --- extended/shared.d.ts | 9 --------- extended/shared.js | 6 ++++++ extended/shared.ts | 10 ++++++++++ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/extended/shared.d.ts b/extended/shared.d.ts index e6f92f9..81d48ef 100644 --- a/extended/shared.d.ts +++ b/extended/shared.d.ts @@ -1,12 +1,3 @@ -import { BNode } from '../b+tree'; -import BTree from '../b+tree'; -export declare type BTreeWithInternals = { - _root: BNode; - _size: number; - _maxNodeSize: number; - _compare: (a: K, b: K) => number; -} & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; -export declare function flushToLeaves(alternatingList: (K | V)[], maxNodeSize: number, onLeafCreation: (node: BNode) => void): number; export declare function alternatingCount(list: unknown[]): number; export declare function 
alternatingGetFirst(list: Array, index: number): TFirst; export declare function alternatingGetSecond(list: Array, index: number): TSecond; diff --git a/extended/shared.js b/extended/shared.js index b739735..4f3c094 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -2,6 +2,12 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.flushToLeaves = void 0; var b_tree_1 = require("../b+tree"); +/** + * Flushes entries from an alternating list into leaf nodes. + * The leaf nodes are packed as tightly as possible while ensuring all + * nodes are at least 50% full (if more than one leaf is created). + * @internal + */ function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation) { var totalPairs = alternatingCount(alternatingList); if (totalPairs === 0) diff --git a/extended/shared.ts b/extended/shared.ts index 080c53d..0395d1b 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -1,6 +1,10 @@ import { BNode } from '../b+tree'; import BTree from '../b+tree'; +/** + * BTree with access to internal properties. + * @internal + */ export type BTreeWithInternals = { _root: BNode; _size: number; @@ -8,6 +12,12 @@ export type BTreeWithInternals = { _compare: (a: K, b: K) => number; } & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; +/** + * Flushes entries from an alternating list into leaf nodes. + * The leaf nodes are packed as tightly as possible while ensuring all + * nodes are at least 50% full (if more than one leaf is created). 
+ * @internal + */ export function flushToLeaves( alternatingList: (K | V)[], maxNodeSize: number, From d33e2005f9bbb6a1e0370ecf534ed82c85f73fcf Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 12:16:49 -0800 Subject: [PATCH 085/143] cleanup alternating helpers --- extended/bulkLoad.js | 3 ++- extended/bulkLoad.ts | 13 +++++------ extended/decompose.js | 4 ++-- extended/decompose.ts | 24 ++++++++++----------- extended/intersect.js | 2 +- extended/intersect.ts | 6 +++--- extended/shared.d.ts | 5 +---- extended/shared.js | 26 +++++++++++++++++++++- extended/shared.ts | 50 ++++++++++++++++++++++++++++++++++--------- 9 files changed, 93 insertions(+), 40 deletions(-) diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index f5b78a9..94fbb25 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -32,7 +32,8 @@ var shared_1 = require("./shared"); * @returns A new BTree containing the given entries. */ function bulkLoad(entries, maxNodeSize, compare) { - var root = bulkLoadRoot(entries, maxNodeSize, compare); + var alternatingEntries = entries; + var root = bulkLoadRoot(alternatingEntries, maxNodeSize, compare); var tree = new b_tree_1.default(undefined, compare, maxNodeSize); var target = tree; target._root = root; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 61ee152..91e587c 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,5 +1,5 @@ import BTree, { BNode, BNodeInternal, check, sumChildSizes } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, flushToLeaves, type BTreeWithInternals } from './shared'; +import { alternatingCount, alternatingGetFirst, flushToLeaves, type AlternatingList, type BTreeWithInternals } from './shared'; /** * Loads a B-Tree from a sorted list of entries in bulk. 
This is faster than inserting @@ -15,7 +15,8 @@ export function bulkLoad( maxNodeSize: number, compare: (a: K, b: K) => number ): BTree { - const root = bulkLoadRoot(entries, maxNodeSize, compare); + const alternatingEntries = entries as AlternatingList; + const root = bulkLoadRoot(alternatingEntries, maxNodeSize, compare); const tree = new BTree(undefined, compare, maxNodeSize); const target = tree as unknown as BTreeWithInternals; target._root = root; @@ -28,15 +29,15 @@ export function bulkLoad( * @internal */ export function bulkLoadRoot( - entries: (K | V)[], + entries: AlternatingList, maxNodeSize: number, compare: (a: K, b: K) => number ): BNode { const totalPairs = alternatingCount(entries); if (totalPairs > 1) { - let previousKey = alternatingGetFirst(entries, 0); + let previousKey = alternatingGetFirst(entries, 0); for (let i = 1; i < totalPairs; i++) { - const key = alternatingGetFirst(entries, i); + const key = alternatingGetFirst(entries, i); if (compare(previousKey, key) >= 0) throw new Error("bulkLoad: entries must be sorted by key in strictly ascending order"); previousKey = key; @@ -44,7 +45,7 @@ export function bulkLoadRoot( } const leaves: BNode[] = []; - flushToLeaves(entries, maxNodeSize, (leaf) => leaves.push(leaf)); + flushToLeaves(entries, maxNodeSize, (leaf) => leaves.push(leaf)); if (leaves.length === 0) return new BNode(); diff --git a/extended/decompose.js b/extended/decompose.js index ba41e8c..874b022 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -20,7 +20,7 @@ function decompose(left, right, combineFn, ignoreRight) { (0, b_tree_1.check)(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); // Holds the disjoint nodes that result from decomposition. 
// Alternating entries of (height, node) to avoid creating small tuples - var disjoint = []; + var disjoint = (0, shared_1.createAlternatingList)(); // During the decomposition, leaves that are not disjoint are decomposed into individual entries // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused // disjoint subtree is added to the disjoint set. @@ -28,7 +28,7 @@ function decompose(left, right, combineFn, ignoreRight) { // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] - var pending = []; + var pending = (0, shared_1.createAlternatingList)(); var tallestIndex = -1, tallestHeight = -1; // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. // This is done because we cannot know immediately whether we can add the node to the disjoint set diff --git a/extended/decompose.ts b/extended/decompose.ts index e941c7d..4d38a96 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -1,12 +1,12 @@ import BTree, { areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, flushToLeaves, type BTreeWithInternals } from './shared'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, createAlternatingList, flushToLeaves, type AlternatingList, type BTreeWithInternals } from './shared'; import { createCursor, getKey, Cursor, moveForwardOne, moveTo, noop } from "./parallelWalk"; /** * A set of disjoint nodes, their heights, and the index of the tallest node. 
* @internal */ -export type DecomposeResult = { disjoint: (number | BNode)[], tallestIndex: number }; +export type DecomposeResult = { disjoint: AlternatingList>, tallestIndex: number }; /** * Payload type used by decomposition cursors. @@ -33,7 +33,7 @@ export function decompose( check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); // Holds the disjoint nodes that result from decomposition. // Alternating entries of (height, node) to avoid creating small tuples - const disjoint: (number | BNode)[] = []; + const disjoint = createAlternatingList>(); // During the decomposition, leaves that are not disjoint are decomposed into individual entries // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused // disjoint subtree is added to the disjoint set. @@ -41,7 +41,7 @@ export function decompose( // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] - const pending: (K | V)[] = []; + const pending = createAlternatingList(); let tallestIndex = -1, tallestHeight = -1; // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. 
@@ -66,7 +66,7 @@ export function decompose( const addSharedNodeToDisjointSet = (node: BNode, height: number) => { flushPendingEntries(); node.isShared = true; - alternatingPush>(disjoint, height, node); + alternatingPush(disjoint, height, node); if (height > tallestHeight) { tallestIndex = alternatingCount(disjoint) - 1; tallestHeight = height; @@ -101,7 +101,7 @@ export function decompose( const keys = leaf.keys; const values = leaf.values; for (let i = from; i < toExclusive; ++i) - alternatingPush(pending, keys[i], values[i]); + alternatingPush(pending, keys[i], values[i]); }; const onMoveInLeaf = ( @@ -277,7 +277,7 @@ export function decompose( // to pending because they respect the areEqual flag during their moves. const combined = combineFn(key, vA, vB); if (combined !== undefined) - alternatingPush(pending, key, combined); + alternatingPush(pending, key, combined); const outTrailing = moveForwardOne(trailing, leading, key, cmp); const outLeading = moveForwardOne(leading, trailing, key, cmp); if (outTrailing || outLeading) { @@ -339,7 +339,7 @@ export function buildFromDecomposition, K, V>( // the leaf level on that side of the tree. Each appended subtree is appended to the node at the // same height as itself on the frontier. Each tree is guaranteed to be at most as tall as the // current frontier because we start from the tallest subtree and work outward. 
- const initialRoot = alternatingGetSecond>(disjoint, tallestIndex); + const initialRoot = alternatingGetSecond(disjoint, tallestIndex); const frontier: BNode[] = [initialRoot]; // Process all subtrees to the right of the tallest subtree @@ -390,7 +390,7 @@ export function buildFromDecomposition, K, V>( */ function processSide( branchingFactor: number, - disjoint: (number | BNode)[], + disjoint: AlternatingList>, spine: BNode[], start: number, end: number, @@ -422,8 +422,8 @@ function processSide( for (let i = start; i != end; i += step) { const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf - const subtree = alternatingGetSecond>(disjoint, i); - const subtreeHeight = alternatingGetFirst>(disjoint, i); + const subtree = alternatingGetSecond(disjoint, i); + const subtreeHeight = alternatingGetFirst(disjoint, i); const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation @@ -657,4 +657,4 @@ function splitOffLeftSide(node: BNodeInternal): BNodeInternal function updateRightMax(node: BNodeInternal, maxBelow: K): void { node.keys[node.keys.length - 1] = maxBelow; -} \ No newline at end of file +} diff --git a/extended/intersect.js b/extended/intersect.js index eaae693..3429aaf 100644 --- a/extended/intersect.js +++ b/extended/intersect.js @@ -30,7 +30,7 @@ function intersect(treeA, treeB, combineFn) { return treeB.clone(); if (_treeB._root.size() === 0) return treeA.clone(); - var intersected = []; + var intersected = (0, shared_1.createAlternatingList)(); (0, forEachKeyInBoth_1.default)(treeA, treeB, function (key, leftValue, rightValue) { var mergedValue = combineFn(key, leftValue, rightValue); (0, shared_1.alternatingPush)(intersected, key, mergedValue); diff --git a/extended/intersect.ts b/extended/intersect.ts index 49eff87..cf11ee2 100644 --- a/extended/intersect.ts +++ b/extended/intersect.ts @@ -1,5 +1,5 @@ import BTree 
from '../b+tree'; -import { alternatingPush, type BTreeWithInternals } from './shared'; +import { alternatingPush, createAlternatingList, type BTreeWithInternals } from './shared'; import { checkCanDoSetOperation } from "./parallelWalk" import { buildFromDecomposition, decompose } from './decompose'; import forEachKeyInBoth from './forEachKeyInBoth'; @@ -32,7 +32,7 @@ export default function intersect, K, V>( if (_treeB._root.size() === 0) return treeA.clone(); - const intersected: (K | V)[] = []; + const intersected = createAlternatingList(); forEachKeyInBoth(treeA, treeB, (key, leftValue, rightValue) => { const mergedValue = combineFn(key, leftValue, rightValue); alternatingPush(intersected, key, mergedValue); @@ -44,4 +44,4 @@ export default function intersect, K, V>( const decomposed = decompose(_treeA, _treeB, combineFn); const constructor = treeA.constructor as new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; return buildFromDecomposition(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); -} \ No newline at end of file +} diff --git a/extended/shared.d.ts b/extended/shared.d.ts index 81d48ef..cb0ff5c 100644 --- a/extended/shared.d.ts +++ b/extended/shared.d.ts @@ -1,4 +1 @@ -export declare function alternatingCount(list: unknown[]): number; -export declare function alternatingGetFirst(list: Array, index: number): TFirst; -export declare function alternatingGetSecond(list: Array, index: number): TSecond; -export declare function alternatingPush(list: Array, first: TFirst, second: TSecond): void; +export {}; diff --git a/extended/shared.js b/extended/shared.js index 4f3c094..92b45b3 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.flushToLeaves = void 0; 
+exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.createAlternatingList = exports.flushToLeaves = void 0; var b_tree_1 = require("../b+tree"); /** * Flushes entries from an alternating list into leaf nodes. @@ -41,18 +41,42 @@ exports.flushToLeaves = flushToLeaves; // These helpers manage a list that alternates between two types of entries. // Storing data this way avoids small tuple allocations and shows major improvements // in GC time in benchmarks. +/** + * Creates an empty alternating list with the specified element types. + * @internal + */ +function createAlternatingList() { + return []; +} +exports.createAlternatingList = createAlternatingList; +/** + * Counts the number of `[A, B]` pairs stored in the alternating list. + * @internal + */ function alternatingCount(list) { return list.length >> 1; } exports.alternatingCount = alternatingCount; +/** + * Reads the first entry of the pair at the given index. + * @internal + */ function alternatingGetFirst(list, index) { return list[index << 1]; } exports.alternatingGetFirst = alternatingGetFirst; +/** + * Reads the second entry of the pair at the given index. + * @internal + */ function alternatingGetSecond(list, index) { return list[(index << 1) + 1]; } exports.alternatingGetSecond = alternatingGetSecond; +/** + * Appends a pair to the alternating list. + * @internal + */ function alternatingPush(list, first, second) { // Micro benchmarks show this is the fastest way to do this list.push(first, second); diff --git a/extended/shared.ts b/extended/shared.ts index 0395d1b..15295f5 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -12,6 +12,12 @@ export type BTreeWithInternals = { _compare: (a: K, b: K) => number; } & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; +/** + * Alternating list storing entries as `[A0, B0, A1, B1, ...]`. 
+ * @internal + */ +export type AlternatingList = Array; + /** * Flushes entries from an alternating list into leaf nodes. * The leaf nodes are packed as tightly as possible while ensuring all @@ -19,7 +25,7 @@ export type BTreeWithInternals = { * @internal */ export function flushToLeaves( - alternatingList: (K | V)[], + alternatingList: AlternatingList, maxNodeSize: number, onLeafCreation: (node: BNode) => void ): number { @@ -38,8 +44,8 @@ export function flushToLeaves( const keys = new Array(chunkSize); const vals = new Array(chunkSize); for (let i = 0; i < chunkSize; i++) { - keys[i] = alternatingGetFirst(alternatingList, pairIndex); - vals[i] = alternatingGetSecond(alternatingList, pairIndex); + keys[i] = alternatingGetFirst(alternatingList, pairIndex); + vals[i] = alternatingGetSecond(alternatingList, pairIndex); pairIndex++; } remaining -= chunkSize; @@ -56,19 +62,43 @@ export function flushToLeaves( // Storing data this way avoids small tuple allocations and shows major improvements // in GC time in benchmarks. -export function alternatingCount(list: unknown[]): number { +/** + * Creates an empty alternating list with the specified element types. + * @internal + */ +export function createAlternatingList(): AlternatingList { + return [] as AlternatingList; +} + +/** + * Counts the number of `[A, B]` pairs stored in the alternating list. + * @internal + */ +export function alternatingCount(list: AlternatingList): number { return list.length >> 1; } -export function alternatingGetFirst(list: Array, index: number): TFirst { - return list[index << 1] as TFirst; +/** + * Reads the first entry of the pair at the given index. + * @internal + */ +export function alternatingGetFirst(list: AlternatingList, index: number): A { + return list[index << 1] as A; } -export function alternatingGetSecond(list: Array, index: number): TSecond { - return list[(index << 1) + 1] as TSecond; +/** + * Reads the second entry of the pair at the given index. 
+ * @internal + */ +export function alternatingGetSecond(list: AlternatingList, index: number): B { + return list[(index << 1) + 1] as B; } -export function alternatingPush(list: Array, first: TFirst, second: TSecond): void { +/** + * Appends a pair to the alternating list. + * @internal + */ +export function alternatingPush(list: AlternatingList, first: A, second: B): void { // Micro benchmarks show this is the fastest way to do this list.push(first, second); -} \ No newline at end of file +} From ae96df135d4f6d5cd5fdae3dc40b57221ff5b399 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 12:21:30 -0800 Subject: [PATCH 086/143] total to sizes --- scripts/size-report.js | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/scripts/size-report.js b/scripts/size-report.js index f40c19d..4cab044 100644 --- a/scripts/size-report.js +++ b/scripts/size-report.js @@ -73,7 +73,10 @@ const header = console.log(header); console.log('-'.repeat(header.length)); -for (const entry of entryPoints) { +const nonCoreTotals = { raw: 0, min: 0, gz: 0 }; +const nonCoreHasValue = { raw: false, min: false, gz: false }; + +entryPoints.forEach((entry, index) => { const raw = fileSize(entry.raw); const min = fileSize(entry.min); const gz = gzipSize(entry.min); @@ -83,6 +86,30 @@ for (const entry of entryPoints) { pad(formatBytes(min), 13) + formatBytes(gz); console.log(line); + if (index > 0) { + if (typeof raw === 'number') { + nonCoreTotals.raw += raw; + nonCoreHasValue.raw = true; + } + if (typeof min === 'number') { + nonCoreTotals.min += min; + nonCoreHasValue.min = true; + } + if (typeof gz === 'number') { + nonCoreTotals.gz += gz; + nonCoreHasValue.gz = true; + } + } +}); + +if (entryPoints.length > 1) { + const line = + pad('Non-core total', nameColumnWidth) + + pad(nonCoreHasValue.raw ? formatBytes(nonCoreTotals.raw) : 'n/a', 13) + + pad(nonCoreHasValue.min ? 
formatBytes(nonCoreTotals.min) : 'n/a', 13) + + (nonCoreHasValue.gz ? formatBytes(nonCoreTotals.gz) : 'n/a'); + console.log('-'.repeat(header.length)); + console.log(line); } if (process.exitCode) { From 070a26a005b96de2a8afab08d03d55e832842570 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 12:53:57 -0800 Subject: [PATCH 087/143] support early breaking --- extended/forEachKeyInBoth.d.ts | 7 +++++-- extended/forEachKeyInBoth.js | 8 ++++++-- extended/forEachKeyInBoth.ts | 14 ++++++++++--- extended/index.d.ts | 7 +++++-- extended/index.js | 5 +++-- extended/index.ts | 10 ++++++--- test/forEachKeyInBoth.test.ts | 37 ++++++++++++++++++++++++++++++++++ 7 files changed, 74 insertions(+), 14 deletions(-) diff --git a/extended/forEachKeyInBoth.d.ts b/extended/forEachKeyInBoth.d.ts index c5eea92..f9cdef1 100644 --- a/extended/forEachKeyInBoth.d.ts +++ b/extended/forEachKeyInBoth.d.ts @@ -1,10 +1,11 @@ import BTree from '../b+tree'; /** * Calls the supplied `callback` for each key/value pair shared by both trees. + * The callback will be called in sorted key order. * Neither tree is modified. * @param treeA First tree to compare. * @param treeB Second tree to compare. - * @param callback Invoked for keys that appear in both trees. + * @param callback Invoked for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. * @description Complexity is bounded by O(N + M) for time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -13,4 +14,6 @@ import BTree from '../b+tree'; * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ -export default function forEachKeyInBoth(treeA: BTree, treeB: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void; +export default function forEachKeyInBoth(treeA: BTree, treeB: BTree, callback: (key: K, leftValue: V, rightValue: V) => { + break?: R; +} | void): R | undefined; diff --git a/extended/forEachKeyInBoth.js b/extended/forEachKeyInBoth.js index e9e7f05..e4f6a67 100644 --- a/extended/forEachKeyInBoth.js +++ b/extended/forEachKeyInBoth.js @@ -3,10 +3,11 @@ Object.defineProperty(exports, "__esModule", { value: true }); var parallelWalk_1 = require("./parallelWalk"); /** * Calls the supplied `callback` for each key/value pair shared by both trees. + * The callback will be called in sorted key order. * Neither tree is modified. * @param treeA First tree to compare. * @param treeB Second tree to compare. - * @param callback Invoked for keys that appear in both trees. + * @param callback Invoked for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. * @description Complexity is bounded by O(N + M) for time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -37,7 +38,10 @@ function forEachKeyInBoth(treeA, treeB, callback) { var key = (0, parallelWalk_1.getKey)(leading); var vA = cursorA.leaf.values[cursorA.leafIndex]; var vB = cursorB.leaf.values[cursorB.leafIndex]; - callback(key, vA, vB); + var result = callback(key, vA, vB); + if (result && result.break) { + return result.break; + } var outT = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); var outL = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); if (outT && outL) diff --git a/extended/forEachKeyInBoth.ts b/extended/forEachKeyInBoth.ts index 5210094..5e95090 100644 --- a/extended/forEachKeyInBoth.ts +++ b/extended/forEachKeyInBoth.ts @@ -4,10 +4,11 @@ import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperat /** * Calls the supplied `callback` for each key/value pair shared by both trees. + * The callback will be called in sorted key order. * Neither tree is modified. * @param treeA First tree to compare. * @param treeB Second tree to compare. - * @param callback Invoked for keys that appear in both trees. + * @param callback Invoked for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. * @description Complexity is bounded by O(N + M) for time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -16,7 +17,11 @@ import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperat * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ -export default function forEachKeyInBoth(treeA: BTree, treeB: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void { +export default function forEachKeyInBoth( + treeA: BTree, + treeB: BTree, + callback: (key: K, leftValue: V, rightValue: V) => { break?: R } | void +): R | undefined { const _treeA = treeA as unknown as BTreeWithInternals; const _treeB = treeB as unknown as BTreeWithInternals; checkCanDoSetOperation(_treeA, _treeB); @@ -40,7 +45,10 @@ export default function forEachKeyInBoth(treeA: BTree, treeB: BTree< const key = getKey(leading); const vA = cursorA.leaf.values[cursorA.leafIndex]; const vB = cursorB.leaf.values[cursorB.leafIndex]; - callback(key, vA, vB); + const result = callback(key, vA, vB); + if (result && result.break) { + return result.break; + } const outT = moveForwardOne(trailing, leading, key, cmp); const outL = moveForwardOne(leading, trailing, key, cmp); if (outT && outL) diff --git a/extended/index.d.ts b/extended/index.d.ts index ec7a2a6..c8673d8 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -31,9 +31,10 @@ export declare class BTreeEx extends BTree { } | void): R | undefined; /** * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. + * The callback will be called in sorted key order. * Neither tree is modified. * @param other The other tree to compare with this one. - * @param callback Called for keys that appear in both trees. + * @param callback Called for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. * @description Complexity is bounded by O(N + M) time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -42,7 +43,9 @@ export declare class BTreeEx extends BTree { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ - forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void; + forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => { + break?: R; + } | void): R | undefined; /** * Efficiently unions this tree with `other`, reusing subtrees wherever possible. * Neither input tree is modified. diff --git a/extended/index.js b/extended/index.js index 2efa71e..1196537 100644 --- a/extended/index.js +++ b/extended/index.js @@ -98,9 +98,10 @@ var BTreeEx = /** @class */ (function (_super) { }; /** * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. + * The callback will be called in sorted key order. * Neither tree is modified. * @param other The other tree to compare with this one. - * @param callback Called for keys that appear in both trees. + * @param callback Called for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. * @description Complexity is bounded by O(N + M) time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -110,7 +111,7 @@ var BTreeEx = /** @class */ (function (_super) { * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ BTreeEx.prototype.forEachKeyInBoth = function (other, callback) { - (0, forEachKeyInBoth_1.default)(this, other, callback); + return (0, forEachKeyInBoth_1.default)(this, other, callback); }; /** * Efficiently unions this tree with `other`, reusing subtrees wherever possible. diff --git a/extended/index.ts b/extended/index.ts index 215509d..27babb3 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -69,9 +69,10 @@ export class BTreeEx extends BTree { /** * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. + * The callback will be called in sorted key order. * Neither tree is modified. * @param other The other tree to compare with this one. - * @param callback Called for keys that appear in both trees. + * @param callback Called for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. * @description Complexity is bounded by O(N + M) time. * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for @@ -80,8 +81,11 @@ export class BTreeEx extends BTree { * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
*/ - forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => void): void { - forEachKeyInBoth(this, other, callback); + forEachKeyInBoth( + other: BTree, + callback: (key: K, leftValue: V, rightValue: V) => { break?: R } | void + ): R | undefined { + return forEachKeyInBoth(this, other, callback); } /** diff --git a/test/forEachKeyInBoth.test.ts b/test/forEachKeyInBoth.test.ts index b15e90f..8f14917 100644 --- a/test/forEachKeyInBoth.test.ts +++ b/test/forEachKeyInBoth.test.ts @@ -124,6 +124,43 @@ function testForEachKeyInBoth(maxNodeSize: number) { { key: 4, leftValue: 40, rightValue: 400 }, ]); }); + + test('forEachKeyInBoth returns undefined when callback returns void', () => { + const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); + const tree2 = buildTree(tuples([0, 100], [2, 200], [3, 300], [4, 400])); + const visited: number[] = []; + const result = tree1.forEachKeyInBoth(tree2, key => { + visited.push(key); + }); + expect(result).toBeUndefined(); + expect(visited).toEqual([2, 3]); + }); + + test('forEachKeyInBoth ignores undefined break values and completes traversal', () => { + const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); + const tree2 = buildTree(tuples([2, 200], [3, 300], [5, 500])); + const visited: number[] = []; + const result = tree1.forEachKeyInBoth(tree2, key => { + visited.push(key); + return { break: undefined }; + }); + expect(result).toBeUndefined(); + expect(visited).toEqual([2, 3]); + }); + + test('forEachKeyInBoth breaks early when callback returns a value', () => { + const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const tree2 = buildTree(tuples([2, 200], [3, 300], [4, 400], [5, 500])); + const visited: number[] = []; + const breakResult = tree1.forEachKeyInBoth(tree2, (key, leftValue, rightValue) => { + visited.push(key); + if (key === 3) { + return { break: { key, sum: leftValue + rightValue } }; + } + }); + expect(breakResult).toEqual({ key: 3, sum: 330 }); + 
expect(visited).toEqual([2, 3]); + }); } describe('BTree forEachKeyInBoth input/output validation', () => { From 35eb623c9d11cb23281fc502408c4350eb7e6bd5 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 12 Nov 2025 20:17:58 -0800 Subject: [PATCH 088/143] wip --- extended/diffAgainst.d.ts | 21 ++++ extended/diffAgainst.js | 139 ++++++++++++++++++++++++- extended/diffAgainst.ts | 190 +++++++++++++++++++++++++++++++++- extended/forEachKeyInBoth.js | 3 +- extended/forEachKeyInBoth.ts | 6 +- extended/intersect.js | 3 +- extended/intersect.ts | 5 +- extended/parallelWalk.js | 33 +----- extended/parallelWalk.ts | 38 ++----- extended/shared.js | 25 ++++- extended/shared.ts | 26 +++++ extended/union.js | 4 +- extended/union.ts | 5 +- test/forEachKeyInBoth.test.ts | 9 +- 14 files changed, 422 insertions(+), 85 deletions(-) diff --git a/extended/diffAgainst.d.ts b/extended/diffAgainst.d.ts index eb54935..7845209 100644 --- a/extended/diffAgainst.d.ts +++ b/extended/diffAgainst.d.ts @@ -6,6 +6,7 @@ import BTree from '../b+tree'; * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. * The handlers can cause computation to early exit by returning `{ break: R }`. * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * Time complexity is O(N + M), but shared nodes are skipped entirely. * @param treeA The tree whose differences will be reported via the callbacks. * @param treeB The tree to compute a diff against. * @param onlyA Callback invoked for all keys only present in `treeA`. @@ -16,6 +17,26 @@ export default function diffAgainst(_treeA: BTree, _treeB: BTree< break?: R; } | void, onlyB?: (k: K, v: V) => { break?: R; +} | void, different?: (k: K, vA: V, vB: V) => { + break?: R; +} | void): R | undefined; +/** + * Computes the differences between `treeA` and `treeB`. + * For efficiency, the diff is returned via invocations of supplied handlers. 
+ * The computation is optimized for the case in which the two trees have large amounts of shared data + * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. + * The handlers can cause computation to early exit by returning `{ break: R }`. + * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * @param treeA The tree whose differences will be reported via the callbacks. + * @param treeB The tree to compute a diff against. + * @param onlyA Callback invoked for all keys only present in `treeA`. + * @param onlyB Callback invoked for all keys only present in `treeB`. + * @param different Callback invoked for all keys with differing values. + */ +export declare function diffAgainst2(_treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { + break?: R; +} | void, onlyB?: (k: K, v: V) => { + break?: R; } | void, different?: (k: K, vThis: V, vOther: V) => { break?: R; } | void): R | undefined; diff --git a/extended/diffAgainst.js b/extended/diffAgainst.js index db29a34..98e42c4 100644 --- a/extended/diffAgainst.js +++ b/extended/diffAgainst.js @@ -1,6 +1,9 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.diffAgainst2 = void 0; var b_tree_1 = require("../b+tree"); +var parallelWalk_1 = require("./parallelWalk"); +var shared_1 = require("./shared"); /** * Computes the differences between `treeA` and `treeB`. * For efficiency, the diff is returned via invocations of supplied handlers. @@ -8,6 +11,7 @@ var b_tree_1 = require("../b+tree"); * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. * The handlers can cause computation to early exit by returning `{ break: R }`. * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * Time complexity is O(N + M), but shared nodes are skipped entirely. 
* @param treeA The tree whose differences will be reported via the callbacks. * @param treeB The tree to compute a diff against. * @param onlyA Callback invoked for all keys only present in `treeA`. @@ -15,6 +19,139 @@ var b_tree_1 = require("../b+tree"); * @param different Callback invoked for all keys with differing values. */ function diffAgainst(_treeA, _treeB, onlyA, onlyB, different) { + var treeA = _treeA; + var treeB = _treeB; + (0, shared_1.checkCanDoSetOperation)(treeA, treeB, true); + // During the downward walk of the cursors, this will be assigned the index of the highest node that is shared between the two trees + // along the paths of the two cursors. + var highestSharedIndex = -1; + var onExitLeaf = function () { + highestSharedIndex = -1; + }; + var maybeSetHighest = function (node, height, spineIndex, cursorOther) { + if (highestSharedIndex < 0) { + var heightOther = cursorOther.spine.length; + if (height <= heightOther) { + var depthOther = heightOther - height; + if (depthOther >= 0) { + var otherNode = cursorOther.spine[depthOther].node; + if (otherNode === node) { + highestSharedIndex = spineIndex; + } + } + } + } + }; + var onStepUp = function (parent, height, _, __, spineIndex, stepDownIndex, ___, cursorOther) { + (0, b_tree_1.check)(highestSharedIndex < 0, "Shared nodes should have been skipped"); + if (stepDownIndex > 0) { + maybeSetHighest(parent, height, spineIndex, cursorOther); + } + }; + var onStepDown = function (node, height, spineIndex, _, __, cursorOther) { + maybeSetHighest(node, height, spineIndex, cursorOther); + }; + var onEnterLeaf = function (leaf, _, cursorThis, cursorOther) { + if (highestSharedIndex < 0) { + if (cursorOther.leaf === leaf) { + highestSharedIndex = cursorThis.spine.length - 1; + } + } + }; + var cmp = treeA._compare; + // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second + var maxKeyLeft = treeA.maxKey(); + var maxKeyRight = treeB.maxKey(); + var maxKey = 
cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + var payloadA = { only: onlyA ? onlyA : function () { } }; + var payloadB = { only: onlyB ? onlyB : function () { } }; + var curA = (0, parallelWalk_1.createCursor)(treeA, function () { return payloadA; }, onEnterLeaf, parallelWalk_1.noop, onExitLeaf, onStepUp, onStepDown); + var curB = (0, parallelWalk_1.createCursor)(treeB, function () { return payloadB; }, onEnterLeaf, parallelWalk_1.noop, onExitLeaf, onStepUp, onStepDown); + for (var depth = 0; depth < curA.spine.length - 1; depth++) { + onStepDown(curA.spine[depth].node, curA.spine.length - depth, depth, curA.spine[depth].childIndex, curA, curB); + } + onEnterLeaf(curA.leaf, curA.leafIndex, curA, curB); + var leading = curA; + var trailing = curB; + var order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + // Walk both cursors in alternating hops + while (true) { + var areEqual = order === 0; + if (areEqual) { + var key = (0, parallelWalk_1.getKey)(leading); + var vA = curA.leaf.values[curA.leafIndex]; + var vB = curB.leaf.values[curB.leafIndex]; + var combined = different ? different(key, vA, vB) : undefined; + if (combined && combined.break) { + return combined.break; + } + var outTrailing = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); + var outLeading = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); + if (outTrailing || outLeading) { + if (!outTrailing || !outLeading) { + // In these cases, we pass areEqual=false because a return value of "out of tree" means + // the cursor did not move. This must be true because they started equal and one of them had more tree + // to walk (one is !out), so they cannot be equal at this point. 
+ if (outTrailing) { + finishWalk(leading, trailing); + } + else { + finishWalk(trailing, leading); + } + } + break; + } + order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + } + else { + if (order < 0) { + var tmp = trailing; + trailing = leading; + leading = tmp; + } + var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual, cmp), out = _a[0], nowEqual = _a[1]; + if (out) { + return finishWalk(leading, trailing); + } + else if (nowEqual) { + order = 0; + } + else { + order = -1; + } + } + } +} +exports.default = diffAgainst; +function finishWalk(toFinish, done) { + var outOfTree; + do { + outOfTree = (0, parallelWalk_1.moveForwardOne)(toFinish, done, (0, parallelWalk_1.getKey)(done), toFinish.tree._compare); + if (!outOfTree) { + var key = (0, parallelWalk_1.getKey)(toFinish); + var value = toFinish.leaf.values[toFinish.leafIndex]; + var result = toFinish.leafPayload.only(key, value); + if (result && result.break) { + return result.break; + } + } + } while (!outOfTree); + return undefined; +} +/** + * Computes the differences between `treeA` and `treeB`. + * For efficiency, the diff is returned via invocations of supplied handlers. + * The computation is optimized for the case in which the two trees have large amounts of shared data + * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. + * The handlers can cause computation to early exit by returning `{ break: R }`. + * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * @param treeA The tree whose differences will be reported via the callbacks. + * @param treeB The tree to compute a diff against. + * @param onlyA Callback invoked for all keys only present in `treeA`. + * @param onlyB Callback invoked for all keys only present in `treeB`. 
+ * @param different Callback invoked for all keys with differing values. + */ +function diffAgainst2(_treeA, _treeB, onlyA, onlyB, different) { var treeA = _treeA; var treeB = _treeB; if (treeB._compare !== treeA._compare) { @@ -125,7 +262,7 @@ function diffAgainst(_treeA, _treeB, onlyA, onlyB, different) { return finishCursorWalk(otherCursor, thisCursor, compareKeys, onlyB); return undefined; } -exports.default = diffAgainst; +exports.diffAgainst2 = diffAgainst2; /** * Finishes walking `cursor` once the other cursor has already completed its walk. */ diff --git a/extended/diffAgainst.ts b/extended/diffAgainst.ts index 59af293..35b743f 100644 --- a/extended/diffAgainst.ts +++ b/extended/diffAgainst.ts @@ -1,6 +1,7 @@ import BTree from '../b+tree'; import { BNode, BNodeInternal, check } from '../b+tree'; -import type { BTreeWithInternals } from './shared'; +import { createCursor, Cursor, getKey, moveForwardOne, moveTo, noop } from './parallelWalk'; +import { checkCanDoSetOperation, type BTreeWithInternals } from './shared'; /** * Computes the differences between `treeA` and `treeB`. @@ -9,6 +10,7 @@ import type { BTreeWithInternals } from './shared'; * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. * The handlers can cause computation to early exit by returning `{ break: R }`. * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * Time complexity is O(N + M), but shared nodes are skipped entirely. * @param treeA The tree whose differences will be reported via the callbacks. * @param treeB The tree to compute a diff against. * @param onlyA Callback invoked for all keys only present in `treeA`. @@ -16,6 +18,192 @@ import type { BTreeWithInternals } from './shared'; * @param different Callback invoked for all keys with differing values. 
*/ export default function diffAgainst( + _treeA: BTree, + _treeB: BTree, + onlyA?: (k: K, v: V) => { break?: R } | void, + onlyB?: (k: K, v: V) => { break?: R } | void, + different?: (k: K, vA: V, vB: V) => { break?: R } | void +): R | undefined { + const treeA = _treeA as unknown as BTreeWithInternals; + const treeB = _treeB as unknown as BTreeWithInternals; + checkCanDoSetOperation(treeA, treeB, true); + + // During the downward walk of the cursors, this will be assigned the index of the highest node that is shared between the two trees + // along the paths of the two cursors. + let highestSharedIndex: number = -1; + + const onExitLeaf = () => { + highestSharedIndex = -1; + } + + const maybeSetHighest = ( + node: BNodeInternal, + height: number, + spineIndex: number, + cursorOther: Cursor> + ) => { + if (highestSharedIndex < 0) { + const heightOther = cursorOther.spine.length; + if (height <= heightOther) { + const depthOther = heightOther - height; + if (depthOther >= 0) { + const otherNode = cursorOther.spine[depthOther].node; + if (otherNode === node) { + highestSharedIndex = spineIndex; + } + } + } + } + } + + const onStepUp = ( + parent: BNodeInternal, + height: number, + _: DiffPayload, + __: number, + spineIndex: number, + stepDownIndex: number, + ___: Cursor>, + cursorOther: Cursor> + ) => { + check(highestSharedIndex < 0, "Shared nodes should have been skipped"); + if (stepDownIndex > 0) { + maybeSetHighest(parent, height, spineIndex, cursorOther); + } + }; + + const onStepDown = ( + node: BNodeInternal, + height: number, + spineIndex: number, + _: number, + __: Cursor>, + cursorOther: Cursor> + ) => { + maybeSetHighest(node, height, spineIndex, cursorOther); + }; + + const onEnterLeaf = ( + leaf: BNode, + _: number, + cursorThis: Cursor>, + cursorOther: Cursor> + ) => { + if (highestSharedIndex < 0) { + if (cursorOther.leaf === leaf) { + highestSharedIndex = cursorThis.spine.length - 1; + } + } + }; + + const cmp = treeA._compare; + // Need the max key 
of both trees to perform the "finishing" walk of which ever cursor finishes second + const maxKeyLeft = treeA.maxKey() as K; + const maxKeyRight = treeB.maxKey() as K; + const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + + const payloadA = { only: onlyA ? onlyA : () => {} }; + const payloadB = { only: onlyB ? onlyB : () => {} }; + const curA = createCursor>(treeA, () => payloadA, onEnterLeaf, noop, onExitLeaf, onStepUp, onStepDown); + const curB = createCursor>(treeB, () => payloadB, onEnterLeaf, noop, onExitLeaf, onStepUp, onStepDown); + + for (let depth = 0; depth < curA.spine.length - 1; depth++) { + onStepDown( + curA.spine[depth].node, + curA.spine.length - depth, + depth, + curA.spine[depth].childIndex, + curA, + curB + ); + } + onEnterLeaf(curA.leaf, curA.leafIndex, curA, curB); + + let leading = curA; + let trailing = curB; + let order = cmp(getKey(leading), getKey(trailing)); + + // Walk both cursors in alternating hops + while (true) { + const areEqual = order === 0; + + if (areEqual) { + const key = getKey(leading); + const vA = curA.leaf.values[curA.leafIndex]; + const vB = curB.leaf.values[curB.leafIndex]; + const combined = different ? different(key, vA, vB) : undefined; + if (combined && combined.break) { + return combined.break; + } + const outTrailing = moveForwardOne(trailing, leading, key, cmp); + const outLeading = moveForwardOne(leading, trailing, key, cmp); + if (outTrailing || outLeading) { + if (!outTrailing || !outLeading) { + // In these cases, we pass areEqual=false because a return value of "out of tree" means + // the cursor did not move. This must be true because they started equal and one of them had more tree + // to walk (one is !out), so they cannot be equal at this point. 
+ if (outTrailing) { + finishWalk(leading, trailing); + } else { + finishWalk(trailing, leading); + } + } + break; + } + order = cmp(getKey(leading), getKey(trailing)); + } else { + if (order < 0) { + const tmp = trailing; + trailing = leading; + leading = tmp; + } + const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp); + if (out) { + return finishWalk(leading, trailing); + } else if (nowEqual) { + order = 0; + } else { + order = -1; + } + } + } +} + +function finishWalk( + toFinish: Cursor>, + done: Cursor> +): R | undefined { + let outOfTree: boolean; + do { + outOfTree = moveForwardOne(toFinish, done, getKey(done), toFinish.tree._compare); + if (!outOfTree) { + const key = getKey(toFinish); + const value = toFinish.leaf.values[toFinish.leafIndex]; + const result = toFinish.leafPayload.only(key, value); + if (result && result.break) { + return result.break; + } + } + } while (!outOfTree); + return undefined; +} + +type DiffPayload = { only: (k: K, v: V) => { break?: R } | void}; + +/** + * Computes the differences between `treeA` and `treeB`. + * For efficiency, the diff is returned via invocations of supplied handlers. + * The computation is optimized for the case in which the two trees have large amounts of shared data + * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. + * The handlers can cause computation to early exit by returning `{ break: R }`. + * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * @param treeA The tree whose differences will be reported via the callbacks. + * @param treeB The tree to compute a diff against. + * @param onlyA Callback invoked for all keys only present in `treeA`. + * @param onlyB Callback invoked for all keys only present in `treeB`. + * @param different Callback invoked for all keys with differing values. 
+ */ +export function diffAgainst2( _treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { break?: R } | void, diff --git a/extended/forEachKeyInBoth.js b/extended/forEachKeyInBoth.js index e4f6a67..a24aba6 100644 --- a/extended/forEachKeyInBoth.js +++ b/extended/forEachKeyInBoth.js @@ -1,5 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +var shared_1 = require("./shared"); var parallelWalk_1 = require("./parallelWalk"); /** * Calls the supplied `callback` for each key/value pair shared by both trees. @@ -19,7 +20,7 @@ var parallelWalk_1 = require("./parallelWalk"); function forEachKeyInBoth(treeA, treeB, callback) { var _treeA = treeA; var _treeB = treeB; - (0, parallelWalk_1.checkCanDoSetOperation)(_treeA, _treeB); + (0, shared_1.checkCanDoSetOperation)(_treeA, _treeB, true); if (treeB.size === 0 || treeA.size === 0) return; var cmp = treeA._compare; diff --git a/extended/forEachKeyInBoth.ts b/extended/forEachKeyInBoth.ts index 5e95090..fbf55f4 100644 --- a/extended/forEachKeyInBoth.ts +++ b/extended/forEachKeyInBoth.ts @@ -1,6 +1,6 @@ import BTree from '../b+tree'; -import type { BTreeWithInternals } from './shared'; -import { createCursor, moveForwardOne, moveTo, getKey, noop, checkCanDoSetOperation } from "./parallelWalk" +import { type BTreeWithInternals, checkCanDoSetOperation } from './shared'; +import { createCursor, moveForwardOne, moveTo, getKey, noop } from "./parallelWalk" /** * Calls the supplied `callback` for each key/value pair shared by both trees. 
@@ -24,7 +24,7 @@ export default function forEachKeyInBoth( ): R | undefined { const _treeA = treeA as unknown as BTreeWithInternals; const _treeB = treeB as unknown as BTreeWithInternals; - checkCanDoSetOperation(_treeA, _treeB); + checkCanDoSetOperation(_treeA, _treeB, true); if (treeB.size === 0 || treeA.size === 0) return; diff --git a/extended/intersect.js b/extended/intersect.js index 3429aaf..10d4627 100644 --- a/extended/intersect.js +++ b/extended/intersect.js @@ -4,7 +4,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) { }; Object.defineProperty(exports, "__esModule", { value: true }); var shared_1 = require("./shared"); -var parallelWalk_1 = require("./parallelWalk"); var decompose_1 = require("./decompose"); var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); /** @@ -25,7 +24,7 @@ var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); function intersect(treeA, treeB, combineFn) { var _treeA = treeA; var _treeB = treeB; - var branchingFactor = (0, parallelWalk_1.checkCanDoSetOperation)(_treeA, _treeB); + var branchingFactor = (0, shared_1.checkCanDoSetOperation)(_treeA, _treeB, true); if (_treeA._root.size() === 0) return treeB.clone(); if (_treeB._root.size() === 0) diff --git a/extended/intersect.ts b/extended/intersect.ts index cf11ee2..083a1ac 100644 --- a/extended/intersect.ts +++ b/extended/intersect.ts @@ -1,6 +1,5 @@ import BTree from '../b+tree'; -import { alternatingPush, createAlternatingList, type BTreeWithInternals } from './shared'; -import { checkCanDoSetOperation } from "./parallelWalk" +import { alternatingPush, createAlternatingList, checkCanDoSetOperation, type BTreeWithInternals } from './shared'; import { buildFromDecomposition, decompose } from './decompose'; import forEachKeyInBoth from './forEachKeyInBoth'; @@ -26,7 +25,7 @@ export default function intersect, K, V>( ): TBTree { const _treeA = treeA as unknown as BTreeWithInternals; const _treeB = treeB as unknown as 
BTreeWithInternals; - const branchingFactor = checkCanDoSetOperation(_treeA, _treeB); + const branchingFactor = checkCanDoSetOperation(_treeA, _treeB, true); if (_treeA._root.size() === 0) return treeB.clone(); if (_treeB._root.size() === 0) diff --git a/extended/parallelWalk.js b/extended/parallelWalk.js index 3d20a48..22ff026 100644 --- a/extended/parallelWalk.js +++ b/extended/parallelWalk.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.checkCanDoSetOperation = exports.branchingFactorErrorMsg = exports.comparatorErrorMsg = exports.noop = exports.moveTo = exports.getKey = exports.createCursor = exports.moveForwardOne = void 0; +exports.noop = exports.moveTo = exports.getKey = exports.createCursor = exports.moveForwardOne = void 0; /** * Walks the cursor forward by one key. * Should only be called to advance cursors that started equal. @@ -124,17 +124,17 @@ function moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp) { for (var depth = initialSpineLength - 1; depth >= 0; depth--) { var entry_1 = spine[depth]; var sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; - onStepUp(entry_1.node, initialSpineLength - depth, entry_1.payload, entry_1.childIndex, depth, sd, cur); + onStepUp(entry_1.node, initialSpineLength - depth, entry_1.payload, entry_1.childIndex, depth, sd, cur, other); } return [true, false]; } // Step up through ancestors above the descentLevel for (var depth = initialSpineLength - 1; depth > descentLevel; depth--) { var entry_2 = spine[depth]; - onStepUp(entry_2.node, initialSpineLength - depth, entry_2.payload, entry_2.childIndex, depth, Number.NaN, cur); + onStepUp(entry_2.node, initialSpineLength - depth, entry_2.payload, entry_2.childIndex, depth, Number.NaN, cur, other); } var entry = spine[descentLevel]; - onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); + onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur, other); entry.childIndex = descentIndex; var onStepDown = cur.onStepDown; var makePayload = cur.makePayload; @@ -151,7 +151,7 @@ function moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp) { var payload = makePayload(); var spineIndex = spine.length; spine.push({ node: ni, childIndex: stepDownIndex, payload: payload }); - onStepDown(ni, height, spineIndex, stepDownIndex, cur); + onStepDown(ni, height, spineIndex, stepDownIndex, cur, other); node = ni.children[stepDownIndex]; height -= 1; } @@ -185,26 +185,3 @@ exports.moveTo = moveTo; */ function noop() { } exports.noop = noop; -/** - * Error message used when comparators differ between trees. - * @internal - */ -exports.comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; -/** - * Error message used when branching factors differ between trees. 
- * @internal - */ -exports.branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; -/** - * Checks that two trees can be used together in a set operation. - * @internal - */ -function checkCanDoSetOperation(treeA, treeB) { - if (treeA._compare !== treeB._compare) - throw new Error(exports.comparatorErrorMsg); - var branchingFactor = treeA._maxNodeSize; - if (branchingFactor !== treeB._maxNodeSize) - throw new Error(exports.branchingFactorErrorMsg); - return branchingFactor; -} -exports.checkCanDoSetOperation = checkCanDoSetOperation; diff --git a/extended/parallelWalk.ts b/extended/parallelWalk.ts index 748ec3f..3b11a85 100644 --- a/extended/parallelWalk.ts +++ b/extended/parallelWalk.ts @@ -14,8 +14,8 @@ export interface Cursor { makePayload: () => TPayload; onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: Cursor) => void; - onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: Cursor) => void; - onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: Cursor) => void; + onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: Cursor, cursorOther: Cursor) => void; + onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: Cursor, cursorOther: Cursor) => void; onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: Cursor, cursorOther: Cursor) => void; } @@ -157,7 +157,7 @@ export function moveTo( for (let depth = initialSpineLength - 1; depth >= 0; depth--) { const entry = spine[depth]; const sd = depth === 0 ? 
Number.POSITIVE_INFINITY : Number.NaN; - onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, sd, cur); + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, sd, cur, other); } return [true, false]; } @@ -165,11 +165,11 @@ export function moveTo( // Step up through ancestors above the descentLevel for (let depth = initialSpineLength - 1; depth > descentLevel; depth--) { const entry = spine[depth]; - onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, Number.NaN, cur); + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, Number.NaN, cur, other); } const entry = spine[descentLevel]; - onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur); + onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur, other); entry.childIndex = descentIndex; const onStepDown = cur.onStepDown; @@ -189,7 +189,7 @@ export function moveTo( const payload = makePayload(); const spineIndex = spine.length; spine.push({ node: ni, childIndex: stepDownIndex, payload }); - onStepDown(ni, height, spineIndex, stepDownIndex, cur); + onStepDown(ni, height, spineIndex, stepDownIndex, cur, other); node = ni.children[stepDownIndex]; height -= 1; } @@ -221,29 +221,3 @@ export function moveTo( * @internal */ export function noop(): void { } - -/** - * Error message used when comparators differ between trees. - * @internal - */ -export const comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; - -/** - * Error message used when branching factors differ between trees. - * @internal - */ -export const branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; - -/** - * Checks that two trees can be used together in a set operation. 
- * @internal - */ -export function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals): number { - if (treeA._compare !== treeB._compare) - throw new Error(comparatorErrorMsg); - - const branchingFactor = treeA._maxNodeSize; - if (branchingFactor !== treeB._maxNodeSize) - throw new Error(branchingFactorErrorMsg); - return branchingFactor; -} diff --git a/extended/shared.js b/extended/shared.js index 92b45b3..8e6749c 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -1,6 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.createAlternatingList = exports.flushToLeaves = void 0; +exports.checkCanDoSetOperation = exports.branchingFactorErrorMsg = exports.comparatorErrorMsg = exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.createAlternatingList = exports.flushToLeaves = void 0; var b_tree_1 = require("../b+tree"); /** * Flushes entries from an alternating list into leaf nodes. @@ -82,3 +82,26 @@ function alternatingPush(list, first, second) { list.push(first, second); } exports.alternatingPush = alternatingPush; +/** + * Error message used when comparators differ between trees. + * @internal + */ +exports.comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; +/** + * Error message used when branching factors differ between trees. + * @internal + */ +exports.branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; +/** + * Checks that two trees can be used together in a set operation. 
+ * @internal + */ +function checkCanDoSetOperation(treeA, treeB, supportsDifferentBranchingFactors) { + if (treeA._compare !== treeB._compare) + throw new Error(exports.comparatorErrorMsg); + var branchingFactor = treeA._maxNodeSize; + if (!supportsDifferentBranchingFactors && branchingFactor !== treeB._maxNodeSize) + throw new Error(exports.branchingFactorErrorMsg); + return branchingFactor; +} +exports.checkCanDoSetOperation = checkCanDoSetOperation; diff --git a/extended/shared.ts b/extended/shared.ts index 15295f5..3317b08 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -102,3 +102,29 @@ export function alternatingPush(list: AlternatingList, first: A, sec // Micro benchmarks show this is the fastest way to do this list.push(first, second); } + +/** + * Error message used when comparators differ between trees. + * @internal + */ +export const comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; + +/** + * Error message used when branching factors differ between trees. + * @internal + */ +export const branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; + +/** + * Checks that two trees can be used together in a set operation. 
+ * @internal + */ +export function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals, supportsDifferentBranchingFactors: boolean): number { + if (treeA._compare !== treeB._compare) + throw new Error(comparatorErrorMsg); + + const branchingFactor = treeA._maxNodeSize; + if (!supportsDifferentBranchingFactors && branchingFactor !== treeB._maxNodeSize) + throw new Error(branchingFactorErrorMsg); + return branchingFactor; +} \ No newline at end of file diff --git a/extended/union.js b/extended/union.js index 84db6b2..aeb9ced 100644 --- a/extended/union.js +++ b/extended/union.js @@ -1,7 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +var shared_1 = require("./shared"); var decompose_1 = require("./decompose"); -var parallelWalk_1 = require("./parallelWalk"); /** * Efficiently unions two trees, reusing subtrees wherever possible. * Neither input tree is modified. @@ -21,7 +21,7 @@ var parallelWalk_1 = require("./parallelWalk"); function union(treeA, treeB, combineFn) { var _treeA = treeA; var _treeB = treeB; - var branchingFactor = (0, parallelWalk_1.checkCanDoSetOperation)(_treeA, _treeB); + var branchingFactor = (0, shared_1.checkCanDoSetOperation)(_treeA, _treeB, false); if (_treeA._root.size() === 0) return treeB.clone(); if (_treeB._root.size() === 0) diff --git a/extended/union.ts b/extended/union.ts index 1e49fd0..b6411bf 100644 --- a/extended/union.ts +++ b/extended/union.ts @@ -1,7 +1,6 @@ import BTree from '../b+tree'; -import type { BTreeWithInternals } from './shared'; +import { type BTreeWithInternals, checkCanDoSetOperation } from './shared'; import { decompose, buildFromDecomposition } from "./decompose"; -import { checkCanDoSetOperation } from "./parallelWalk"; /** * Efficiently unions two trees, reusing subtrees wherever possible. 
@@ -26,7 +25,7 @@ export default function union, K, V>( ): TBTree { const _treeA = treeA as unknown as BTreeWithInternals; const _treeB = treeB as unknown as BTreeWithInternals; - const branchingFactor = checkCanDoSetOperation(_treeA, _treeB); + const branchingFactor = checkCanDoSetOperation(_treeA, _treeB, false); if (_treeA._root.size() === 0) return treeB.clone(); if (_treeB._root.size() === 0) diff --git a/test/forEachKeyInBoth.test.ts b/test/forEachKeyInBoth.test.ts index 8f14917..19064e1 100644 --- a/test/forEachKeyInBoth.test.ts +++ b/test/forEachKeyInBoth.test.ts @@ -1,5 +1,5 @@ import BTreeEx from '../extended'; -import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/parallelWalk'; +import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; import { makeArray } from './shared'; @@ -169,13 +169,6 @@ describe('BTree forEachKeyInBoth input/output validation', () => { const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); expect(() => tree1.forEachKeyInBoth(tree2, () => {})).toThrow(comparatorErrorMsg); }); - - test('forEachKeyInBoth throws error when max node sizes differ', () => { - const compare = (a: number, b: number) => b - a; - const tree1 = new BTreeEx([[1, 10]], compare, 32); - const tree2 = new BTreeEx([[2, 20]], compare, 33); - expect(() => tree1.forEachKeyInBoth(tree2, () => {})).toThrow(branchingFactorErrorMsg); - }); }); describe('BTree forEachKeyInBoth fuzz tests', () => { From 992675e7e7918e4e3216e289fa38c4916302d3aa Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 07:33:42 -0800 Subject: [PATCH 089/143] debuggability, shuffling imports --- .vscode/launch.json | 6 +- extended/diffAgainst.d.ts | 21 ----- extended/diffAgainst.js | 139 +--------------------------- extended/diffAgainst.ts | 190 +------------------------------------- package.json | 2 +- test/union.test.ts | 2 +- 6 files changed, 7 insertions(+), 353 deletions(-) diff --git 
a/.vscode/launch.json b/.vscode/launch.json index c39e819..9b0df17 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -9,7 +9,7 @@ "request": "launch", "name": "Jest Tests", "program": "${workspaceRoot}/node_modules/jest/bin/jest.js", - "args": [ "--runInBand", "--ci", "--bail" ], + "args": [ "--runInBand", "--ci" ], //"preLaunchTask": "build", "internalConsoleOptions": "openOnSessionStart", "outFiles": [ @@ -24,7 +24,7 @@ "name": "Debug Jest Tests", "runtimeArgs": [ "--nolazy", "--inspect-brk", "${workspaceRoot}/node_modules/jest/bin/jest.js", - "--runInBand", "--coverage", "false", "--ci", "--bail" ], + "--runInBand", "--coverage", "false", "--ci" ], "console": "integratedTerminal", //"internalConsoleOptions": "openOnSessionStart", "port": 9229, @@ -41,4 +41,4 @@ "port": 9229, } ] -} \ No newline at end of file +} diff --git a/extended/diffAgainst.d.ts b/extended/diffAgainst.d.ts index 7845209..eb54935 100644 --- a/extended/diffAgainst.d.ts +++ b/extended/diffAgainst.d.ts @@ -6,7 +6,6 @@ import BTree from '../b+tree'; * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. * The handlers can cause computation to early exit by returning `{ break: R }`. * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. - * Time complexity is O(N + M), but shared nodes are skipped entirely. * @param treeA The tree whose differences will be reported via the callbacks. * @param treeB The tree to compute a diff against. * @param onlyA Callback invoked for all keys only present in `treeA`. @@ -17,26 +16,6 @@ export default function diffAgainst(_treeA: BTree, _treeB: BTree< break?: R; } | void, onlyB?: (k: K, v: V) => { break?: R; -} | void, different?: (k: K, vA: V, vB: V) => { - break?: R; -} | void): R | undefined; -/** - * Computes the differences between `treeA` and `treeB`. - * For efficiency, the diff is returned via invocations of supplied handlers. 
- * The computation is optimized for the case in which the two trees have large amounts of shared data - * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. - * The handlers can cause computation to early exit by returning `{ break: R }`. - * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. - * @param treeA The tree whose differences will be reported via the callbacks. - * @param treeB The tree to compute a diff against. - * @param onlyA Callback invoked for all keys only present in `treeA`. - * @param onlyB Callback invoked for all keys only present in `treeB`. - * @param different Callback invoked for all keys with differing values. - */ -export declare function diffAgainst2(_treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { - break?: R; -} | void, onlyB?: (k: K, v: V) => { - break?: R; } | void, different?: (k: K, vThis: V, vOther: V) => { break?: R; } | void): R | undefined; diff --git a/extended/diffAgainst.js b/extended/diffAgainst.js index 98e42c4..db29a34 100644 --- a/extended/diffAgainst.js +++ b/extended/diffAgainst.js @@ -1,9 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.diffAgainst2 = void 0; var b_tree_1 = require("../b+tree"); -var parallelWalk_1 = require("./parallelWalk"); -var shared_1 = require("./shared"); /** * Computes the differences between `treeA` and `treeB`. * For efficiency, the diff is returned via invocations of supplied handlers. @@ -11,7 +8,6 @@ var shared_1 = require("./shared"); * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. * The handlers can cause computation to early exit by returning `{ break: R }`. * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. - * Time complexity is O(N + M), but shared nodes are skipped entirely. 
* @param treeA The tree whose differences will be reported via the callbacks. * @param treeB The tree to compute a diff against. * @param onlyA Callback invoked for all keys only present in `treeA`. @@ -19,139 +15,6 @@ var shared_1 = require("./shared"); * @param different Callback invoked for all keys with differing values. */ function diffAgainst(_treeA, _treeB, onlyA, onlyB, different) { - var treeA = _treeA; - var treeB = _treeB; - (0, shared_1.checkCanDoSetOperation)(treeA, treeB, true); - // During the downward walk of the cursors, this will be assigned the index of the highest node that is shared between the two trees - // along the paths of the two cursors. - var highestSharedIndex = -1; - var onExitLeaf = function () { - highestSharedIndex = -1; - }; - var maybeSetHighest = function (node, height, spineIndex, cursorOther) { - if (highestSharedIndex < 0) { - var heightOther = cursorOther.spine.length; - if (height <= heightOther) { - var depthOther = heightOther - height; - if (depthOther >= 0) { - var otherNode = cursorOther.spine[depthOther].node; - if (otherNode === node) { - highestSharedIndex = spineIndex; - } - } - } - } - }; - var onStepUp = function (parent, height, _, __, spineIndex, stepDownIndex, ___, cursorOther) { - (0, b_tree_1.check)(highestSharedIndex < 0, "Shared nodes should have been skipped"); - if (stepDownIndex > 0) { - maybeSetHighest(parent, height, spineIndex, cursorOther); - } - }; - var onStepDown = function (node, height, spineIndex, _, __, cursorOther) { - maybeSetHighest(node, height, spineIndex, cursorOther); - }; - var onEnterLeaf = function (leaf, _, cursorThis, cursorOther) { - if (highestSharedIndex < 0) { - if (cursorOther.leaf === leaf) { - highestSharedIndex = cursorThis.spine.length - 1; - } - } - }; - var cmp = treeA._compare; - // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second - var maxKeyLeft = treeA.maxKey(); - var maxKeyRight = treeB.maxKey(); - var maxKey = 
cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; - var payloadA = { only: onlyA ? onlyA : function () { } }; - var payloadB = { only: onlyB ? onlyB : function () { } }; - var curA = (0, parallelWalk_1.createCursor)(treeA, function () { return payloadA; }, onEnterLeaf, parallelWalk_1.noop, onExitLeaf, onStepUp, onStepDown); - var curB = (0, parallelWalk_1.createCursor)(treeB, function () { return payloadB; }, onEnterLeaf, parallelWalk_1.noop, onExitLeaf, onStepUp, onStepDown); - for (var depth = 0; depth < curA.spine.length - 1; depth++) { - onStepDown(curA.spine[depth].node, curA.spine.length - depth, depth, curA.spine[depth].childIndex, curA, curB); - } - onEnterLeaf(curA.leaf, curA.leafIndex, curA, curB); - var leading = curA; - var trailing = curB; - var order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); - // Walk both cursors in alternating hops - while (true) { - var areEqual = order === 0; - if (areEqual) { - var key = (0, parallelWalk_1.getKey)(leading); - var vA = curA.leaf.values[curA.leafIndex]; - var vB = curB.leaf.values[curB.leafIndex]; - var combined = different ? different(key, vA, vB) : undefined; - if (combined && combined.break) { - return combined.break; - } - var outTrailing = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); - var outLeading = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); - if (outTrailing || outLeading) { - if (!outTrailing || !outLeading) { - // In these cases, we pass areEqual=false because a return value of "out of tree" means - // the cursor did not move. This must be true because they started equal and one of them had more tree - // to walk (one is !out), so they cannot be equal at this point. 
- if (outTrailing) { - finishWalk(leading, trailing); - } - else { - finishWalk(trailing, leading); - } - } - break; - } - order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); - } - else { - if (order < 0) { - var tmp = trailing; - trailing = leading; - leading = tmp; - } - var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual, cmp), out = _a[0], nowEqual = _a[1]; - if (out) { - return finishWalk(leading, trailing); - } - else if (nowEqual) { - order = 0; - } - else { - order = -1; - } - } - } -} -exports.default = diffAgainst; -function finishWalk(toFinish, done) { - var outOfTree; - do { - outOfTree = (0, parallelWalk_1.moveForwardOne)(toFinish, done, (0, parallelWalk_1.getKey)(done), toFinish.tree._compare); - if (!outOfTree) { - var key = (0, parallelWalk_1.getKey)(toFinish); - var value = toFinish.leaf.values[toFinish.leafIndex]; - var result = toFinish.leafPayload.only(key, value); - if (result && result.break) { - return result.break; - } - } - } while (!outOfTree); - return undefined; -} -/** - * Computes the differences between `treeA` and `treeB`. - * For efficiency, the diff is returned via invocations of supplied handlers. - * The computation is optimized for the case in which the two trees have large amounts of shared data - * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. - * The handlers can cause computation to early exit by returning `{ break: R }`. - * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. - * @param treeA The tree whose differences will be reported via the callbacks. - * @param treeB The tree to compute a diff against. - * @param onlyA Callback invoked for all keys only present in `treeA`. - * @param onlyB Callback invoked for all keys only present in `treeB`. 
- * @param different Callback invoked for all keys with differing values. - */ -function diffAgainst2(_treeA, _treeB, onlyA, onlyB, different) { var treeA = _treeA; var treeB = _treeB; if (treeB._compare !== treeA._compare) { @@ -262,7 +125,7 @@ function diffAgainst2(_treeA, _treeB, onlyA, onlyB, different) { return finishCursorWalk(otherCursor, thisCursor, compareKeys, onlyB); return undefined; } -exports.diffAgainst2 = diffAgainst2; +exports.default = diffAgainst; /** * Finishes walking `cursor` once the other cursor has already completed its walk. */ diff --git a/extended/diffAgainst.ts b/extended/diffAgainst.ts index 35b743f..2d5be24 100644 --- a/extended/diffAgainst.ts +++ b/extended/diffAgainst.ts @@ -1,7 +1,6 @@ import BTree from '../b+tree'; import { BNode, BNodeInternal, check } from '../b+tree'; -import { createCursor, Cursor, getKey, moveForwardOne, moveTo, noop } from './parallelWalk'; -import { checkCanDoSetOperation, type BTreeWithInternals } from './shared'; +import { type BTreeWithInternals } from './shared'; /** * Computes the differences between `treeA` and `treeB`. @@ -10,7 +9,6 @@ import { checkCanDoSetOperation, type BTreeWithInternals } from './shared'; * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. * The handlers can cause computation to early exit by returning `{ break: R }`. * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. - * Time complexity is O(N + M), but shared nodes are skipped entirely. * @param treeA The tree whose differences will be reported via the callbacks. * @param treeB The tree to compute a diff against. * @param onlyA Callback invoked for all keys only present in `treeA`. @@ -18,192 +16,6 @@ import { checkCanDoSetOperation, type BTreeWithInternals } from './shared'; * @param different Callback invoked for all keys with differing values. 
*/ export default function diffAgainst( - _treeA: BTree, - _treeB: BTree, - onlyA?: (k: K, v: V) => { break?: R } | void, - onlyB?: (k: K, v: V) => { break?: R } | void, - different?: (k: K, vA: V, vB: V) => { break?: R } | void -): R | undefined { - const treeA = _treeA as unknown as BTreeWithInternals; - const treeB = _treeB as unknown as BTreeWithInternals; - checkCanDoSetOperation(treeA, treeB, true); - - // During the downward walk of the cursors, this will be assigned the index of the highest node that is shared between the two trees - // along the paths of the two cursors. - let highestSharedIndex: number = -1; - - const onExitLeaf = () => { - highestSharedIndex = -1; - } - - const maybeSetHighest = ( - node: BNodeInternal, - height: number, - spineIndex: number, - cursorOther: Cursor> - ) => { - if (highestSharedIndex < 0) { - const heightOther = cursorOther.spine.length; - if (height <= heightOther) { - const depthOther = heightOther - height; - if (depthOther >= 0) { - const otherNode = cursorOther.spine[depthOther].node; - if (otherNode === node) { - highestSharedIndex = spineIndex; - } - } - } - } - } - - const onStepUp = ( - parent: BNodeInternal, - height: number, - _: DiffPayload, - __: number, - spineIndex: number, - stepDownIndex: number, - ___: Cursor>, - cursorOther: Cursor> - ) => { - check(highestSharedIndex < 0, "Shared nodes should have been skipped"); - if (stepDownIndex > 0) { - maybeSetHighest(parent, height, spineIndex, cursorOther); - } - }; - - const onStepDown = ( - node: BNodeInternal, - height: number, - spineIndex: number, - _: number, - __: Cursor>, - cursorOther: Cursor> - ) => { - maybeSetHighest(node, height, spineIndex, cursorOther); - }; - - const onEnterLeaf = ( - leaf: BNode, - _: number, - cursorThis: Cursor>, - cursorOther: Cursor> - ) => { - if (highestSharedIndex < 0) { - if (cursorOther.leaf === leaf) { - highestSharedIndex = cursorThis.spine.length - 1; - } - } - }; - - const cmp = treeA._compare; - // Need the max key 
of both trees to perform the "finishing" walk of which ever cursor finishes second - const maxKeyLeft = treeA.maxKey() as K; - const maxKeyRight = treeB.maxKey() as K; - const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; - - const payloadA = { only: onlyA ? onlyA : () => {} }; - const payloadB = { only: onlyB ? onlyB : () => {} }; - const curA = createCursor>(treeA, () => payloadA, onEnterLeaf, noop, onExitLeaf, onStepUp, onStepDown); - const curB = createCursor>(treeB, () => payloadB, onEnterLeaf, noop, onExitLeaf, onStepUp, onStepDown); - - for (let depth = 0; depth < curA.spine.length - 1; depth++) { - onStepDown( - curA.spine[depth].node, - curA.spine.length - depth, - depth, - curA.spine[depth].childIndex, - curA, - curB - ); - } - onEnterLeaf(curA.leaf, curA.leafIndex, curA, curB); - - let leading = curA; - let trailing = curB; - let order = cmp(getKey(leading), getKey(trailing)); - - // Walk both cursors in alternating hops - while (true) { - const areEqual = order === 0; - - if (areEqual) { - const key = getKey(leading); - const vA = curA.leaf.values[curA.leafIndex]; - const vB = curB.leaf.values[curB.leafIndex]; - const combined = different ? different(key, vA, vB) : undefined; - if (combined && combined.break) { - return combined.break; - } - const outTrailing = moveForwardOne(trailing, leading, key, cmp); - const outLeading = moveForwardOne(leading, trailing, key, cmp); - if (outTrailing || outLeading) { - if (!outTrailing || !outLeading) { - // In these cases, we pass areEqual=false because a return value of "out of tree" means - // the cursor did not move. This must be true because they started equal and one of them had more tree - // to walk (one is !out), so they cannot be equal at this point. 
- if (outTrailing) { - finishWalk(leading, trailing); - } else { - finishWalk(trailing, leading); - } - } - break; - } - order = cmp(getKey(leading), getKey(trailing)); - } else { - if (order < 0) { - const tmp = trailing; - trailing = leading; - leading = tmp; - } - const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp); - if (out) { - return finishWalk(leading, trailing); - } else if (nowEqual) { - order = 0; - } else { - order = -1; - } - } - } -} - -function finishWalk( - toFinish: Cursor>, - done: Cursor> -): R | undefined { - let outOfTree: boolean; - do { - outOfTree = moveForwardOne(toFinish, done, getKey(done), toFinish.tree._compare); - if (!outOfTree) { - const key = getKey(toFinish); - const value = toFinish.leaf.values[toFinish.leafIndex]; - const result = toFinish.leafPayload.only(key, value); - if (result && result.break) { - return result.break; - } - } - } while (!outOfTree); - return undefined; -} - -type DiffPayload = { only: (k: K, v: V) => { break?: R } | void}; - -/** - * Computes the differences between `treeA` and `treeB`. - * For efficiency, the diff is returned via invocations of supplied handlers. - * The computation is optimized for the case in which the two trees have large amounts of shared data - * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. - * The handlers can cause computation to early exit by returning `{ break: R }`. - * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. - * @param treeA The tree whose differences will be reported via the callbacks. - * @param treeB The tree to compute a diff against. - * @param onlyA Callback invoked for all keys only present in `treeA`. - * @param onlyB Callback invoked for all keys only present in `treeB`. - * @param different Callback invoked for all keys with differing values. 
- */ -export function diffAgainst2( _treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { break?: R } | void, diff --git a/package.json b/package.json index 3a70b55..a375e43 100644 --- a/package.json +++ b/package.json @@ -84,7 +84,7 @@ "json" ], "verbose": true, - "bail": true, + "bail": false, "testEnvironment": "node" }, "testpack": { diff --git a/test/union.test.ts b/test/union.test.ts index d1a0da4..78d2887 100644 --- a/test/union.test.ts +++ b/test/union.test.ts @@ -1,7 +1,7 @@ import BTree from '../b+tree'; import BTreeEx from '../extended'; import union from '../extended/union'; -import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/parallelWalk'; +import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; import { makeArray, randomInt } from './shared'; From a6e8c77b78740600fef217064576f4e2f3ea8782 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 09:10:26 -0800 Subject: [PATCH 090/143] Introduce constructor type, add comments --- extended/bulkLoad.d.ts | 3 ++- extended/bulkLoad.js | 3 ++- extended/bulkLoad.ts | 3 ++- extended/decompose.ts | 8 ++++---- extended/intersect.js | 11 +++++------ extended/intersect.ts | 15 +++++++-------- extended/shared.d.ts | 3 ++- extended/shared.ts | 8 +++++--- extended/union.ts | 4 ++-- 9 files changed, 31 insertions(+), 27 deletions(-) diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index 4b0235b..b405eca 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -3,7 +3,8 @@ import BTree from '../b+tree'; * Loads a B-Tree from a sorted list of entries in bulk. This is faster than inserting * entries one at a time, and produces a more optimally balanced tree. * Time and space complexity: O(n). - * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. + * @param entries The list of key/value pairs to load. 
Must be sorted by key in strictly ascending order. Note that + * the array is an alternating list of keys and values: [key0, value0, key1, value1, ...]. * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. * @param compare Function to compare keys. * @returns A new BTree containing the given entries. diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 94fbb25..22ae301 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -26,7 +26,8 @@ var shared_1 = require("./shared"); * Loads a B-Tree from a sorted list of entries in bulk. This is faster than inserting * entries one at a time, and produces a more optimally balanced tree. * Time and space complexity: O(n). - * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. + * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. Note that + * the array is an alternating list of keys and values: [key0, value0, key1, value1, ...]. * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. * @param compare Function to compare keys. * @returns A new BTree containing the given entries. diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 91e587c..b2508b1 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -5,7 +5,8 @@ import { alternatingCount, alternatingGetFirst, flushToLeaves, type AlternatingL * Loads a B-Tree from a sorted list of entries in bulk. This is faster than inserting * entries one at a time, and produces a more optimally balanced tree. * Time and space complexity: O(n). - * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. + * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. Note that + * the array is an alternating list of keys and values: [key0, value0, key1, value1, ...]. 
* @param maxNodeSize The branching factor (maximum node size) for the resulting tree. * @param compare Function to compare keys. * @returns A new BTree containing the given entries. diff --git a/extended/decompose.ts b/extended/decompose.ts index 4d38a96..ac0e06b 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -1,5 +1,5 @@ import BTree, { areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, createAlternatingList, flushToLeaves, type AlternatingList, type BTreeWithInternals } from './shared'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, BTreeConstructor, createAlternatingList, flushToLeaves, type AlternatingList, type BTreeWithInternals } from './shared'; import { createCursor, getKey, Cursor, moveForwardOne, moveTo, noop } from "./parallelWalk"; /** @@ -322,7 +322,7 @@ export function decompose( * @internal */ export function buildFromDecomposition, K, V>( - constructor: new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree, + constructor: BTreeConstructor, branchingFactor: number, decomposed: DecomposeResult, cmp: (a: K, b: K) => number, @@ -377,10 +377,10 @@ export function buildFromDecomposition, K, V>( } const reconstructed = new constructor(undefined, cmp, maxNodeSize); - (reconstructed as unknown as BTreeWithInternals)._root = frontier[0]; + reconstructed._root = frontier[0]; // Return the resulting tree - return reconstructed; + return reconstructed as unknown as TBTree; } /** diff --git a/extended/intersect.js b/extended/intersect.js index 10d4627..c6ace9c 100644 --- a/extended/intersect.js +++ b/extended/intersect.js @@ -4,8 +4,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) { }; Object.defineProperty(exports, "__esModule", { value: true }); var shared_1 = require("./shared"); -var decompose_1 = require("./decompose"); var 
forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); +var bulkLoad_1 = require("./bulkLoad"); /** * Returns a new tree containing only keys present in both input trees. * Neither tree is modified. @@ -34,11 +34,10 @@ function intersect(treeA, treeB, combineFn) { var mergedValue = combineFn(key, leftValue, rightValue); (0, shared_1.alternatingPush)(intersected, key, mergedValue); }); - // Decompose both trees into disjoint subtrees leaves. - // As many of these as possible will be reused from the original trees, and the remaining - // will be leaves that are the result of merging intersecting leaves. - var decomposed = (0, decompose_1.decompose)(_treeA, _treeB, combineFn); + // Intersected keys are guaranteed to be in order, so we can bulk load var constructor = treeA.constructor; - return (0, decompose_1.buildFromDecomposition)(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); + var resultTree = new constructor(undefined, treeA._compare, branchingFactor); + resultTree._root = (0, bulkLoad_1.bulkLoadRoot)(intersected, branchingFactor, treeA._compare); + return resultTree; } exports.default = intersect; diff --git a/extended/intersect.ts b/extended/intersect.ts index 083a1ac..802a8b4 100644 --- a/extended/intersect.ts +++ b/extended/intersect.ts @@ -1,7 +1,7 @@ import BTree from '../b+tree'; -import { alternatingPush, createAlternatingList, checkCanDoSetOperation, type BTreeWithInternals } from './shared'; -import { buildFromDecomposition, decompose } from './decompose'; +import { alternatingPush, createAlternatingList, checkCanDoSetOperation, type BTreeWithInternals, BTreeConstructor } from './shared'; import forEachKeyInBoth from './forEachKeyInBoth'; +import { bulkLoadRoot } from './bulkLoad'; /** * Returns a new tree containing only keys present in both input trees. 
@@ -37,10 +37,9 @@ export default function intersect, K, V>( alternatingPush(intersected, key, mergedValue); }); - // Decompose both trees into disjoint subtrees leaves. - // As many of these as possible will be reused from the original trees, and the remaining - // will be leaves that are the result of merging intersecting leaves. - const decomposed = decompose(_treeA, _treeB, combineFn); - const constructor = treeA.constructor as new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; - return buildFromDecomposition(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); + // Intersected keys are guaranteed to be in order, so we can bulk load + const constructor = treeA.constructor as BTreeConstructor; + const resultTree = new constructor(undefined, treeA._compare, branchingFactor); + resultTree._root = bulkLoadRoot(intersected, branchingFactor, treeA._compare); + return resultTree as unknown as TBTree; } diff --git a/extended/shared.d.ts b/extended/shared.d.ts index cb0ff5c..5b8009f 100644 --- a/extended/shared.d.ts +++ b/extended/shared.d.ts @@ -1 +1,2 @@ -export {}; +import BTree from '../b+tree'; +export declare type BTreeConstructor, K, V> = new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => BTreeWithInternals; diff --git a/extended/shared.ts b/extended/shared.ts index 3317b08..d938113 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -5,12 +5,12 @@ import BTree from '../b+tree'; * BTree with access to internal properties. * @internal */ -export type BTreeWithInternals = { +export type BTreeWithInternals = BTree> = { _root: BNode; _size: number; _maxNodeSize: number; _compare: (a: K, b: K) => number; -} & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; +} & Omit; /** * Alternating list storing entries as `[A0, B0, A1, B1, ...]`. 
@@ -127,4 +127,6 @@ export function checkCanDoSetOperation(treeA: BTreeWithInternals, tr if (!supportsDifferentBranchingFactors && branchingFactor !== treeB._maxNodeSize) throw new Error(branchingFactorErrorMsg); return branchingFactor; -} \ No newline at end of file +} + +export type BTreeConstructor, K, V> = new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => BTreeWithInternals; \ No newline at end of file diff --git a/extended/union.ts b/extended/union.ts index b6411bf..168ab2e 100644 --- a/extended/union.ts +++ b/extended/union.ts @@ -1,5 +1,5 @@ import BTree from '../b+tree'; -import { type BTreeWithInternals, checkCanDoSetOperation } from './shared'; +import { BTreeConstructor, type BTreeWithInternals, checkCanDoSetOperation } from './shared'; import { decompose, buildFromDecomposition } from "./decompose"; /** @@ -35,6 +35,6 @@ export default function union, K, V>( // As many of these as possible will be reused from the original trees, and the remaining // will be leaves that are the result of merging intersecting leaves. const decomposed = decompose(_treeA, _treeB, combineFn); - const constructor = treeA.constructor as new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => TBTree; + const constructor = treeA.constructor as BTreeConstructor; return buildFromDecomposition(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); } From 38608fb6a2a8c10e98dc9cdcdcc73966b8f41efb Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 09:29:05 -0800 Subject: [PATCH 091/143] docs --- extended/intersect.d.ts | 5 ++--- extended/intersect.js | 5 ++--- extended/intersect.ts | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/extended/intersect.d.ts b/extended/intersect.d.ts index e298bc6..d2a9b48 100644 --- a/extended/intersect.d.ts +++ b/extended/intersect.d.ts @@ -4,14 +4,13 @@ import BTree from '../b+tree'; * Neither tree is modified. 
* @param treeA First tree to intersect. * @param treeB Second tree to intersect. - * @param combineFn Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. + * @param combineFn Called for keys that appear in both trees. Return the desired value. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * Note that in benchmarks even the worst case (fully intersecting keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ export default function intersect, K, V>(treeA: TBTree, treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V): TBTree; diff --git a/extended/intersect.js b/extended/intersect.js index c6ace9c..bbaf716 100644 --- a/extended/intersect.js +++ b/extended/intersect.js @@ -11,14 +11,13 @@ var bulkLoad_1 = require("./bulkLoad"); * Neither tree is modified. * @param treeA First tree to intersect. * @param treeB Second tree to intersect. - * @param combineFn Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. + * @param combineFn Called for keys that appear in both trees. Return the desired value. * @description Complexity is bounded O(N + M) for both time and allocations. 
* However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * Note that in benchmarks even the worst case (fully intersecting keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ function intersect(treeA, treeB, combineFn) { diff --git a/extended/intersect.ts b/extended/intersect.ts index 802a8b4..5888d6e 100644 --- a/extended/intersect.ts +++ b/extended/intersect.ts @@ -8,14 +8,13 @@ import { bulkLoadRoot } from './bulkLoad'; * Neither tree is modified. * @param treeA First tree to intersect. * @param treeB Second tree to intersect. - * @param combineFn Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. + * @param combineFn Called for keys that appear in both trees. Return the desired value. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
- * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * Note that in benchmarks even the worst case (fully intersecting keys) performance is faster than calling `toArray` * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. */ export default function intersect, K, V>( From 31e0c7c4993abc4af78d94927d62b06abc2927a9 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 09:33:54 -0800 Subject: [PATCH 092/143] bug fixe for empty intersect --- extended/intersect.js | 4 ++-- extended/intersect.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extended/intersect.js b/extended/intersect.js index bbaf716..e563217 100644 --- a/extended/intersect.js +++ b/extended/intersect.js @@ -25,9 +25,9 @@ function intersect(treeA, treeB, combineFn) { var _treeB = treeB; var branchingFactor = (0, shared_1.checkCanDoSetOperation)(_treeA, _treeB, true); if (_treeA._root.size() === 0) - return treeB.clone(); - if (_treeB._root.size() === 0) return treeA.clone(); + if (_treeB._root.size() === 0) + return treeB.clone(); var intersected = (0, shared_1.createAlternatingList)(); (0, forEachKeyInBoth_1.default)(treeA, treeB, function (key, leftValue, rightValue) { var mergedValue = combineFn(key, leftValue, rightValue); diff --git a/extended/intersect.ts b/extended/intersect.ts index 5888d6e..9222ff7 100644 --- a/extended/intersect.ts +++ b/extended/intersect.ts @@ -26,9 +26,9 @@ export default function intersect, K, V>( const _treeB = treeB as unknown as BTreeWithInternals; const branchingFactor = checkCanDoSetOperation(_treeA, _treeB, true); if (_treeA._root.size() === 0) - return treeB.clone(); - if (_treeB._root.size() === 0) return treeA.clone(); + if (_treeB._root.size() === 0) + return treeB.clone(); const intersected = createAlternatingList(); forEachKeyInBoth(treeA, treeB, (key, leftValue, rightValue) => { From 
2b2aae8b6dc4a5a579521f2f2842fdfcd9bc1294 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 12:46:42 -0800 Subject: [PATCH 093/143] add intersect to tests --- test/forEachKeyInBoth.test.ts | 145 +++++++++++++++++++++------------- 1 file changed, 90 insertions(+), 55 deletions(-) diff --git a/test/forEachKeyInBoth.test.ts b/test/forEachKeyInBoth.test.ts index 19064e1..3764e78 100644 --- a/test/forEachKeyInBoth.test.ts +++ b/test/forEachKeyInBoth.test.ts @@ -1,10 +1,50 @@ import BTreeEx from '../extended'; -import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/shared'; +import intersect from '../extended/intersect'; +import { comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; import { makeArray } from './shared'; var test: (name: string, f: () => void) => void = it; +type SharedCall = { key: number, leftValue: number, rightValue: number }; + +const runIntersectionImplementations = ( + left: BTreeEx, + right: BTreeEx, + assertion: (calls: SharedCall[]) => void +) => { + const forEachCalls: SharedCall[] = []; + left.forEachKeyInBoth(right, (key, leftValue, rightValue) => { + forEachCalls.push({ key, leftValue, rightValue }); + }); + assertion(forEachCalls); + + const intersectionCalls: SharedCall[] = []; + const resultTree = intersect, number, number>(left, right, (key, leftValue, rightValue) => { + intersectionCalls.push({ key, leftValue, rightValue }); + return leftValue; + }); + const expectedEntries = intersectionCalls.map(({ key, leftValue }) => [key, leftValue] as [number, number]); + expect(resultTree.toArray()).toEqual(expectedEntries); + resultTree.checkValid(); + assertion(intersectionCalls); +}; + +const expectIntersectionCalls = ( + left: BTreeEx, + right: BTreeEx, + expected: SharedCall[] +) => { + runIntersectionImplementations(left, right, (calls) => { + expect(calls).toEqual(expected); + }); +}; + +const tuplesToRecords = (entries: Array<[number, number, number]>): 
SharedCall[] => + entries.map(([key, leftValue, rightValue]) => ({ key, leftValue, rightValue })); + +const tuples = (...pairs: Array<[number, number]>) => pairs; + describe('BTree forEachKeyInBoth tests with fanout 32', testForEachKeyInBoth.bind(null, 32)); describe('BTree forEachKeyInBoth tests with fanout 10', testForEachKeyInBoth.bind(null, 10)); describe('BTree forEachKeyInBoth tests with fanout 4', testForEachKeyInBoth.bind(null, 4)); @@ -15,39 +55,29 @@ function testForEachKeyInBoth(maxNodeSize: number) { const buildTree = (entries: Array<[number, number]>) => new BTreeEx(entries, compare, maxNodeSize); - const tuples = (...pairs: Array<[number, number]>) => pairs; - - const collectCalls = (left: BTreeEx, right: BTreeEx) => { - const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; - left.forEachKeyInBoth(right, (key, leftValue, rightValue) => { - calls.push({ key, leftValue, rightValue }); - }); - return calls; - }; - test('forEachKeyInBoth two empty trees', () => { const tree1 = buildTree([]); const tree2 = buildTree([]); - expect(collectCalls(tree1, tree2)).toEqual([]); + expectIntersectionCalls(tree1, tree2, []); }); test('forEachKeyInBoth empty tree with non-empty tree', () => { const tree1 = buildTree([]); const tree2 = buildTree(tuples([1, 10], [2, 20], [3, 30])); - expect(collectCalls(tree1, tree2)).toEqual([]); - expect(collectCalls(tree2, tree1)).toEqual([]); + expectIntersectionCalls(tree1, tree2, []); + expectIntersectionCalls(tree2, tree1, []); }); test('forEachKeyInBoth with no overlapping keys', () => { const tree1 = buildTree(tuples([1, 10], [3, 30], [5, 50])); const tree2 = buildTree(tuples([2, 20], [4, 40], [6, 60])); - expect(collectCalls(tree1, tree2)).toEqual([]); + expectIntersectionCalls(tree1, tree2, []); }); test('forEachKeyInBoth with single overlapping key', () => { const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); const tree2 = buildTree(tuples([0, 100], [2, 200], [4, 400])); - 
expect(collectCalls(tree1, tree2)).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); + expectIntersectionCalls(tree1, tree2, [{ key: 2, leftValue: 20, rightValue: 200 }]); }); test('forEachKeyInBoth with multiple overlapping keys maintains tree contents', () => { @@ -57,7 +87,7 @@ function testForEachKeyInBoth(maxNodeSize: number) { const tree2 = buildTree(rightEntries); const leftBefore = tree1.toArray(); const rightBefore = tree2.toArray(); - expect(collectCalls(tree1, tree2)).toEqual([ + expectIntersectionCalls(tree1, tree2, [ { key: 2, leftValue: 20, rightValue: 200 }, { key: 4, leftValue: 40, rightValue: 400 }, ]); @@ -70,10 +100,11 @@ function testForEachKeyInBoth(maxNodeSize: number) { test('forEachKeyInBoth with contiguous overlap yields sorted keys', () => { const tree1 = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); const tree2 = buildTree(tuples([3, 30], [4, 40], [5, 50], [6, 60], [7, 70])); - const calls = collectCalls(tree1, tree2); - expect(calls.map(c => c.key)).toEqual([3, 4, 5, 6]); - expect(calls.map(c => c.leftValue)).toEqual([3, 4, 5, 6]); - expect(calls.map(c => c.rightValue)).toEqual([30, 40, 50, 60]); + runIntersectionImplementations(tree1, tree2, ( calls ) => { + expect(calls.map(c => c.key)).toEqual([3, 4, 5, 6]); + expect(calls.map(c => c.leftValue)).toEqual([3, 4, 5, 6]); + expect(calls.map(c => c.rightValue)).toEqual([30, 40, 50, 60]); + }); }); test('forEachKeyInBoth large overlapping range counts each shared key once', () => { @@ -86,44 +117,51 @@ function testForEachKeyInBoth(maxNodeSize: number) { }); const tree1 = buildTree(leftEntries); const tree2 = buildTree(rightEntries); - const calls = collectCalls(tree1, tree2); - expect(calls.length).toBe(size - overlapStart); - expect(calls[0]).toEqual({ - key: overlapStart, - leftValue: overlapStart * 3, - rightValue: overlapStart * 7 + runIntersectionImplementations(tree1, tree2, (calls) => { + expect(calls.length).toBe(size - overlapStart); + 
expect(calls[0]).toEqual({ + key: overlapStart, + leftValue: overlapStart * 3, + rightValue: overlapStart * 7 + }); + const lastCall = calls[calls.length - 1]; + expect(lastCall.key).toBe(size - 1); + expect(lastCall.leftValue).toBe((size - 1) * 3); + expect(lastCall.rightValue).toBe((size - 1) * 7); }); - const lastCall = calls[calls.length - 1]; - expect(lastCall.key).toBe(size - 1); - expect(lastCall.leftValue).toBe((size - 1) * 3); - expect(lastCall.rightValue).toBe((size - 1) * 7); }); test('forEachKeyInBoth tree with itself visits each key once', () => { const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); const tree = buildTree(entries); - const calls = collectCalls(tree, tree); - expect(calls.length).toBe(entries.length); - for (let i = 0; i < entries.length; i++) { - const [key, value] = entries[i]; - expect(calls[i]).toEqual({ key, leftValue: value, rightValue: value }); - } + runIntersectionImplementations(tree, tree, (calls) => { + expect(calls.length).toBe(entries.length); + for (let i = 0; i < entries.length; i++) { + const [key, value] = entries[i]; + expect(calls[i]).toEqual({ key, leftValue: value, rightValue: value }); + } + }); }); test('forEachKeyInBoth arguments determine left/right values', () => { const tree1 = buildTree(tuples([1, 100], [2, 200], [4, 400])); const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); - const callsLeft = collectCalls(tree1, tree2); - const callsRight = collectCalls(tree2, tree1); - expect(callsLeft).toEqual([ + expectIntersectionCalls(tree1, tree2, [ { key: 2, leftValue: 200, rightValue: 20 }, { key: 4, leftValue: 400, rightValue: 40 }, ]); - expect(callsRight).toEqual([ + expectIntersectionCalls(tree2, tree1, [ { key: 2, leftValue: 20, rightValue: 200 }, { key: 4, leftValue: 40, rightValue: 400 }, ]); }); +} + +describe('BTree forEachKeyInBoth early exiting', () => { + const compare = (a: number, b: number) => a - b; + + const buildTree = (entries: Array<[number, number]>) => 
+ new BTreeEx(entries, compare, 4); test('forEachKeyInBoth returns undefined when callback returns void', () => { const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); @@ -161,13 +199,14 @@ function testForEachKeyInBoth(maxNodeSize: number) { expect(breakResult).toEqual({ key: 3, sum: 330 }); expect(visited).toEqual([2, 3]); }); -} +}); -describe('BTree forEachKeyInBoth input/output validation', () => { +describe('BTree forEachKeyInBoth and intersect input/output validation', () => { test('forEachKeyInBoth throws error when comparators differ', () => { const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); expect(() => tree1.forEachKeyInBoth(tree2, () => {})).toThrow(comparatorErrorMsg); + expect(() => intersect, number, number>(tree1, tree2, () => 0)).toThrow(comparatorErrorMsg); }); }); @@ -237,18 +276,14 @@ describe('BTree forEachKeyInBoth fuzz tests', () => { expected.push([key, leftValue, rightValue]); } - const actual: Array<[number, number, number]> = []; - treeA.forEachKeyInBoth(treeB, (key, leftValue, rightValue) => { - actual.push([key, leftValue, rightValue]); - }); - expect(actual).toEqual(expected); - - const swappedActual: Array<[number, number, number]> = []; - treeB.forEachKeyInBoth(treeA, (key, leftValue, rightValue) => { - swappedActual.push([key, leftValue, rightValue]); - }); - const swappedExpected = expected.map(([key, leftValue, rightValue]) => [key, rightValue, leftValue]); - expect(swappedActual).toEqual(swappedExpected); + const expectedRecords = tuplesToRecords(expected); + expectIntersectionCalls(treeA, treeB, expectedRecords); + const swappedExpected = expectedRecords.map(({ key, leftValue, rightValue }) => ({ + key, + leftValue: rightValue, + rightValue: leftValue, + })); + expectIntersectionCalls(treeB, treeA, swappedExpected); expect(treeA.toArray()).toEqual(aArray); expect(treeB.toArray()).toEqual(bArray); From b649f4be431890c8d3a6da8931e500fbceca6ed0 Mon Sep 17 00:00:00 
2001 From: Taylor Williams Date: Thu, 13 Nov 2025 14:41:47 -0800 Subject: [PATCH 094/143] add subtract --- extended/subtract.d.ts | 16 ++++++++++++++++ extended/subtract.js | 32 ++++++++++++++++++++++++++++++++ extended/subtract.ts | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 extended/subtract.d.ts create mode 100644 extended/subtract.js create mode 100644 extended/subtract.ts diff --git a/extended/subtract.d.ts b/extended/subtract.d.ts new file mode 100644 index 0000000..a08094d --- /dev/null +++ b/extended/subtract.d.ts @@ -0,0 +1,16 @@ +import BTree from '../b+tree'; +/** + * Returns a new tree containing only keys that are present in treeA but notTreeB (set subtraction). + * Neither tree is modified. + * @param targetTree The tree to subtract from. + * @param subtractTree The tree to subtract. + * @description Complexity is bounded O(N + M) for time and O(N) for allocations. + * However, it is additionally bounded by O(log(N + M) * D1) for time and O(log(N) * D2) for space where D1/D2 are the + * number of disjoint ranges of keys between the two trees and in targetTree, respectively. In practice, that means for + * keys of random distribution the performance is O(N + M) and for keys with significant numbers of non-overlapping key + * ranges it is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
+ */ +export default function subtract, K, V>(targetTree: TBTree, subtractTree: TBTree): TBTree; diff --git a/extended/subtract.js b/extended/subtract.js new file mode 100644 index 0000000..ba46a23 --- /dev/null +++ b/extended/subtract.js @@ -0,0 +1,32 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +var shared_1 = require("./shared"); +var decompose_1 = require("./decompose"); +/** + * Returns a new tree containing only keys that are present in treeA but notTreeB (set subtraction). + * Neither tree is modified. + * @param targetTree The tree to subtract from. + * @param subtractTree The tree to subtract. + * @description Complexity is bounded O(N + M) for time and O(N) for allocations. + * However, it is additionally bounded by O(log(N + M) * D1) for time and O(log(N) * D2) for space where D1/D2 are the + * number of disjoint ranges of keys between the two trees and in targetTree, respectively. In practice, that means for + * keys of random distribution the performance is O(N + M) and for keys with significant numbers of non-overlapping key + * ranges it is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ +function subtract(targetTree, subtractTree) { + var _targetTree = targetTree; + var _subtractTree = subtractTree; + var branchingFactor = (0, shared_1.checkCanDoSetOperation)(_targetTree, _subtractTree, false); + if (_targetTree._root.size() === 0 || _subtractTree._root.size() === 0) + return targetTree.clone(); + // Decompose target tree into disjoint subtrees leaves. 
+ // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are exploded (and filtered) due to intersecting leaves in subtractTree. + var decomposed = (0, decompose_1.decompose)(_targetTree, _subtractTree, function () { return undefined; }, true); + var constructor = targetTree.constructor; + return (0, decompose_1.buildFromDecomposition)(constructor, branchingFactor, decomposed, targetTree._compare, targetTree._maxNodeSize); +} +exports.default = subtract; diff --git a/extended/subtract.ts b/extended/subtract.ts new file mode 100644 index 0000000..0bce807 --- /dev/null +++ b/extended/subtract.ts @@ -0,0 +1,35 @@ +import BTree from '../b+tree'; +import { checkCanDoSetOperation, type BTreeWithInternals, BTreeConstructor } from './shared'; +import { buildFromDecomposition, decompose } from './decompose'; + +/** + * Returns a new tree containing only keys that are present in treeA but notTreeB (set subtraction). + * Neither tree is modified. + * @param targetTree The tree to subtract from. + * @param subtractTree The tree to subtract. + * @description Complexity is bounded O(N + M) for time and O(N) for allocations. + * However, it is additionally bounded by O(log(N + M) * D1) for time and O(log(N) * D2) for space where D1/D2 are the + * number of disjoint ranges of keys between the two trees and in targetTree, respectively. In practice, that means for + * keys of random distribution the performance is O(N + M) and for keys with significant numbers of non-overlapping key + * ranges it is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. 
+ */ +export default function subtract, K, V>( + targetTree: TBTree, + subtractTree: TBTree +): TBTree { + const _targetTree = targetTree as unknown as BTreeWithInternals; + const _subtractTree = subtractTree as unknown as BTreeWithInternals; + const branchingFactor = checkCanDoSetOperation(_targetTree, _subtractTree, false); + if (_targetTree._root.size() === 0 || _subtractTree._root.size() === 0) + return targetTree.clone(); + + // Decompose target tree into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are exploded (and filtered) due to intersecting leaves in subtractTree. + const decomposed = decompose(_targetTree, _subtractTree, () => undefined, true); + const constructor = targetTree.constructor as BTreeConstructor; + return buildFromDecomposition(constructor, branchingFactor, decomposed, targetTree._compare, targetTree._maxNodeSize); +} From c380f32604d9c4317bceeab522047e236473bcfe Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 15:17:41 -0800 Subject: [PATCH 095/143] add forEachKeyNotIn, cleanup --- extended/decompose.js | 12 ++--- extended/decompose.ts | 12 ++--- extended/forEachKeyInBoth.js | 6 +-- extended/forEachKeyInBoth.ts | 6 +-- extended/forEachKeyNotIn.d.ts | 20 +++++++ extended/forEachKeyNotIn.js | 95 +++++++++++++++++++++++++++++++++ extended/forEachKeyNotIn.ts | 99 +++++++++++++++++++++++++++++++++++ extended/parallelWalk.js | 11 ++-- extended/parallelWalk.ts | 12 ++--- 9 files changed, 243 insertions(+), 30 deletions(-) create mode 100644 extended/forEachKeyNotIn.d.ts create mode 100644 extended/forEachKeyNotIn.js create mode 100644 extended/forEachKeyNotIn.ts diff --git a/extended/decompose.js b/extended/decompose.js index 874b022..13d5632 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -215,18 +215,18 @@ function decompose(left, right, combineFn, ignoreRight) { var combined = combineFn(key, vA, vB); if 
(combined !== undefined) (0, shared_1.alternatingPush)(pending, key, combined); - var outTrailing = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); - var outLeading = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); + var outTrailing = (0, parallelWalk_1.moveForwardOne)(trailing, leading); + var outLeading = (0, parallelWalk_1.moveForwardOne)(leading, trailing); if (outTrailing || outLeading) { if (!outTrailing || !outLeading) { // In these cases, we pass areEqual=false because a return value of "out of tree" means // the cursor did not move. This must be true because they started equal and one of them had more tree // to walk (one is !out), so they cannot be equal at this point. if (outTrailing) { - (0, parallelWalk_1.moveTo)(leading, trailing, maxKey, false, false, cmp); + (0, parallelWalk_1.moveTo)(leading, trailing, maxKey, false, false); } else { - (0, parallelWalk_1.moveTo)(trailing, leading, maxKey, false, false, cmp); + (0, parallelWalk_1.moveTo)(trailing, leading, maxKey, false, false); } } break; @@ -239,9 +239,9 @@ function decompose(left, right, combineFn, ignoreRight) { trailing = leading; leading = tmp; } - var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual, cmp), out = _a[0], nowEqual = _a[1]; + var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual), out = _a[0], nowEqual = _a[1]; if (out) { - (0, parallelWalk_1.moveTo)(leading, trailing, maxKey, false, areEqual, cmp); + (0, parallelWalk_1.moveTo)(leading, trailing, maxKey, false, areEqual); break; } else if (nowEqual) { diff --git a/extended/decompose.ts b/extended/decompose.ts index ac0e06b..c8cbdd9 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -278,17 +278,17 @@ export function decompose( const combined = combineFn(key, vA, vB); if (combined !== undefined) alternatingPush(pending, key, combined); - const outTrailing = 
moveForwardOne(trailing, leading, key, cmp); - const outLeading = moveForwardOne(leading, trailing, key, cmp); + const outTrailing = moveForwardOne(trailing, leading); + const outLeading = moveForwardOne(leading, trailing); if (outTrailing || outLeading) { if (!outTrailing || !outLeading) { // In these cases, we pass areEqual=false because a return value of "out of tree" means // the cursor did not move. This must be true because they started equal and one of them had more tree // to walk (one is !out), so they cannot be equal at this point. if (outTrailing) { - moveTo(leading, trailing, maxKey, false, false, cmp); + moveTo(leading, trailing, maxKey, false, false); } else { - moveTo(trailing, leading, maxKey, false, false, cmp); + moveTo(trailing, leading, maxKey, false, false); } } break; @@ -300,9 +300,9 @@ export function decompose( trailing = leading; leading = tmp; } - const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp); + const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual); if (out) { - moveTo(leading, trailing, maxKey, false, areEqual, cmp); + moveTo(leading, trailing, maxKey, false, areEqual); break; } else if (nowEqual) { order = 0; diff --git a/extended/forEachKeyInBoth.js b/extended/forEachKeyInBoth.js index a24aba6..d4a8417 100644 --- a/extended/forEachKeyInBoth.js +++ b/extended/forEachKeyInBoth.js @@ -43,8 +43,8 @@ function forEachKeyInBoth(treeA, treeB, callback) { if (result && result.break) { return result.break; } - var outT = (0, parallelWalk_1.moveForwardOne)(trailing, leading, key, cmp); - var outL = (0, parallelWalk_1.moveForwardOne)(leading, trailing, key, cmp); + var outT = (0, parallelWalk_1.moveForwardOne)(trailing, leading); + var outL = (0, parallelWalk_1.moveForwardOne)(leading, trailing); if (outT && outL) break; order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); @@ -56,7 +56,7 @@ function forEachKeyInBoth(treeA, treeB, callback) { 
leading = tmp; } // At this point, leading is guaranteed to be ahead of trailing. - var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual, cmp), out = _a[0], nowEqual = _a[1]; + var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual), out = _a[0], nowEqual = _a[1]; if (out) { // We've reached the end of one tree, so intersections are guaranteed to be done. break; diff --git a/extended/forEachKeyInBoth.ts b/extended/forEachKeyInBoth.ts index fbf55f4..2acb19c 100644 --- a/extended/forEachKeyInBoth.ts +++ b/extended/forEachKeyInBoth.ts @@ -49,8 +49,8 @@ export default function forEachKeyInBoth( if (result && result.break) { return result.break; } - const outT = moveForwardOne(trailing, leading, key, cmp); - const outL = moveForwardOne(leading, trailing, key, cmp); + const outT = moveForwardOne(trailing, leading); + const outL = moveForwardOne(leading, trailing); if (outT && outL) break; order = cmp(getKey(leading), getKey(trailing)); @@ -60,7 +60,7 @@ export default function forEachKeyInBoth( trailing = leading; leading = tmp; } // At this point, leading is guaranteed to be ahead of trailing. - const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual, cmp) + const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual) if (out) { // We've reached the end of one tree, so intersections are guaranteed to be done. break; diff --git a/extended/forEachKeyNotIn.d.ts b/extended/forEachKeyNotIn.d.ts new file mode 100644 index 0000000..eae84ae --- /dev/null +++ b/extended/forEachKeyNotIn.d.ts @@ -0,0 +1,20 @@ +import BTree from '../b+tree'; +/** + * Calls the supplied `callback` for each key/value pair that is in includeTree but not in excludeTree. + * This is also known as set subtraction. + * The callback will be called in sorted key order. + * Neither tree is modified. + * @param includeTree The first tree. 
This is the tree from which keys will be taken. + * @param excludeTree The second tree. Keys present in this tree will be excluded. + * @param callback Invoked for keys that are in includeTree but not in excludeTree. It can cause iteration to early exit by returning `{ break: R }`. + * @description Complexity is bounded by O(N + M) for time. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys, none intersecting) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ +export default function forEachKeyNotIn(includeTree: BTree, excludeTree: BTree, callback: (key: K, value: V) => { + break?: R; +} | void): R | undefined; diff --git a/extended/forEachKeyNotIn.js b/extended/forEachKeyNotIn.js new file mode 100644 index 0000000..fd5cf0f --- /dev/null +++ b/extended/forEachKeyNotIn.js @@ -0,0 +1,95 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +var shared_1 = require("./shared"); +var parallelWalk_1 = require("./parallelWalk"); +/** + * Calls the supplied `callback` for each key/value pair that is in includeTree but not in excludeTree. + * This is also known as set subtraction. + * The callback will be called in sorted key order. + * Neither tree is modified. + * @param includeTree The first tree. This is the tree from which keys will be taken. + * @param excludeTree The second tree. Keys present in this tree will be excluded. 
+ * @param callback Invoked for keys that are in includeTree but not in excludeTree. It can cause iteration to early exit by returning `{ break: R }`. + * @description Complexity is bounded by O(N + M) for time. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. + * Note that in benchmarks even the worst case (fully interleaved keys, none intersecting) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ +function forEachKeyNotIn(includeTree, excludeTree, callback) { + var _includeTree = includeTree; + var _excludeTree = excludeTree; + (0, shared_1.checkCanDoSetOperation)(_includeTree, _excludeTree, true); + if (includeTree.size === 0) { + return; + } + var finishWalk = function () { + var out = false; + do { + var key = (0, parallelWalk_1.getKey)(cursorInclude); + var value = cursorInclude.leaf.values[cursorInclude.leafIndex]; + var result = callback(key, value); + if (result && result.break) { + return result.break; + } + out = (0, parallelWalk_1.moveForwardOne)(cursorInclude, cursorExclude); + } while (!out); + }; + var cmp = includeTree._compare; + var makePayload = function () { return undefined; }; + var cursorInclude = (0, parallelWalk_1.createCursor)(_includeTree, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + if (excludeTree.size === 0) { + finishWalk(); + return; + } + var cursorExclude = (0, parallelWalk_1.createCursor)(_excludeTree, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, 
parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + var order = cmp((0, parallelWalk_1.getKey)(cursorInclude), (0, parallelWalk_1.getKey)(cursorExclude)); + while (true) { + var areEqual = order === 0; + if (areEqual) { + // Keys are equal, so this key is in both trees and should be skipped. + var outInclude = (0, parallelWalk_1.moveForwardOne)(cursorExclude, cursorInclude); + if (outInclude) + break; + var _a = (0, parallelWalk_1.moveTo)(cursorInclude, cursorExclude, (0, parallelWalk_1.getKey)(cursorInclude), true, areEqual), outExclude = _a[0], nowEqual = _a[1]; + if (outExclude) { + finishWalk(); + break; + } + order = nowEqual ? 0 : -1; + } + else { + if (order < 0) { + var key = (0, parallelWalk_1.getKey)(cursorInclude); + var value = cursorInclude.leaf.values[cursorInclude.leafIndex]; + var result = callback(key, value); + if (result && result.break) { + return result.break; + } + var outInclude = (0, parallelWalk_1.moveForwardOne)(cursorInclude, cursorExclude); + if (outInclude) { + break; + } + order = cmp((0, parallelWalk_1.getKey)(cursorInclude), (0, parallelWalk_1.getKey)(cursorExclude)); + } + else { + // At this point, include is guaranteed to be ahead of exclude. 
+ var _b = (0, parallelWalk_1.moveTo)(cursorExclude, cursorInclude, (0, parallelWalk_1.getKey)(cursorInclude), true, areEqual), out = _b[0], nowEqual = _b[1]; + if (out) { + // We've reached the end of exclude, so call for all remaining keys in include + finishWalk(); + break; + } + else if (nowEqual) { + order = 0; + } + else { + order = -1; // trailing is ahead of leading + } + } + } + } +} +exports.default = forEachKeyNotIn; diff --git a/extended/forEachKeyNotIn.ts b/extended/forEachKeyNotIn.ts new file mode 100644 index 0000000..471dd1b --- /dev/null +++ b/extended/forEachKeyNotIn.ts @@ -0,0 +1,99 @@ +import BTree from '../b+tree'; +import { type BTreeWithInternals, checkCanDoSetOperation } from './shared'; +import { createCursor, moveForwardOne, moveTo, getKey, noop } from "./parallelWalk" + +/** + * Calls the supplied `callback` for each key/value pair that is in includeTree but not in excludeTree. + * This is also known as set subtraction. + * The callback will be called in sorted key order. + * Neither tree is modified. + * @param includeTree The first tree. This is the tree from which keys will be taken. + * @param excludeTree The second tree. Keys present in this tree will be excluded. + * @param callback Invoked for keys that are in includeTree but not in excludeTree. It can cause iteration to early exit by returning `{ break: R }`. + * @description Complexity is bounded by O(N + M) for time. + * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between + * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for + * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. + * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
+ * Note that in benchmarks even the worst case (fully interleaved keys, none intersecting) performance is faster than calling `toArray` + * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + */ +export default function forEachKeyNotIn( + includeTree: BTree, + excludeTree: BTree, + callback: (key: K, value: V) => { break?: R } | void +): R | undefined { + const _includeTree = includeTree as unknown as BTreeWithInternals; + const _excludeTree = excludeTree as unknown as BTreeWithInternals; + checkCanDoSetOperation(_includeTree, _excludeTree, true); + if (includeTree.size === 0) { + return; + } + + const finishWalk = () => { + let out = false; + do { + const key = getKey(cursorInclude); + const value = cursorInclude.leaf.values[cursorInclude.leafIndex]; + const result = callback(key, value); + if (result && result.break) { + return result.break; + } + out = moveForwardOne(cursorInclude, cursorExclude); + } while (!out); + } + + const cmp = includeTree._compare; + const makePayload = (): undefined => undefined; + let cursorInclude = createCursor(_includeTree, makePayload, noop, noop, noop, noop, noop); + + if (excludeTree.size === 0) { + finishWalk(); + return; + } + + let cursorExclude = createCursor(_excludeTree, makePayload, noop, noop, noop, noop, noop); + let order = cmp(getKey(cursorInclude), getKey(cursorExclude)); + + while (true) { + const areEqual = order === 0; + if (areEqual) { + // Keys are equal, so this key is in both trees and should be skipped. + const outInclude = moveForwardOne(cursorExclude, cursorInclude); + if (outInclude) + break; + const [outExclude, nowEqual] = moveTo(cursorInclude, cursorExclude, getKey(cursorInclude), true, areEqual); + if (outExclude) { + finishWalk(); + break; + } + order = nowEqual ? 
0 : -1; + } else { + if (order < 0) { + const key = getKey(cursorInclude); + const value = cursorInclude.leaf.values[cursorInclude.leafIndex]; + const result = callback(key, value); + if (result && result.break) { + return result.break; + } + const outInclude = moveForwardOne(cursorInclude, cursorExclude); + if (outInclude) { + break; + } + order = cmp(getKey(cursorInclude), getKey(cursorExclude)); + } else { + // At this point, include is guaranteed to be ahead of exclude. + const [out, nowEqual] = moveTo(cursorExclude, cursorInclude, getKey(cursorInclude), true, areEqual) + if (out) { + // We've reached the end of exclude, so call for all remaining keys in include + finishWalk(); + break; + } else if (nowEqual) { + order = 0; + } else { + order = -1; // trailing is ahead of leading + } + } + } + } +} diff --git a/extended/parallelWalk.js b/extended/parallelWalk.js index 22ff026..1cfb82b 100644 --- a/extended/parallelWalk.js +++ b/extended/parallelWalk.js @@ -3,11 +3,11 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.noop = exports.moveTo = exports.getKey = exports.createCursor = exports.moveForwardOne = void 0; /** * Walks the cursor forward by one key. - * Should only be called to advance cursors that started equal. * Returns true if end-of-tree was reached (cursor not structurally mutated). + * Optimized for this case over the more general `moveTo` function. 
* @internal */ -function moveForwardOne(cur, other, currentKey, cmp) { +function moveForwardOne(cur, other) { var leaf = cur.leaf; var nextIndex = cur.leafIndex + 1; if (nextIndex < leaf.keys.length) { @@ -18,7 +18,7 @@ function moveForwardOne(cur, other, currentKey, cmp) { } // If our optimized step within leaf failed, use full moveTo logic // Pass isInclusive=false to ensure we walk forward to the key exactly after the current - return moveTo(cur, other, currentKey, false, true, cmp)[0]; + return moveTo(cur, other, getKey(cur), false, true)[0]; } exports.moveForwardOne = moveForwardOne; /** @@ -64,8 +64,9 @@ exports.getKey = getKey; * Also returns a boolean indicating if the target key was landed on exactly. * @internal */ -function moveTo(cur, other, targetKey, isInclusive, startedEqual, cmp) { - // Cache callbacks for perf +function moveTo(cur, other, targetKey, isInclusive, startedEqual) { + // Cache for perf + var cmp = cur.tree._compare; var onMoveInLeaf = cur.onMoveInLeaf; // Fast path: destination within current leaf var leaf = cur.leaf; diff --git a/extended/parallelWalk.ts b/extended/parallelWalk.ts index 3b11a85..fe9a3ac 100644 --- a/extended/parallelWalk.ts +++ b/extended/parallelWalk.ts @@ -21,15 +21,13 @@ export interface Cursor { /** * Walks the cursor forward by one key. - * Should only be called to advance cursors that started equal. * Returns true if end-of-tree was reached (cursor not structurally mutated). + * Optimized for this case over the more general `moveTo` function. 
* @internal */ export function moveForwardOne( cur: Cursor, - other: Cursor, - currentKey: K, - cmp: (a: K, b: K) => number + other: Cursor ): boolean { const leaf = cur.leaf; const nextIndex = cur.leafIndex + 1; @@ -42,7 +40,7 @@ export function moveForwardOne( // If our optimized step within leaf failed, use full moveTo logic // Pass isInclusive=false to ensure we walk forward to the key exactly after the current - return moveTo(cur, other, currentKey, false, true, cmp)[0]; + return moveTo(cur, other, getKey(cur), false, true)[0]; } /** @@ -94,9 +92,9 @@ export function moveTo( targetKey: K, isInclusive: boolean, startedEqual: boolean, - cmp: (a: K, b: K) => number ): [outOfTree: boolean, targetExactlyReached: boolean] { - // Cache callbacks for perf + // Cache for perf + const cmp = cur.tree._compare const onMoveInLeaf = cur.onMoveInLeaf; // Fast path: destination within current leaf const leaf = cur.leaf; From 104f3587e3e201d75ff44b0575e79ce484686aa5 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 15:46:28 -0800 Subject: [PATCH 096/143] tests, fixes --- extended/forEachKeyNotIn.js | 13 +- extended/forEachKeyNotIn.ts | 11 +- extended/subtract.js | 3 + extended/subtract.ts | 5 +- test/forEachKeyNotIn.test.ts | 313 +++++++++++++++++++++++++++++++++++ 5 files changed, 327 insertions(+), 18 deletions(-) create mode 100644 test/forEachKeyNotIn.test.ts diff --git a/extended/forEachKeyNotIn.js b/extended/forEachKeyNotIn.js index fd5cf0f..729a3d5 100644 --- a/extended/forEachKeyNotIn.js +++ b/extended/forEachKeyNotIn.js @@ -50,15 +50,10 @@ function forEachKeyNotIn(includeTree, excludeTree, callback) { var areEqual = order === 0; if (areEqual) { // Keys are equal, so this key is in both trees and should be skipped. 
- var outInclude = (0, parallelWalk_1.moveForwardOne)(cursorExclude, cursorInclude); + var outInclude = (0, parallelWalk_1.moveForwardOne)(cursorInclude, cursorExclude); if (outInclude) break; - var _a = (0, parallelWalk_1.moveTo)(cursorInclude, cursorExclude, (0, parallelWalk_1.getKey)(cursorInclude), true, areEqual), outExclude = _a[0], nowEqual = _a[1]; - if (outExclude) { - finishWalk(); - break; - } - order = nowEqual ? 0 : -1; + order = 1; // include is now ahead of exclude } else { if (order < 0) { @@ -76,7 +71,7 @@ function forEachKeyNotIn(includeTree, excludeTree, callback) { } else { // At this point, include is guaranteed to be ahead of exclude. - var _b = (0, parallelWalk_1.moveTo)(cursorExclude, cursorInclude, (0, parallelWalk_1.getKey)(cursorInclude), true, areEqual), out = _b[0], nowEqual = _b[1]; + var _a = (0, parallelWalk_1.moveTo)(cursorExclude, cursorInclude, (0, parallelWalk_1.getKey)(cursorInclude), true, areEqual), out = _a[0], nowEqual = _a[1]; if (out) { // We've reached the end of exclude, so call for all remaining keys in include finishWalk(); @@ -86,7 +81,7 @@ function forEachKeyNotIn(includeTree, excludeTree, callback) { order = 0; } else { - order = -1; // trailing is ahead of leading + order = -1; } } } diff --git a/extended/forEachKeyNotIn.ts b/extended/forEachKeyNotIn.ts index 471dd1b..0ea5b08 100644 --- a/extended/forEachKeyNotIn.ts +++ b/extended/forEachKeyNotIn.ts @@ -59,15 +59,10 @@ export default function forEachKeyNotIn( const areEqual = order === 0; if (areEqual) { // Keys are equal, so this key is in both trees and should be skipped. - const outInclude = moveForwardOne(cursorExclude, cursorInclude); + const outInclude = moveForwardOne(cursorInclude, cursorExclude); if (outInclude) break; - const [outExclude, nowEqual] = moveTo(cursorInclude, cursorExclude, getKey(cursorInclude), true, areEqual); - if (outExclude) { - finishWalk(); - break; - } - order = nowEqual ? 
0 : -1; + order = 1; // include is now ahead of exclude } else { if (order < 0) { const key = getKey(cursorInclude); @@ -91,7 +86,7 @@ export default function forEachKeyNotIn( } else if (nowEqual) { order = 0; } else { - order = -1; // trailing is ahead of leading + order = -1; } } } diff --git a/extended/subtract.js b/extended/subtract.js index ba46a23..06ac189 100644 --- a/extended/subtract.js +++ b/extended/subtract.js @@ -27,6 +27,9 @@ function subtract(targetTree, subtractTree) { // will be leaves that are exploded (and filtered) due to intersecting leaves in subtractTree. var decomposed = (0, decompose_1.decompose)(_targetTree, _subtractTree, function () { return undefined; }, true); var constructor = targetTree.constructor; + if ((0, shared_1.alternatingCount)(decomposed.disjoint) === 0) { + return new constructor(undefined, targetTree._compare, targetTree._maxNodeSize); + } return (0, decompose_1.buildFromDecomposition)(constructor, branchingFactor, decomposed, targetTree._compare, targetTree._maxNodeSize); } exports.default = subtract; diff --git a/extended/subtract.ts b/extended/subtract.ts index 0bce807..4da06fc 100644 --- a/extended/subtract.ts +++ b/extended/subtract.ts @@ -1,5 +1,5 @@ import BTree from '../b+tree'; -import { checkCanDoSetOperation, type BTreeWithInternals, BTreeConstructor } from './shared'; +import { checkCanDoSetOperation, type BTreeWithInternals, BTreeConstructor, alternatingCount } from './shared'; import { buildFromDecomposition, decompose } from './decompose'; /** @@ -31,5 +31,8 @@ export default function subtract, K, V>( // will be leaves that are exploded (and filtered) due to intersecting leaves in subtractTree. 
const decomposed = decompose(_targetTree, _subtractTree, () => undefined, true); const constructor = targetTree.constructor as BTreeConstructor; + if (alternatingCount(decomposed.disjoint) === 0) { + return new constructor(undefined, targetTree._compare, targetTree._maxNodeSize) as unknown as TBTree; + } return buildFromDecomposition(constructor, branchingFactor, decomposed, targetTree._compare, targetTree._maxNodeSize); } diff --git a/test/forEachKeyNotIn.test.ts b/test/forEachKeyNotIn.test.ts new file mode 100644 index 0000000..8bb90fb --- /dev/null +++ b/test/forEachKeyNotIn.test.ts @@ -0,0 +1,313 @@ +import BTreeEx from '../extended'; +import forEachKeyNotIn from '../extended/forEachKeyNotIn'; +import subtract from '../extended/subtract'; +import { comparatorErrorMsg, branchingFactorErrorMsg } from '../extended/shared'; +import MersenneTwister from 'mersenne-twister'; +import { makeArray } from './shared'; + +type NotInCall = { key: number, value: number }; + +const runSubtractionImplementations = ( + include: BTreeEx, + exclude: BTreeEx, + assertion: (calls: NotInCall[]) => void +) => { + const forEachCalls: NotInCall[] = []; + forEachKeyNotIn(include, exclude, (key, value) => { + forEachCalls.push({ key, value }); + }); + assertion(forEachCalls); + + const resultTree = subtract, number, number>(include, exclude); + const subtractCalls = resultTree.toArray().map(([key, value]) => ({ key, value })); + expect(subtractCalls).toEqual(forEachCalls); + resultTree.checkValid(); + assertion(subtractCalls); +}; + +const expectSubtractionCalls = ( + include: BTreeEx, + exclude: BTreeEx, + expected: NotInCall[] +) => { + runSubtractionImplementations(include, exclude, (calls) => { + expect(calls).toEqual(expected); + }); +}; + +const tuplesToRecords = (entries: Array<[number, number]>): NotInCall[] => + entries.map(([key, value]) => ({ key, value })); + +const tuples = (...pairs: Array<[number, number]>) => pairs; + +describe('BTree forEachKeyNotIn tests with fanout 32', 
testForEachKeyNotIn.bind(null, 32)); +describe('BTree forEachKeyNotIn tests with fanout 10', testForEachKeyNotIn.bind(null, 10)); +describe('BTree forEachKeyNotIn tests with fanout 4', testForEachKeyNotIn.bind(null, 4)); + +function testForEachKeyNotIn(maxNodeSize: number) { + const compare = (a: number, b: number) => a - b; + + const buildTree = (entries: Array<[number, number]>) => + new BTreeEx(entries, compare, maxNodeSize); + + it('forEachKeyNotIn two empty trees', () => { + const includeTree = buildTree([]); + const excludeTree = buildTree([]); + expectSubtractionCalls(includeTree, excludeTree, []); + }); + + it('forEachKeyNotIn include empty tree with non-empty tree', () => { + const includeTree = buildTree([]); + const excludeTree = buildTree(tuples([1, 10], [2, 20], [3, 30])); + expectSubtractionCalls(includeTree, excludeTree, []); + }); + + it('forEachKeyNotIn exclude tree empty yields all include keys', () => { + const includeEntries: Array<[number, number]> = [[1, 10], [3, 30], [5, 50]]; + const includeTree = buildTree(includeEntries); + const excludeTree = buildTree([]); + const expected = tuplesToRecords(includeEntries); + expectSubtractionCalls(includeTree, excludeTree, expected); + }); + + it('forEachKeyNotIn with no overlapping keys returns include tree contents', () => { + const includeEntries: Array<[number, number]> = [[1, 10], [3, 30], [5, 50]]; + const excludeEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400]]; + const includeTree = buildTree(includeEntries); + const excludeTree = buildTree(excludeEntries); + const expected = tuplesToRecords(includeEntries); + expectSubtractionCalls(includeTree, excludeTree, expected); + }); + + it('forEachKeyNotIn with overlapping keys excludes matches', () => { + const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40], [5, 50])); + const excludeTree = buildTree(tuples([0, 100], [2, 200], [4, 400], [6, 600])); + expectSubtractionCalls(includeTree, excludeTree, [ + { key: 1, 
value: 10 }, + { key: 3, value: 30 }, + { key: 5, value: 50 }, + ]); + }); + + it('forEachKeyNotIn excludes leading overlap then emits remaining keys', () => { + const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const excludeTree = buildTree(tuples([1, 100], [2, 200])); + expectSubtractionCalls(includeTree, excludeTree, [ + { key: 3, value: 30 }, + { key: 4, value: 40 }, + ]); + }); + + it('forEachKeyNotIn maintains tree contents', () => { + const includeEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; + const excludeEntries: Array<[number, number]> = [[1, 100], [3, 300], [5, 500]]; + const includeTree = buildTree(includeEntries); + const excludeTree = buildTree(excludeEntries); + const includeBefore = includeTree.toArray(); + const excludeBefore = excludeTree.toArray(); + expectSubtractionCalls(includeTree, excludeTree, [ + { key: 2, value: 20 }, + { key: 4, value: 40 }, + ]); + expect(includeTree.toArray()).toEqual(includeBefore); + expect(excludeTree.toArray()).toEqual(excludeBefore); + includeTree.checkValid(); + excludeTree.checkValid(); + }); + + it('forEachKeyNotIn with contiguous overlap yields sorted survivors', () => { + const includeTree = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); + const excludeTree = buildTree(tuples([3, 30], [4, 40], [5, 50])); + runSubtractionImplementations(includeTree, excludeTree, (calls) => { + expect(calls.map(c => c.key)).toEqual([1, 2, 6]); + expect(calls.map(c => c.value)).toEqual([1, 2, 6]); + }); + }); + + it('forEachKeyNotIn large subtraction leaves prefix and suffix ranges', () => { + const size = 1000; + const excludeStart = 200; + const excludeSpan = 500; + const includeEntries = Array.from({ length: size }, (_, i) => [i, i * 2] as [number, number]); + const excludeEntries = Array.from({ length: excludeSpan }, (_, i) => { + const key = i + excludeStart; + return [key, key * 3] as [number, number]; + }); + const includeTree = 
buildTree(includeEntries); + const excludeTree = buildTree(excludeEntries); + runSubtractionImplementations(includeTree, excludeTree, (calls) => { + expect(calls.length).toBe(size - excludeSpan); + expect(calls[0]).toEqual({ key: 0, value: 0 }); + const lastCall = calls[calls.length - 1]; + expect(lastCall.key).toBe(size - 1); + expect(lastCall.value).toBe((size - 1) * 2); + expect(calls.filter(c => c.key >= excludeStart && c.key < excludeStart + excludeSpan)).toEqual([]); + }); + }); + + it('forEachKeyNotIn tree with itself visits no keys', () => { + const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); + const tree = buildTree(entries); + expectSubtractionCalls(tree, tree, []); + }); + + it('forEachKeyNotIn exclude superset yields empty result', () => { + const includeTree = buildTree(tuples([2, 200], [3, 300])); + const excludeTree = buildTree(tuples([1, 100], [2, 200], [3, 300], [4, 400])); + expectSubtractionCalls(includeTree, excludeTree, []); + }); + + it('forEachKeyNotIn arguments determine surviving keys', () => { + const tree1 = buildTree(tuples([1, 100], [2, 200], [4, 400])); + const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); + expectSubtractionCalls(tree1, tree2, [ + { key: 1, value: 100 }, + ]); + expectSubtractionCalls(tree2, tree1, [ + { key: 3, value: 30 }, + ]); + }); +} + +describe('BTree forEachKeyNotIn early exiting', () => { + const compare = (a: number, b: number) => a - b; + + const buildTree = (entries: Array<[number, number]>) => + new BTreeEx(entries, compare, 4); + + it('forEachKeyNotIn returns undefined when callback returns void', () => { + const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30])); + const excludeTree = buildTree(tuples([2, 200])); + const visited: number[] = []; + const result = forEachKeyNotIn(includeTree, excludeTree, key => { + visited.push(key); + }); + expect(result).toBeUndefined(); + expect(visited).toEqual([1, 3]); + }); + + it('forEachKeyNotIn ignores undefined 
break values and completes traversal', () => { + const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const excludeTree = buildTree(tuples([2, 200])); + const visited: number[] = []; + const result = forEachKeyNotIn(includeTree, excludeTree, key => { + visited.push(key); + return { break: undefined }; + }); + expect(result).toBeUndefined(); + expect(visited).toEqual([1, 3, 4]); + }); + + it('forEachKeyNotIn breaks early when callback returns a value', () => { + const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const excludeTree = buildTree(tuples([2, 200])); + const visited: number[] = []; + const breakResult = forEachKeyNotIn(includeTree, excludeTree, (key, value) => { + visited.push(key); + if (key === 3) { + return { break: { key, value } }; + } + }); + expect(breakResult).toEqual({ key: 3, value: 30 }); + expect(visited).toEqual([1, 3]); + }); +}); + +describe('BTree forEachKeyNotIn and subtract input/output validation', () => { + it('forEachKeyNotIn throws error when comparators differ', () => { + const includeTree = new BTreeEx([[1, 10]], (a, b) => b - a); + const excludeTree = new BTreeEx([[2, 20]], (a, b) => a - b); + expect(() => forEachKeyNotIn(includeTree, excludeTree, () => {})).toThrow(comparatorErrorMsg); + }); + + it('subtract throws error when comparators differ', () => { + const includeTree = new BTreeEx([[1, 10]], (a, b) => b - a); + const excludeTree = new BTreeEx([[2, 20]], (a, b) => a - b); + expect(() => subtract, number, number>(includeTree, excludeTree)).toThrow(comparatorErrorMsg); + }); + + it('subtract throws error when branching factors differ', () => { + const includeTree = new BTreeEx([[1, 10]], (a, b) => a - b, 4); + const excludeTree = new BTreeEx([[2, 20]], (a, b) => a - b, 8); + expect(() => subtract, number, number>(includeTree, excludeTree)).toThrow(branchingFactorErrorMsg); + }); +}); + +describe.skip('BTree forEachKeyNotIn fuzz tests', () => { + const compare = (a: number, b: 
number) => a - b; + const FUZZ_SETTINGS = { + branchingFactors: [4, 5, 32], + ooms: [2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], + collisionChances: [0.05, 0.1, 0.3], + timeoutMs: 30_000 + } as const; + + FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { + if (fraction < 0 || fraction > 1) + throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); + }); + FUZZ_SETTINGS.collisionChances.forEach(chance => { + if (chance < 0 || chance > 1) + throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); + }); + + jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + + const rng = new MersenneTwister(0xBAD_C0DE); + + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { + describe(`branching factor ${maxNodeSize}`, () => { + for (const collisionChance of FUZZ_SETTINGS.collisionChances) { + for (const oom of FUZZ_SETTINGS.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { + const fractionB = 1 - fractionA; + const collisionLabel = collisionChance.toFixed(2); + + it(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + + const keys = makeArray(size, true, 1, collisionChance, rng); + + for (const value of keys) { + const assignToA = rng.random() < fractionA; + const assignToB = rng.random() < fractionB; + + if (!assignToA && !assignToB) { + if (rng.random() < 0.5) + treeA.set(value, value); + else + treeB.set(value, value); + continue; + } + + if (assignToA) + treeA.set(value, value); + if (assignToB) + treeB.set(value, value); + } + + const aArray = treeA.toArray(); + const bArray = treeB.toArray(); + const bMap = new Map(bArray); + const aMap = new Map(aArray); + + const expectedA = aArray.filter(([key]) => !bMap.has(key)); + const expectedB = bArray.filter(([key]) => !aMap.has(key)); + + 
expectSubtractionCalls(treeA, treeB, tuplesToRecords(expectedA)); + expectSubtractionCalls(treeB, treeA, tuplesToRecords(expectedB)); + + expect(treeA.toArray()).toEqual(aArray); + expect(treeB.toArray()).toEqual(bArray); + treeA.checkValid(); + treeB.checkValid(); + }); + } + } + } + }); + } +}); From dd91dd46c4c6018f8ee80f0c298da53fcd6ba43f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 15:50:38 -0800 Subject: [PATCH 097/143] bad test --- test/forEachKeyNotIn.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/forEachKeyNotIn.test.ts b/test/forEachKeyNotIn.test.ts index 8bb90fb..c85c641 100644 --- a/test/forEachKeyNotIn.test.ts +++ b/test/forEachKeyNotIn.test.ts @@ -216,19 +216,19 @@ describe('BTree forEachKeyNotIn early exiting', () => { describe('BTree forEachKeyNotIn and subtract input/output validation', () => { it('forEachKeyNotIn throws error when comparators differ', () => { const includeTree = new BTreeEx([[1, 10]], (a, b) => b - a); - const excludeTree = new BTreeEx([[2, 20]], (a, b) => a - b); + const excludeTree = new BTreeEx([[2, 20]], (a, b) => a + b); expect(() => forEachKeyNotIn(includeTree, excludeTree, () => {})).toThrow(comparatorErrorMsg); }); it('subtract throws error when comparators differ', () => { const includeTree = new BTreeEx([[1, 10]], (a, b) => b - a); - const excludeTree = new BTreeEx([[2, 20]], (a, b) => a - b); + const excludeTree = new BTreeEx([[2, 20]], (a, b) => a + b); expect(() => subtract, number, number>(includeTree, excludeTree)).toThrow(comparatorErrorMsg); }); it('subtract throws error when branching factors differ', () => { const includeTree = new BTreeEx([[1, 10]], (a, b) => a - b, 4); - const excludeTree = new BTreeEx([[2, 20]], (a, b) => a - b, 8); + const excludeTree = new BTreeEx([[2, 20]], includeTree._compare, 8); expect(() => subtract, number, number>(includeTree, excludeTree)).toThrow(branchingFactorErrorMsg); }); }); From 
683b194f9b9aae6033e0e840e5100e9684361031 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 17:33:28 -0800 Subject: [PATCH 098/143] more fixes in subtract --- extended/decompose.js | 19 +++++++++++++++++- extended/decompose.ts | 40 ++++++++++++++++++++++++++++++++++++- extended/forEachKeyNotIn.js | 7 +++---- extended/forEachKeyNotIn.ts | 9 ++++----- 4 files changed, 64 insertions(+), 11 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index 13d5632..25d9cc3 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -170,7 +170,24 @@ function decompose(left, right, combineFn, ignoreRight) { var curB; if (ignoreRight) { var dummyPayload_1 = { disqualified: true }; - curB = (0, parallelWalk_1.createCursor)(right, function () { return dummyPayload_1; }, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + var onStepUpIgnore = function (_1, _2, _3, _4, spineIndex, stepDownIndex, cursorThis) { + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + }; + var onStepDownIgnore = function (_, __, spineIndex, stepDownIndex, cursorThis) { + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + }; + var onEnterLeafIgnore = function (leaf, destIndex, _, cursorOther) { + if (destIndex > 0 + || (0, b_tree_1.areOverlapping)(leaf.minKey(), leaf.maxKey(), (0, parallelWalk_1.getKey)(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + } + }; + curB = (0, parallelWalk_1.createCursor)(right, function () { return dummyPayload_1; }, onEnterLeafIgnore, parallelWalk_1.noop, parallelWalk_1.noop, onStepUpIgnore, onStepDownIgnore); } else { curB = (0, parallelWalk_1.createCursor)(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); diff --git a/extended/decompose.ts b/extended/decompose.ts index c8cbdd9..5ab1442 100644 
--- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -229,7 +229,45 @@ export function decompose( let curB: typeof curA; if (ignoreRight) { const dummyPayload: DecomposePayload = { disqualified: true }; - curB = createCursor(right, () => dummyPayload, noop, noop, noop, noop, noop); + const onStepUpIgnore = ( + _1: BNodeInternal, + _2: number, + _3: DecomposePayload, + _4: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: Cursor + ) => { + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + }; + + const onStepDownIgnore = ( + _: BNodeInternal, + __: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: Cursor + ) => { + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + }; + + const onEnterLeafIgnore = ( + leaf: BNode, + destIndex: number, + _: Cursor, + cursorOther: Cursor + ) => { + if (destIndex > 0 + || areOverlapping(leaf.minKey()!, leaf.maxKey(), getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + } + }; + curB = createCursor(right, () => dummyPayload, onEnterLeafIgnore, noop, noop, onStepUpIgnore, onStepDownIgnore); } else { curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); } diff --git a/extended/forEachKeyNotIn.js b/extended/forEachKeyNotIn.js index 729a3d5..b0783b2 100644 --- a/extended/forEachKeyNotIn.js +++ b/extended/forEachKeyNotIn.js @@ -36,13 +36,13 @@ function forEachKeyNotIn(includeTree, excludeTree, callback) { } out = (0, parallelWalk_1.moveForwardOne)(cursorInclude, cursorExclude); } while (!out); + return undefined; }; var cmp = includeTree._compare; var makePayload = function () { return undefined; }; var cursorInclude = (0, parallelWalk_1.createCursor)(_includeTree, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); if 
(excludeTree.size === 0) { - finishWalk(); - return; + return finishWalk(); } var cursorExclude = (0, parallelWalk_1.createCursor)(_excludeTree, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); var order = cmp((0, parallelWalk_1.getKey)(cursorInclude), (0, parallelWalk_1.getKey)(cursorExclude)); @@ -74,8 +74,7 @@ function forEachKeyNotIn(includeTree, excludeTree, callback) { var _a = (0, parallelWalk_1.moveTo)(cursorExclude, cursorInclude, (0, parallelWalk_1.getKey)(cursorInclude), true, areEqual), out = _a[0], nowEqual = _a[1]; if (out) { // We've reached the end of exclude, so call for all remaining keys in include - finishWalk(); - break; + return finishWalk(); } else if (nowEqual) { order = 0; diff --git a/extended/forEachKeyNotIn.ts b/extended/forEachKeyNotIn.ts index 0ea5b08..0a8db3f 100644 --- a/extended/forEachKeyNotIn.ts +++ b/extended/forEachKeyNotIn.ts @@ -30,7 +30,7 @@ export default function forEachKeyNotIn( return; } - const finishWalk = () => { + const finishWalk = (): R | undefined => { let out = false; do { const key = getKey(cursorInclude); @@ -41,6 +41,7 @@ export default function forEachKeyNotIn( } out = moveForwardOne(cursorInclude, cursorExclude); } while (!out); + return undefined; } const cmp = includeTree._compare; @@ -48,8 +49,7 @@ export default function forEachKeyNotIn( let cursorInclude = createCursor(_includeTree, makePayload, noop, noop, noop, noop, noop); if (excludeTree.size === 0) { - finishWalk(); - return; + return finishWalk(); } let cursorExclude = createCursor(_excludeTree, makePayload, noop, noop, noop, noop, noop); @@ -81,8 +81,7 @@ export default function forEachKeyNotIn( const [out, nowEqual] = moveTo(cursorExclude, cursorInclude, getKey(cursorInclude), true, areEqual) if (out) { // We've reached the end of exclude, so call for all remaining keys in include - finishWalk(); - break; + return finishWalk(); } else if (nowEqual) { order = 0; } else { From 
f2fe485cbc0575e617eb6240b933b2411db338ea Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 17:42:09 -0800 Subject: [PATCH 099/143] un skip fuzz test --- test/forEachKeyNotIn.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/forEachKeyNotIn.test.ts b/test/forEachKeyNotIn.test.ts index c85c641..919a178 100644 --- a/test/forEachKeyNotIn.test.ts +++ b/test/forEachKeyNotIn.test.ts @@ -233,7 +233,7 @@ describe('BTree forEachKeyNotIn and subtract input/output validation', () => { }); }); -describe.skip('BTree forEachKeyNotIn fuzz tests', () => { +describe('BTree forEachKeyNotIn fuzz tests', () => { const compare = (a: number, b: number) => a - b; const FUZZ_SETTINGS = { branchingFactors: [4, 5, 32], From 9665e4b3c46f6da0b27909410185de5edbc11a28 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 18:06:48 -0800 Subject: [PATCH 100/143] bulk load fuzz testing --- test/bulkLoad.test.ts | 54 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts index 79d68c3..f981b62 100644 --- a/test/bulkLoad.test.ts +++ b/test/bulkLoad.test.ts @@ -1,7 +1,8 @@ import BTree, { BNode, BNodeInternal } from '../b+tree'; import BTreeEx from '../extended'; import { bulkLoad } from '../extended/bulkLoad'; -import { makeArray } from './shared'; +import MersenneTwister from 'mersenne-twister'; +import { makeArray, randomInt } from './shared'; type Pair = [number, number]; const compareNumbers = (a: number, b: number) => a - b; @@ -139,3 +140,54 @@ describe('BTreeEx.bulkLoad', () => { expectTreeMatches(tree, pairs); }); }); + +describe('bulkLoad fuzz tests', () => { + const FUZZ_SETTINGS = { + branchingFactors, + ooms: [2, 3], + iterationsPerOOM: 3, + spacings: [1, 2, 3, 5, 8, 13], + payloadMods: [1, 2, 5, 11, 17], + timeoutMs: 30_000, + } as const; + + jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + + const rng = new 
MersenneTwister(0xB01C10AD); + + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { + describe(`fanout ${maxNodeSize}`, () => { + for (const oom of FUZZ_SETTINGS.ooms) { + const baseSize = 5 * Math.pow(10, oom); + for (let iteration = 0; iteration < FUZZ_SETTINGS.iterationsPerOOM; iteration++) { + const spacing = FUZZ_SETTINGS.spacings[randomInt(rng, FUZZ_SETTINGS.spacings.length)]; + const payloadMod = FUZZ_SETTINGS.payloadMods[randomInt(rng, FUZZ_SETTINGS.payloadMods.length)]; + const sizeJitter = randomInt(rng, baseSize); + const size = baseSize + sizeJitter; + + test(`size ${size}, spacing ${spacing}, payload ${payloadMod}, iteration ${iteration}`, () => { + const keys = makeArray(size, false, spacing, 0, rng); + const pairs = pairsFromKeys(keys).map(([key, value], index) => [key, value * payloadMod + index] as Pair); + const { tree, root } = buildTreeFromPairs(maxNodeSize, pairs); + expectTreeMatches(tree, pairs); + + const leaves = collectLeaves(root); + const leafSizes = leaves.map((leaf) => leaf.keys.length); + const expectedLeafCount = Math.ceil(pairs.length / maxNodeSize); + expect(leaves.length).toBe(expectedLeafCount); + const minLeaf = Math.min(...leafSizes); + const maxLeaf = Math.max(...leafSizes); + expect(maxLeaf - minLeaf).toBeLessThanOrEqual(1); + + if (!root.isLeaf) + assertInternalNodeFanout(root, maxNodeSize); + + const alternating = toAlternating(pairs); + const bulkLoadTree = BTreeEx.bulkLoad(alternating, maxNodeSize, compareNumbers); + expectTreeMatches(bulkLoadTree, pairs); + }); + } + } + }); + } +}); From b49c8d691fba0dddae416440e4efae5009cdc9da Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 18:17:03 -0800 Subject: [PATCH 101/143] self test for union, fast path --- extended/union.js | 2 ++ extended/union.ts | 2 ++ test/union.test.ts | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/extended/union.js b/extended/union.js index aeb9ced..65369c4 100644 --- 
a/extended/union.js +++ b/extended/union.js @@ -19,6 +19,8 @@ var decompose_1 = require("./decompose"); * and inserting the contents of `other` into the clone. */ function union(treeA, treeB, combineFn) { + if (treeA === treeB) + return treeA.clone(); var _treeA = treeA; var _treeB = treeB; var branchingFactor = (0, shared_1.checkCanDoSetOperation)(_treeA, _treeB, false); diff --git a/extended/union.ts b/extended/union.ts index 168ab2e..6f6ae58 100644 --- a/extended/union.ts +++ b/extended/union.ts @@ -23,6 +23,8 @@ export default function union, K, V>( treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined ): TBTree { + if (treeA === treeB) + return treeA.clone(); const _treeA = treeA as unknown as BTreeWithInternals; const _treeB = treeB as unknown as BTreeWithInternals; const branchingFactor = checkCanDoSetOperation(_treeA, _treeB, false); diff --git a/test/union.test.ts b/test/union.test.ts index 78d2887..fd865df 100644 --- a/test/union.test.ts +++ b/test/union.test.ts @@ -519,6 +519,40 @@ function testUnion(maxNodeSize: number) { result.checkValid(); }); + test('Union tree with itself returns a clone without invoking combineFn', () => { + const size = maxNodeSize * 2 + 5; + const tree = buildTree(range(0, size), 3, 1); + let unionCalls = 0; + const unionFn: UnionFn = (key, leftValue, rightValue) => { + unionCalls++; + return leftValue + rightValue; + }; + + const original = tree.toArray(); + const result = tree.union(tree, unionFn); + expect(unionCalls).toBe(0); + expect(result).not.toBe(tree); + expect(result.toArray()).toEqual(original); + expect(tree.toArray()).toEqual(original); + }); + + test('Standalone union short-circuits when given the same tree twice', () => { + const size = maxNodeSize * 2 + 1; + const tree = buildTree(range(0, size), 1, 0); + let unionCalls = 0; + const unionFn: UnionFn = (_key, _leftValue, _rightValue) => { + unionCalls++; + return undefined; + }; + + const original = tree.toArray(); + const result = 
union(tree, tree, unionFn); + expect(unionCalls).toBe(0); + expect(result).not.toBe(tree); + expect(result.toArray()).toEqual(original); + expect(tree.toArray()).toEqual(original); + }); + test('Union with disjoint ranges', () => { const entries1: [number, number][] = []; for (let i = 1; i <= 100; i++) entries1.push([i, i]); From 9ab59cd58cf099d8b8ed5fda73744522c6e644ec Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 18:43:50 -0800 Subject: [PATCH 102/143] benchmarks --- benchmarks.ts | 219 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 199 insertions(+), 20 deletions(-) diff --git a/benchmarks.ts b/benchmarks.ts index 1175cd0..253a323 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -2,6 +2,7 @@ import BTree from '.'; import BTreeEx from './extended'; import SortedArray from './sorted-array'; +import forEachKeyNotIn from './extended/forEachKeyNotIn'; // Note: The `bintrees` package also includes a `BinTree` type which turned // out to be an unbalanced binary tree. 
It is faster than `RBTree` for // randomized data, but it becomes extremely slow when filled with sorted @@ -98,6 +99,31 @@ function intersectBySorting( } } +function subtractBySorting( + includeTree: BTree, + excludeTree: BTree, + callback: (k: number, value: number) => void +) { + const include = includeTree.toArray(); + const exclude = excludeTree.toArray(); + let i = 0; + let j = 0; + const includeLen = include.length; + const excludeLen = exclude.length; + + while (i < includeLen) { + const [includeKey, includeValue] = include[i]; + while (j < excludeLen && exclude[j][0] < includeKey) + j++; + if (j < excludeLen && exclude[j][0] === includeKey) { + i++; + continue; + } + callback(includeKey, includeValue); + i++; + } +} + console.log("Benchmark results (milliseconds with integer keys/values)"); console.log("---------------------------------------------------------"); @@ -409,7 +435,7 @@ console.log("### Delta between B+ trees"); } console.log(); -console.log("### Merge between B+ trees"); +console.log("### Union between B+ trees"); { console.log(); const sizes = [100, 1000, 10000, 100000]; @@ -445,7 +471,6 @@ console.log("### Merge between B+ trees"); timeBaselineMerge(`${baseTitle} using ${baselineLabel}`, tree1, tree2); }; - // Test 1: Non-overlapping ranges (best case - minimal intersections) console.log("# Non-overlapping ranges (disjoint keys)"); sizes.forEach((size) => { const tree1 = new BTreeEx(); @@ -453,8 +478,8 @@ console.log("### Merge between B+ trees"); const offset = size * 3; for (let i = 0; i < size; i++) { - tree1.set(i, i); // Keys: 0...(size-1) - tree2.set(offset + i, offset + i); // Keys well beyond tree1's range + tree1.set(i, i); + tree2.set(offset + i, offset + i); } const baseTitle = `Union ${size}+${size} non-overlapping trees`; @@ -496,7 +521,7 @@ console.log("### Merge between B+ trees"); }); console.log(); - console.log("# Complete overlap (worst case - all keys intersect)"); + console.log("# Complete overlap (all keys 
intersect)"); sizes.forEach((size) => { const tree1 = new BTreeEx(); const tree2 = new BTreeEx(); @@ -566,21 +591,6 @@ console.log("### Merge between B+ trees"); timeUnionVsBaseline(baseTitle, tree1, tree2); }); - console.log(); - console.log("# Compare union vs manual iteration for complete overlap"); - sizes.forEach((size) => { - const tree1 = new BTreeEx(); - const tree2 = new BTreeEx(); - - for (let i = 0; i < size; i++) { - tree1.set(i, i); - tree2.set(i, i * 10); - } - - const baseTitle = `Union ${size}+${size} overlapping trees`; - timeUnionVsBaseline(baseTitle, tree1, tree2); - }); - console.log(); console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); { @@ -724,4 +734,173 @@ console.log("### forEachKeyInBoth"); const baseTitle = `forEachKeyInBoth ${tree1.size}+${tree2.size} random trees`; timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const tree1 = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + tree1.set(i, i); + } + + const tree2 = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + tree2.set(i, i * 7); + } else { + tree2.set(totalKeys + i, (totalKeys + i) * 7); + } + } + + const baseTitle = `forEachKeyInBoth ${tree1.size}+${tree2.size} sparse-overlap trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); + } +} + +console.log(); +console.log("### forEachKeyNotIn"); +{ + const sizes = [100, 1000, 10000, 100000]; + + const runForEachKeyNotIn = ( + includeTree: BTreeEx, + excludeTree: BTreeEx + ) => { + let count = 0; + let checksum = 0; + forEachKeyNotIn(includeTree, excludeTree, (_key, value) => { + count++; + checksum += value; + }); + return { count, checksum }; + }; + + const runBaseline = ( + includeTree: BTree, + excludeTree: 
BTree + ) => { + let count = 0; + let checksum = 0; + subtractBySorting(includeTree, excludeTree, (_key, value) => { + count++; + checksum += value; + }); + return { count, checksum }; + }; + + const timeForEachKeyNotInVsBaseline = ( + baseTitle: string, + includeTree: BTreeEx, + excludeTree: BTreeEx, + forEachKeyNotInLabel = 'forEachKeyNotIn()', + baselineLabel = 'sort baseline' + ) => { + measure( + result => `${baseTitle} using ${forEachKeyNotInLabel} [count=${result.count}, checksum=${result.checksum}]`, + () => runForEachKeyNotIn(includeTree, excludeTree) + ); + measure( + result => `${baseTitle} using ${baselineLabel} [count=${result.count}, checksum=${result.checksum}]`, + () => runBaseline(includeTree, excludeTree) + ); + }; + + console.log(); + console.log("# Non-overlapping ranges (all keys survive)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + const offset = size * 3; + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + excludeTree.set(offset + i, offset + i); + } + + const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} disjoint trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# 50% overlapping ranges"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + const offset = Math.floor(size / 2); + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + excludeTree.set(i + offset, (i + offset) * 2); + } + + const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} half-overlapping trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Complete overlap (no keys survive)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + excludeTree.set(i, i * 3); + } + 
+ const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} identical-key trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Random overlaps (~10% of include removed)"); + sizes.forEach((size) => { + const keysInclude = makeArray(size, true); + const keysExclude = makeArray(size, true); + const overlapCount = Math.max(1, Math.floor(size * 0.1)); + for (let i = 0; i < overlapCount && i < keysInclude.length && i < keysExclude.length; i++) { + keysExclude[i] = keysInclude[i]; + } + + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + + for (let i = 0; i < keysInclude.length; i++) { + const key = keysInclude[i]; + includeTree.set(key, key * 5); + } + for (let i = 0; i < keysExclude.length; i++) { + const key = keysExclude[i]; + excludeTree.set(key, key * 7); + } + + const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} random trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const includeTree = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + includeTree.set(i, i); + } + + const excludeTree = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + excludeTree.set(i, i); + } else { + excludeTree.set(totalKeys + i, totalKeys + i); + } + } + + const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} sparse-overlap trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + } } From 93cbb1475b570da3dbfdebf620ad4e2070f8a423 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 18:58:29 -0800 Subject: [PATCH 103/143] benchmark for set ops --- benchmarks.ts | 158 
++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/benchmarks.ts b/benchmarks.ts index 253a323..551d5b5 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -3,6 +3,7 @@ import BTree from '.'; import BTreeEx from './extended'; import SortedArray from './sorted-array'; import forEachKeyNotIn from './extended/forEachKeyNotIn'; +import subtract from './extended/subtract'; // Note: The `bintrees` package also includes a `BinTree` type which turned // out to be an unbalanced binary tree. It is faster than `RBTree` for // randomized data, but it becomes extremely slow when filled with sorted @@ -617,6 +618,163 @@ console.log("### Union between B+ trees"); } } +console.log(); +console.log("### Subtract between B+ trees"); +{ + console.log(); + const sizes = [100, 1000, 10000, 100000]; + + const timeBaselineSubtract = ( + title: string, + includeTree: BTreeEx, + excludeTree: BTreeEx + ) => { + const baselineResult = measure(() => title, () => { + const result = includeTree.clone(); + excludeTree.forEachPair((key) => { + result.delete(key); + }); + return result; + }); + const stats = countTreeNodeStats(baselineResult); + console.log(`\tShared nodes (baseline): ${stats.shared}/${stats.total}`); + }; + + const timeSubtractVsBaseline = ( + baseTitle: string, + includeTree: BTreeEx, + excludeTree: BTreeEx, + subtractLabel = 'subtract()', + baselineLabel = 'clone+delete loop (baseline)' + ) => { + const subtractResult = measure(() => `${baseTitle} using ${subtractLabel}`, () => { + return subtract, number, number>(includeTree, excludeTree); + }); + const subtractStats = countTreeNodeStats(subtractResult); + console.log(`\tShared nodes (subtract): ${subtractStats.shared}/${subtractStats.total}`); + + timeBaselineSubtract(`${baseTitle} using ${baselineLabel}`, includeTree, excludeTree); + }; + + console.log("# Non-overlapping ranges (nothing removed)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree 
= new BTreeEx(); + const offset = size * 3; + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + excludeTree.set(offset + i, offset + i); + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} disjoint trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Partial overlap (middle segment removed)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + const overlapStart = Math.floor(size / 3); + const overlapEnd = overlapStart + Math.floor(size / 2); + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + if (i >= overlapStart && i < overlapEnd) + excludeTree.set(i, i * 10); + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} partially overlapping trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Interleaved keys (every other key removed)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (let i = 0; i < size * 2; i++) { + includeTree.set(i, i); + if (i % 2 === 0) + excludeTree.set(i, i); + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} interleaved trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Complete overlap (entire tree removed)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + excludeTree.set(i, i * 5); + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} identical trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Random overlaps (~10% removed)"); + sizes.forEach((size) => { + const keysInclude = makeArray(size, true); + const keysExclude = makeArray(size, true); + const overlapCount 
= Math.max(1, Math.floor(size * 0.1)); + for (let i = 0; i < overlapCount && i < keysInclude.length && i < keysExclude.length; i++) { + keysExclude[i] = keysInclude[i]; + } + + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (const key of keysInclude) + includeTree.set(key, key * 3); + for (const key of keysExclude) + excludeTree.set(key, key * 7); + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} random trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Subtract with empty exclude tree"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (let i = 0; i < size; i++) + includeTree.set(i, i); + + const baseTitle = `Subtract ${includeTree.size}-0 keys`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const includeTree = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + includeTree.set(i, i); + } + + const excludeTree = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + excludeTree.set(i, i); + } else { + excludeTree.set(totalKeys + i, totalKeys + i); + } + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} sparse-overlap trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + } +} + console.log(); console.log("### forEachKeyInBoth"); { From 4262b1c412ef4342120c324439bcdec0d6ca143b Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 13 Nov 2025 19:02:39 -0800 Subject: [PATCH 104/143] docs --- extended/union.d.ts | 2 +- extended/union.js | 2 +- extended/union.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extended/union.d.ts 
b/extended/union.d.ts index de33870..1f345f8 100644 --- a/extended/union.d.ts +++ b/extended/union.d.ts @@ -5,7 +5,7 @@ import BTree from '../b+tree'; * @param treeA First tree to union. * @param treeB Second tree to union. * @param combineFn Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. + * `undefined` to omit the key from the result. Note: symmetric difference can be achieved by always returning `undefined`. * @returns A new BTree that contains the unioned key/value pairs. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between diff --git a/extended/union.js b/extended/union.js index 65369c4..f06ea46 100644 --- a/extended/union.js +++ b/extended/union.js @@ -8,7 +8,7 @@ var decompose_1 = require("./decompose"); * @param treeA First tree to union. * @param treeB Second tree to union. * @param combineFn Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. + * `undefined` to omit the key from the result. Note: symmetric difference can be achieved by always returning `undefined`. * @returns A new BTree that contains the unioned key/value pairs. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between diff --git a/extended/union.ts b/extended/union.ts index 6f6ae58..a9025cc 100644 --- a/extended/union.ts +++ b/extended/union.ts @@ -8,7 +8,7 @@ import { decompose, buildFromDecomposition } from "./decompose"; * @param treeA First tree to union. * @param treeB Second tree to union. * @param combineFn Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. 
+ * `undefined` to omit the key from the result. Note: symmetric difference can be achieved by always returning `undefined`. * @returns A new BTree that contains the unioned key/value pairs. * @description Complexity is bounded O(N + M) for both time and allocations. * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between From 92e30bbc6e1c21350a431b0728e6db79003e97db Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Fri, 14 Nov 2025 07:46:55 -0800 Subject: [PATCH 105/143] docs --- extended/bulkLoad.d.ts | 1 + extended/bulkLoad.js | 1 + extended/bulkLoad.ts | 1 + extended/diffAgainst.d.ts | 2 ++ extended/diffAgainst.js | 2 ++ extended/diffAgainst.ts | 3 ++- 6 files changed, 9 insertions(+), 1 deletion(-) diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index b405eca..b154d12 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -8,5 +8,6 @@ import BTree from '../b+tree'; * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. * @param compare Function to compare keys. * @returns A new BTree containing the given entries. + * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed). */ export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare: (a: K, b: K) => number): BTree; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 22ae301..1150ba1 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -31,6 +31,7 @@ var shared_1 = require("./shared"); * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. * @param compare Function to compare keys. * @returns A new BTree containing the given entries. + * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed). 
*/ function bulkLoad(entries, maxNodeSize, compare) { var alternatingEntries = entries; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index b2508b1..7e2b825 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -10,6 +10,7 @@ import { alternatingCount, alternatingGetFirst, flushToLeaves, type AlternatingL * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. * @param compare Function to compare keys. * @returns A new BTree containing the given entries. + * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed). */ export function bulkLoad( entries: (K | V)[], diff --git a/extended/diffAgainst.d.ts b/extended/diffAgainst.d.ts index eb54935..7b4e0aa 100644 --- a/extended/diffAgainst.d.ts +++ b/extended/diffAgainst.d.ts @@ -11,6 +11,8 @@ import BTree from '../b+tree'; * @param onlyA Callback invoked for all keys only present in `treeA`. * @param onlyB Callback invoked for all keys only present in `treeB`. * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. */ export default function diffAgainst(_treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { break?: R; diff --git a/extended/diffAgainst.js b/extended/diffAgainst.js index db29a34..94673e9 100644 --- a/extended/diffAgainst.js +++ b/extended/diffAgainst.js @@ -13,6 +13,8 @@ var b_tree_1 = require("../b+tree"); * @param onlyA Callback invoked for all keys only present in `treeA`. * @param onlyB Callback invoked for all keys only present in `treeB`. * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. 
*/ function diffAgainst(_treeA, _treeB, onlyA, onlyB, different) { var treeA = _treeA; diff --git a/extended/diffAgainst.ts b/extended/diffAgainst.ts index 2d5be24..647244b 100644 --- a/extended/diffAgainst.ts +++ b/extended/diffAgainst.ts @@ -14,6 +14,8 @@ import { type BTreeWithInternals } from './shared'; * @param onlyA Callback invoked for all keys only present in `treeA`. * @param onlyB Callback invoked for all keys only present in `treeB`. * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. */ export default function diffAgainst( _treeA: BTree, @@ -292,4 +294,3 @@ type DiffCursor = { leaf: BNode | undefined; currentKey: K; }; - From 59c12703c9f52b6b67145e2c05c5715686f127d3 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Fri, 14 Nov 2025 07:50:50 -0800 Subject: [PATCH 106/143] docs --- extended/forEachKeyInBoth.d.ts | 18 +++++++++--------- extended/forEachKeyInBoth.js | 18 +++++++++--------- extended/forEachKeyInBoth.ts | 18 +++++++++--------- extended/forEachKeyNotIn.d.ts | 26 ++++++++++++-------------- extended/forEachKeyNotIn.js | 26 ++++++++++++-------------- extended/forEachKeyNotIn.ts | 26 ++++++++++++-------------- 6 files changed, 63 insertions(+), 69 deletions(-) diff --git a/extended/forEachKeyInBoth.d.ts b/extended/forEachKeyInBoth.d.ts index f9cdef1..296d439 100644 --- a/extended/forEachKeyInBoth.d.ts +++ b/extended/forEachKeyInBoth.d.ts @@ -1,18 +1,18 @@ import BTree from '../b+tree'; /** - * Calls the supplied `callback` for each key/value pair shared by both trees. - * The callback will be called in sorted key order. + * Calls the supplied `callback` for each key/value pair shared by both trees, in sorted key order. * Neither tree is modified. 
+ * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because whole non-intersecting subtrees + * are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param treeA First tree to compare. * @param treeB Second tree to compare. * @param callback Invoked for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. - * @description Complexity is bounded by O(N + M) for time. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the trees were built with different comparators. 
*/ export default function forEachKeyInBoth(treeA: BTree, treeB: BTree, callback: (key: K, leftValue: V, rightValue: V) => { break?: R; diff --git a/extended/forEachKeyInBoth.js b/extended/forEachKeyInBoth.js index d4a8417..92f10b6 100644 --- a/extended/forEachKeyInBoth.js +++ b/extended/forEachKeyInBoth.js @@ -3,19 +3,19 @@ Object.defineProperty(exports, "__esModule", { value: true }); var shared_1 = require("./shared"); var parallelWalk_1 = require("./parallelWalk"); /** - * Calls the supplied `callback` for each key/value pair shared by both trees. - * The callback will be called in sorted key order. + * Calls the supplied `callback` for each key/value pair shared by both trees, in sorted key order. * Neither tree is modified. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because whole non-intersecting subtrees + * are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param treeA First tree to compare. * @param treeB Second tree to compare. * @param callback Invoked for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. - * @description Complexity is bounded by O(N + M) for time. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
- * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the trees were built with different comparators. */ function forEachKeyInBoth(treeA, treeB, callback) { var _treeA = treeA; diff --git a/extended/forEachKeyInBoth.ts b/extended/forEachKeyInBoth.ts index 2acb19c..6bd34b2 100644 --- a/extended/forEachKeyInBoth.ts +++ b/extended/forEachKeyInBoth.ts @@ -3,19 +3,19 @@ import { type BTreeWithInternals, checkCanDoSetOperation } from './shared'; import { createCursor, moveForwardOne, moveTo, getKey, noop } from "./parallelWalk" /** - * Calls the supplied `callback` for each key/value pair shared by both trees. - * The callback will be called in sorted key order. + * Calls the supplied `callback` for each key/value pair shared by both trees, in sorted key order. * Neither tree is modified. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because whole non-intersecting subtrees + * are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param treeA First tree to compare. * @param treeB Second tree to compare. * @param callback Invoked for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. - * @description Complexity is bounded by O(N + M) for time. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the trees were built with different comparators. */ export default function forEachKeyInBoth( treeA: BTree, diff --git a/extended/forEachKeyNotIn.d.ts b/extended/forEachKeyNotIn.d.ts index eae84ae..474750a 100644 --- a/extended/forEachKeyNotIn.d.ts +++ b/extended/forEachKeyNotIn.d.ts @@ -1,19 +1,17 @@ import BTree from '../b+tree'; /** - * Calls the supplied `callback` for each key/value pair that is in includeTree but not in excludeTree. - * This is also known as set subtraction. - * The callback will be called in sorted key order. - * Neither tree is modified. - * @param includeTree The first tree. This is the tree from which keys will be taken. - * @param excludeTree The second tree. Keys present in this tree will be excluded. - * @param callback Invoked for keys that are in includeTree but not in excludeTree. It can cause iteration to early exit by returning `{ break: R }`. - * @description Complexity is bounded by O(N + M) for time. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
- * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys, none intersecting) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * Calls the supplied `callback` for each key/value pair that is in `includeTree` but not in `excludeTree` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param includeTree The tree to iterate keys from. + * @param excludeTree Keys present in this tree are omitted from the callback. + * @param callback Invoked for keys that are in `includeTree` but not `excludeTree`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were built with different comparators. */ export default function forEachKeyNotIn(includeTree: BTree, excludeTree: BTree, callback: (key: K, value: V) => { break?: R; diff --git a/extended/forEachKeyNotIn.js b/extended/forEachKeyNotIn.js index b0783b2..22d5961 100644 --- a/extended/forEachKeyNotIn.js +++ b/extended/forEachKeyNotIn.js @@ -3,20 +3,18 @@ Object.defineProperty(exports, "__esModule", { value: true }); var shared_1 = require("./shared"); var parallelWalk_1 = require("./parallelWalk"); /** - * Calls the supplied `callback` for each key/value pair that is in includeTree but not in excludeTree. 
- * This is also known as set subtraction. - * The callback will be called in sorted key order. - * Neither tree is modified. - * @param includeTree The first tree. This is the tree from which keys will be taken. - * @param excludeTree The second tree. Keys present in this tree will be excluded. - * @param callback Invoked for keys that are in includeTree but not in excludeTree. It can cause iteration to early exit by returning `{ break: R }`. - * @description Complexity is bounded by O(N + M) for time. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys, none intersecting) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * Calls the supplied `callback` for each key/value pair that is in `includeTree` but not in `excludeTree` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param includeTree The tree to iterate keys from. + * @param excludeTree Keys present in this tree are omitted from the callback. 
+ * @param callback Invoked for keys that are in `includeTree` but not `excludeTree`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were built with different comparators. */ function forEachKeyNotIn(includeTree, excludeTree, callback) { var _includeTree = includeTree; diff --git a/extended/forEachKeyNotIn.ts b/extended/forEachKeyNotIn.ts index 0a8db3f..30072d1 100644 --- a/extended/forEachKeyNotIn.ts +++ b/extended/forEachKeyNotIn.ts @@ -3,20 +3,18 @@ import { type BTreeWithInternals, checkCanDoSetOperation } from './shared'; import { createCursor, moveForwardOne, moveTo, getKey, noop } from "./parallelWalk" /** - * Calls the supplied `callback` for each key/value pair that is in includeTree but not in excludeTree. - * This is also known as set subtraction. - * The callback will be called in sorted key order. - * Neither tree is modified. - * @param includeTree The first tree. This is the tree from which keys will be taken. - * @param excludeTree The second tree. Keys present in this tree will be excluded. - * @param callback Invoked for keys that are in includeTree but not in excludeTree. It can cause iteration to early exit by returning `{ break: R }`. - * @description Complexity is bounded by O(N + M) for time. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
- * Note that in benchmarks even the worst case (fully interleaved keys, none intersecting) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * Calls the supplied `callback` for each key/value pair that is in `includeTree` but not in `excludeTree` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param includeTree The tree to iterate keys from. + * @param excludeTree Keys present in this tree are omitted from the callback. + * @param callback Invoked for keys that are in `includeTree` but not `excludeTree`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were built with different comparators. 
*/ export default function forEachKeyNotIn( includeTree: BTree, From a554d4ac4ab24340578c7381685d9c2fb141a604 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Fri, 14 Nov 2025 07:56:06 -0800 Subject: [PATCH 107/143] docs --- extended/intersect.d.ts | 14 +++++++------- extended/intersect.js | 14 +++++++------- extended/intersect.ts | 14 +++++++------- extended/shared.d.ts | 3 +-- extended/shared.ts | 6 +++++- extended/subtract.d.ts | 20 ++++++++++---------- extended/subtract.js | 20 ++++++++++---------- extended/subtract.ts | 20 ++++++++++---------- extended/union.d.ts | 16 +++++++--------- extended/union.js | 16 +++++++--------- extended/union.ts | 16 +++++++--------- 11 files changed, 78 insertions(+), 81 deletions(-) diff --git a/extended/intersect.d.ts b/extended/intersect.d.ts index d2a9b48..ddda066 100644 --- a/extended/intersect.d.ts +++ b/extended/intersect.d.ts @@ -2,15 +2,15 @@ import BTree from '../b+tree'; /** * Returns a new tree containing only keys present in both input trees. * Neither tree is modified. + * + * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D), + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param treeA First tree to intersect. * @param treeB Second tree to intersect. * @param combineFn Called for keys that appear in both trees. Return the desired value. - * @description Complexity is bounded O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully intersecting keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @returns A new tree populated with the intersection. + * @throws Error if the trees were created with different comparators. */ export default function intersect, K, V>(treeA: TBTree, treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V): TBTree; diff --git a/extended/intersect.js b/extended/intersect.js index e563217..2f8c89b 100644 --- a/extended/intersect.js +++ b/extended/intersect.js @@ -9,16 +9,16 @@ var bulkLoad_1 = require("./bulkLoad"); /** * Returns a new tree containing only keys present in both input trees. * Neither tree is modified. + * + * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D), + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param treeA First tree to intersect. * @param treeB Second tree to intersect. * @param combineFn Called for keys that appear in both trees. Return the desired value. - * @description Complexity is bounded O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully intersecting keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @returns A new tree populated with the intersection. + * @throws Error if the trees were created with different comparators. */ function intersect(treeA, treeB, combineFn) { var _treeA = treeA; diff --git a/extended/intersect.ts b/extended/intersect.ts index 9222ff7..133faf7 100644 --- a/extended/intersect.ts +++ b/extended/intersect.ts @@ -6,16 +6,16 @@ import { bulkLoadRoot } from './bulkLoad'; /** * Returns a new tree containing only keys present in both input trees. * Neither tree is modified. + * + * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D), + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param treeA First tree to intersect. * @param treeB Second tree to intersect. * @param combineFn Called for keys that appear in both trees. Return the desired value. - * @description Complexity is bounded O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. 
In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully intersecting keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @returns A new tree populated with the intersection. + * @throws Error if the trees were created with different comparators. */ export default function intersect, K, V>( treeA: TBTree, diff --git a/extended/shared.d.ts b/extended/shared.d.ts index 5b8009f..cb0ff5c 100644 --- a/extended/shared.d.ts +++ b/extended/shared.d.ts @@ -1,2 +1 @@ -import BTree from '../b+tree'; -export declare type BTreeConstructor, K, V> = new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => BTreeWithInternals; +export {}; diff --git a/extended/shared.ts b/extended/shared.ts index d938113..83ad838 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -129,4 +129,8 @@ export function checkCanDoSetOperation(treeA: BTreeWithInternals, tr return branchingFactor; } -export type BTreeConstructor, K, V> = new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => BTreeWithInternals; \ No newline at end of file +/** + * Helper constructor signature used by set-operation helpers to create a result tree that preserves the input subtype. 
+ * @internal + */ +export type BTreeConstructor, K, V> = new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => BTreeWithInternals; diff --git a/extended/subtract.d.ts b/extended/subtract.d.ts index a08094d..27060ce 100644 --- a/extended/subtract.d.ts +++ b/extended/subtract.d.ts @@ -1,16 +1,16 @@ import BTree from '../b+tree'; /** - * Returns a new tree containing only keys that are present in treeA but notTreeB (set subtraction). + * Returns a new tree containing only the keys that are present in `targetTree` but not `subtractTree` (set subtraction). * Neither tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2), where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside `targetTree`, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param targetTree The tree to subtract from. - * @param subtractTree The tree to subtract. - * @description Complexity is bounded O(N + M) for time and O(N) for allocations. - * However, it is additionally bounded by O(log(N + M) * D1) for time and O(log(N) * D2) for space where D1/D2 are the - * number of disjoint ranges of keys between the two trees and in targetTree, respectively. In practice, that means for - * keys of random distribution the performance is O(N + M) and for keys with significant numbers of non-overlapping key - * ranges it is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. 
- * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @param subtractTree The tree whose keys will be removed from the result. + * @returns A new tree that contains the subtraction result. + * @throws Error if the trees were created with different comparators or max node sizes. */ export default function subtract, K, V>(targetTree: TBTree, subtractTree: TBTree): TBTree; diff --git a/extended/subtract.js b/extended/subtract.js index 06ac189..b118d73 100644 --- a/extended/subtract.js +++ b/extended/subtract.js @@ -3,18 +3,18 @@ Object.defineProperty(exports, "__esModule", { value: true }); var shared_1 = require("./shared"); var decompose_1 = require("./decompose"); /** - * Returns a new tree containing only keys that are present in treeA but notTreeB (set subtraction). + * Returns a new tree containing only the keys that are present in `targetTree` but not `subtractTree` (set subtraction). * Neither tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2), where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside `targetTree`, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param targetTree The tree to subtract from. - * @param subtractTree The tree to subtract. - * @description Complexity is bounded O(N + M) for time and O(N) for allocations. 
- * However, it is additionally bounded by O(log(N + M) * D1) for time and O(log(N) * D2) for space where D1/D2 are the - * number of disjoint ranges of keys between the two trees and in targetTree, respectively. In practice, that means for - * keys of random distribution the performance is O(N + M) and for keys with significant numbers of non-overlapping key - * ranges it is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @param subtractTree The tree whose keys will be removed from the result. + * @returns A new tree that contains the subtraction result. + * @throws Error if the trees were created with different comparators or max node sizes. */ function subtract(targetTree, subtractTree) { var _targetTree = targetTree; diff --git a/extended/subtract.ts b/extended/subtract.ts index 4da06fc..89cd7e8 100644 --- a/extended/subtract.ts +++ b/extended/subtract.ts @@ -3,18 +3,18 @@ import { checkCanDoSetOperation, type BTreeWithInternals, BTreeConstructor, alte import { buildFromDecomposition, decompose } from './decompose'; /** - * Returns a new tree containing only keys that are present in treeA but notTreeB (set subtraction). + * Returns a new tree containing only the keys that are present in `targetTree` but not `subtractTree` (set subtraction). * Neither tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2), where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside `targetTree`, because disjoint subtrees are skipped entirely. 
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param targetTree The tree to subtract from. - * @param subtractTree The tree to subtract. - * @description Complexity is bounded O(N + M) for time and O(N) for allocations. - * However, it is additionally bounded by O(log(N + M) * D1) for time and O(log(N) * D2) for space where D1/D2 are the - * number of disjoint ranges of keys between the two trees and in targetTree, respectively. In practice, that means for - * keys of random distribution the performance is O(N + M) and for keys with significant numbers of non-overlapping key - * ranges it is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @param subtractTree The tree whose keys will be removed from the result. + * @returns A new tree that contains the subtraction result. + * @throws Error if the trees were created with different comparators or max node sizes. */ export default function subtract, K, V>( targetTree: TBTree, diff --git a/extended/union.d.ts b/extended/union.d.ts index 1f345f8..74ea5cc 100644 --- a/extended/union.d.ts +++ b/extended/union.d.ts @@ -1,18 +1,16 @@ import BTree from '../b+tree'; /** - * Efficiently unions two trees, reusing subtrees wherever possible. - * Neither input tree is modified. + * Efficiently unions two trees, reusing subtrees wherever possible without mutating either input. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. 
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param treeA First tree to union. * @param treeB Second tree to union. * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. Note: symmetric difference can be achieved by always returning `undefined`. * @returns A new BTree that contains the unioned key/value pairs. - * @description Complexity is bounded O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` - * and inserting the contents of `other` into the clone. + * @throws Error if the trees were created with different comparators or max node sizes. */ export default function union, K, V>(treeA: TBTree, treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree; diff --git a/extended/union.js b/extended/union.js index f06ea46..b7144ed 100644 --- a/extended/union.js +++ b/extended/union.js @@ -3,20 +3,18 @@ Object.defineProperty(exports, "__esModule", { value: true }); var shared_1 = require("./shared"); var decompose_1 = require("./decompose"); /** - * Efficiently unions two trees, reusing subtrees wherever possible. - * Neither input tree is modified. + * Efficiently unions two trees, reusing subtrees wherever possible without mutating either input. 
+ * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param treeA First tree to union. * @param treeB Second tree to union. * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. Note: symmetric difference can be achieved by always returning `undefined`. * @returns A new BTree that contains the unioned key/value pairs. - * @description Complexity is bounded O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` - * and inserting the contents of `other` into the clone. + * @throws Error if the trees were created with different comparators or max node sizes. */ function union(treeA, treeB, combineFn) { if (treeA === treeB) diff --git a/extended/union.ts b/extended/union.ts index a9025cc..8a84c56 100644 --- a/extended/union.ts +++ b/extended/union.ts @@ -3,20 +3,18 @@ import { BTreeConstructor, type BTreeWithInternals, checkCanDoSetOperation } fro import { decompose, buildFromDecomposition } from "./decompose"; /** - * Efficiently unions two trees, reusing subtrees wherever possible. 
- * Neither input tree is modified. + * Efficiently unions two trees, reusing subtrees wherever possible without mutating either input. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param treeA First tree to union. * @param treeB Second tree to union. * @param combineFn Called for keys that appear in both trees. Return the desired value, or * `undefined` to omit the key from the result. Note: symmetric difference can be achieved by always returning `undefined`. * @returns A new BTree that contains the unioned key/value pairs. - * @description Complexity is bounded O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` - * and inserting the contents of `other` into the clone. + * @throws Error if the trees were created with different comparators or max node sizes. 
*/ export default function union, K, V>( treeA: TBTree, From ff05eb7afdcf666f2335ff9727673991b934bf93 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Fri, 14 Nov 2025 07:56:52 -0800 Subject: [PATCH 108/143] docs --- extended/index.d.ts | 70 +++++++++++++++++++++++++++++++-------------- extended/index.js | 58 +++++++++++++++++++++++-------------- extended/index.ts | 70 +++++++++++++++++++++++++++++++-------------- 3 files changed, 135 insertions(+), 63 deletions(-) diff --git a/extended/index.d.ts b/extended/index.d.ts index c8673d8..914024b 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -7,8 +7,26 @@ import BTree from '../b+tree'; * @extends BTree */ export declare class BTreeEx extends BTree { + /** + * Bulk loads a new `BTreeEx` from a sorted alternating list of entries. + * This reuses the same algorithm as `extended/bulkLoad`, but produces a `BTreeEx`. + * Time and space complexity are O(n). + * @param entries Alternating array of keys and values: `[key0, value0, key1, value1, ...]`. Must be sorted by key in strictly ascending order. + * @param maxNodeSize The branching factor (maximum number of children per node). + * @param compare Comparator to use. Defaults to the standard comparator if omitted. + * @returns A fully built tree containing the supplied entries. + * @throws Error if the entries are not strictly sorted or contain duplicate keys. + */ static bulkLoad(entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number): BTreeEx; + /** + * Quickly clones the tree while preserving the `BTreeEx` prototype. + * The clone shares structure (copy-on-write) until either instance is mutated. + */ clone(): this; + /** + * Performs a greedy clone that eagerly duplicates non-shared nodes to avoid marking the original tree as shared. + * @param force When true, clones even the nodes that are already marked as shared. + */ greedyClone(force?: boolean): this; /** * Computes the differences between `this` and `other`. 
@@ -21,6 +39,8 @@ export declare class BTreeEx extends BTree { * @param onlyThis Callback invoked for all keys only present in `this`. * @param onlyOther Callback invoked for all keys only present in `other`. * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. */ diffAgainst(other: BTree, onlyThis?: (k: K, v: V) => { break?: R; @@ -30,45 +50,53 @@ export declare class BTreeEx extends BTree { break?: R; } | void): R | undefined; /** - * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. - * The callback will be called in sorted key order. + * Calls the supplied `callback` for each key/value pair shared by this tree and `other`, in sorted key order. * Neither tree is modified. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because disjoint subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param other The other tree to compare with this one. * @param callback Called for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. - * @description Complexity is bounded by O(N + M) time. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
- * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the two trees were created with different comparators. */ forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => { break?: R; } | void): R | undefined; /** - * Efficiently unions this tree with `other`, reusing subtrees wherever possible. - * Neither input tree is modified. + * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input. + * + * Complexity is O(N + M) in the fully overlapping case, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param other The other tree to union with this one. - * @param combineFn Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. - * @returns A new BTree that contains the unioned key/value pairs. - * @description Complexity is bounded by O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
- * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` - * and inserting the contents of `other` into the clone. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or `undefined` to omit the key. + * @returns A new `BTreeEx` that contains the unioned key/value pairs. + * @throws Error if the trees were created with different comparators or max node sizes. */ union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx; } export interface BTreeEx { + /** + * Variants of `BTree#with` that preserve the `BTreeEx` return type for fluent chaining. + */ with(key: K): BTreeEx; with(key: K, value: V2, overwrite?: boolean): BTreeEx; with(key: K, value?: V2, overwrite?: boolean): BTreeEx; + /** + * Equivalent to `BTree#withPairs`, but returns a `BTreeEx`. + */ withPairs(pairs: [K, V | V2][], overwrite: boolean): BTreeEx; + /** + * Equivalent to `BTree#withKeys`, but returns a `BTreeEx`. + */ withKeys(keys: K[], returnThisIfUnchanged?: boolean): BTreeEx; + /** + * Equivalent to `BTree#mapValues`, but returns a `BTreeEx` so the extended helpers remain available. + */ mapValues(callback: (v: V, k: K, counter: number) => R): BTreeEx; } export default BTreeEx; diff --git a/extended/index.js b/extended/index.js index 1196537..cad4ca9 100644 --- a/extended/index.js +++ b/extended/index.js @@ -55,6 +55,16 @@ var BTreeEx = /** @class */ (function (_super) { function BTreeEx() { return _super !== null && _super.apply(this, arguments) || this; } + /** + * Bulk loads a new `BTreeEx` from a sorted alternating list of entries. + * This reuses the same algorithm as `extended/bulkLoad`, but produces a `BTreeEx`. + * Time and space complexity are O(n). + * @param entries Alternating array of keys and values: `[key0, value0, key1, value1, ...]`. 
Must be sorted by key in strictly ascending order. + * @param maxNodeSize The branching factor (maximum number of children per node). + * @param compare Comparator to use. Defaults to the standard comparator if omitted. + * @returns A fully built tree containing the supplied entries. + * @throws Error if the entries are not strictly sorted or contain duplicate keys. + */ BTreeEx.bulkLoad = function (entries, maxNodeSize, compare) { var cmp = compare !== null && compare !== void 0 ? compare : b_tree_1.defaultComparator; var root = (0, bulkLoad_1.bulkLoadRoot)(entries, maxNodeSize, cmp); @@ -64,6 +74,10 @@ var BTreeEx = /** @class */ (function (_super) { target._size = root.size(); return tree; }; + /** + * Quickly clones the tree while preserving the `BTreeEx` prototype. + * The clone shares structure (copy-on-write) until either instance is mutated. + */ BTreeEx.prototype.clone = function () { var source = this; source._root.isShared = true; @@ -73,6 +87,10 @@ var BTreeEx = /** @class */ (function (_super) { target._size = source._size; return result; }; + /** + * Performs a greedy clone that eagerly duplicates non-shared nodes to avoid marking the original tree as shared. + * @param force When true, clones even the nodes that are already marked as shared. + */ BTreeEx.prototype.greedyClone = function (force) { var source = this; var result = new BTreeEx(undefined, this._compare, this._maxNodeSize); @@ -92,41 +110,39 @@ var BTreeEx = /** @class */ (function (_super) { * @param onlyThis Callback invoked for all keys only present in `this`. * @param onlyOther Callback invoked for all keys only present in `other`. * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. 
*/ BTreeEx.prototype.diffAgainst = function (other, onlyThis, onlyOther, different) { return (0, diffAgainst_1.default)(this, other, onlyThis, onlyOther, different); }; /** - * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. - * The callback will be called in sorted key order. + * Calls the supplied `callback` for each key/value pair shared by this tree and `other`, in sorted key order. * Neither tree is modified. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because disjoint subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param other The other tree to compare with this one. * @param callback Called for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. - * @description Complexity is bounded by O(N + M) time. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the two trees were created with different comparators. 
*/ BTreeEx.prototype.forEachKeyInBoth = function (other, callback) { return (0, forEachKeyInBoth_1.default)(this, other, callback); }; /** - * Efficiently unions this tree with `other`, reusing subtrees wherever possible. - * Neither input tree is modified. + * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input. + * + * Complexity is O(N + M) in the fully overlapping case, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param other The other tree to union with this one. - * @param combineFn Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. - * @returns A new BTree that contains the unioned key/value pairs. - * @description Complexity is bounded by O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. - * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` - * and inserting the contents of `other` into the clone. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or `undefined` to omit the key. + * @returns A new `BTreeEx` that contains the unioned key/value pairs. 
+ * @throws Error if the trees were created with different comparators or max node sizes. */ BTreeEx.prototype.union = function (other, combineFn) { return (0, union_1.default)(this, other, combineFn); diff --git a/extended/index.ts b/extended/index.ts index 27babb3..579c18a 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -13,6 +13,16 @@ import { bulkLoadRoot } from './bulkLoad'; * @extends BTree */ export class BTreeEx extends BTree { + /** + * Bulk loads a new `BTreeEx` from a sorted alternating list of entries. + * This reuses the same algorithm as `extended/bulkLoad`, but produces a `BTreeEx`. + * Time and space complexity are O(n). + * @param entries Alternating array of keys and values: `[key0, value0, key1, value1, ...]`. Must be sorted by key in strictly ascending order. + * @param maxNodeSize The branching factor (maximum number of children per node). + * @param compare Comparator to use. Defaults to the standard comparator if omitted. + * @returns A fully built tree containing the supplied entries. + * @throws Error if the entries are not strictly sorted or contain duplicate keys. + */ static bulkLoad( entries: (K | V)[], maxNodeSize: number, @@ -27,6 +37,10 @@ export class BTreeEx extends BTree { return tree; } + /** + * Quickly clones the tree while preserving the `BTreeEx` prototype. + * The clone shares structure (copy-on-write) until either instance is mutated. + */ clone(): this { const source = this as unknown as BTreeWithInternals; source._root.isShared = true; @@ -37,6 +51,10 @@ export class BTreeEx extends BTree { return result as this; } + /** + * Performs a greedy clone that eagerly duplicates non-shared nodes to avoid marking the original tree as shared. + * @param force When true, clones even the nodes that are already marked as shared. 
+ */ greedyClone(force?: boolean): this { const source = this as unknown as BTreeWithInternals; const result = new BTreeEx(undefined, this._compare, this._maxNodeSize); @@ -57,6 +75,8 @@ export class BTreeEx extends BTree { * @param onlyThis Callback invoked for all keys only present in `this`. * @param onlyOther Callback invoked for all keys only present in `other`. * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. */ diffAgainst( other: BTree, @@ -68,18 +88,17 @@ export class BTreeEx extends BTree { } /** - * Calls the supplied `callback` for each key/value pair shared by this tree and `other`. - * The callback will be called in sorted key order. + * Calls the supplied `callback` for each key/value pair shared by this tree and `other`, in sorted key order. * Neither tree is modified. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because disjoint subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param other The other tree to compare with this one. * @param callback Called for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. - * @description Complexity is bounded by O(N + M) time. - * However, time is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
- * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than calling `toArray` - * on both trees and performing a walk on the sorted contents due to the reduced allocation overhead. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the two trees were created with different comparators. */ forEachKeyInBoth( other: BTree, @@ -89,19 +108,16 @@ export class BTreeEx extends BTree { } /** - * Efficiently unions this tree with `other`, reusing subtrees wherever possible. - * Neither input tree is modified. + * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input. + * + * Complexity is O(N + M) in the fully overlapping case, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. * @param other The other tree to union with this one. - * @param combineFn Called for keys that appear in both trees. Return the desired value, or - * `undefined` to omit the key from the result. - * @returns A new BTree that contains the unioned key/value pairs. - * @description Complexity is bounded by O(N + M) for both time and allocations. - * However, it is additionally bounded by O(log(N + M) * D) where D is the number of disjoint ranges of keys between - * the two trees. In practice, that means for keys of random distribution the performance is O(N + M) and for - * keys with significant numbers of non-overlapping key ranges it is O(log(N + M) * D) which is much faster. 
- * The algorithm achieves this additional non-linear bound by skipping over non-intersecting subtrees entirely. - * Note that in benchmarks even the worst case (fully interleaved keys) performance is faster than cloning `this` - * and inserting the contents of `other` into the clone. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or `undefined` to omit the key. + * @returns A new `BTreeEx` that contains the unioned key/value pairs. + * @throws Error if the trees were created with different comparators or max node sizes. */ union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx { return union, K, V>(this, other, combineFn); @@ -109,11 +125,23 @@ export class BTreeEx extends BTree { } export interface BTreeEx { + /** + * Variants of `BTree#with` that preserve the `BTreeEx` return type for fluent chaining. + */ with(key: K): BTreeEx; with(key: K, value: V2, overwrite?: boolean): BTreeEx; with(key: K, value?: V2, overwrite?: boolean): BTreeEx; + /** + * Equivalent to `BTree#withPairs`, but returns a `BTreeEx`. + */ withPairs(pairs: [K, V | V2][], overwrite: boolean): BTreeEx; + /** + * Equivalent to `BTree#withKeys`, but returns a `BTreeEx`. + */ withKeys(keys: K[], returnThisIfUnchanged?: boolean): BTreeEx; + /** + * Equivalent to `BTree#mapValues`, but returns a `BTreeEx` so the extended helpers remain available. 
+ */ mapValues(callback: (v: V, k: K, counter: number) => R): BTreeEx; } From 94c37c39047ab474035d36362d7e3d1ca142ec36 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Fri, 14 Nov 2025 07:59:40 -0800 Subject: [PATCH 109/143] docs --- extended/index.d.ts | 26 ++++++-------------------- extended/index.js | 10 ++-------- extended/index.ts | 26 ++++++-------------------- 3 files changed, 14 insertions(+), 48 deletions(-) diff --git a/extended/index.d.ts b/extended/index.d.ts index 914024b..32ba3fa 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -18,15 +18,9 @@ export declare class BTreeEx extends BTree { * @throws Error if the entries are not strictly sorted or contain duplicate keys. */ static bulkLoad(entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number): BTreeEx; - /** - * Quickly clones the tree while preserving the `BTreeEx` prototype. - * The clone shares structure (copy-on-write) until either instance is mutated. - */ + /** See {@link BTree.clone}. */ clone(): this; - /** - * Performs a greedy clone that eagerly duplicates non-shared nodes to avoid marking the original tree as shared. - * @param force When true, clones even the nodes that are already marked as shared. - */ + /** See {@link BTree.greedyClone}. */ greedyClone(force?: boolean): this; /** * Computes the differences between `this` and `other`. @@ -80,23 +74,15 @@ export declare class BTreeEx extends BTree { union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx; } export interface BTreeEx { - /** - * Variants of `BTree#with` that preserve the `BTreeEx` return type for fluent chaining. - */ + /** See {@link BTree.with}. */ with(key: K): BTreeEx; with(key: K, value: V2, overwrite?: boolean): BTreeEx; with(key: K, value?: V2, overwrite?: boolean): BTreeEx; - /** - * Equivalent to `BTree#withPairs`, but returns a `BTreeEx`. - */ + /** See {@link BTree.withPairs}. 
*/ withPairs(pairs: [K, V | V2][], overwrite: boolean): BTreeEx; - /** - * Equivalent to `BTree#withKeys`, but returns a `BTreeEx`. - */ + /** See {@link BTree.withKeys}. */ withKeys(keys: K[], returnThisIfUnchanged?: boolean): BTreeEx; - /** - * Equivalent to `BTree#mapValues`, but returns a `BTreeEx` so the extended helpers remain available. - */ + /** See {@link BTree.mapValues}. */ mapValues(callback: (v: V, k: K, counter: number) => R): BTreeEx; } export default BTreeEx; diff --git a/extended/index.js b/extended/index.js index cad4ca9..2f0c46c 100644 --- a/extended/index.js +++ b/extended/index.js @@ -74,10 +74,7 @@ var BTreeEx = /** @class */ (function (_super) { target._size = root.size(); return tree; }; - /** - * Quickly clones the tree while preserving the `BTreeEx` prototype. - * The clone shares structure (copy-on-write) until either instance is mutated. - */ + /** See {@link BTree.clone}. */ BTreeEx.prototype.clone = function () { var source = this; source._root.isShared = true; @@ -87,10 +84,7 @@ var BTreeEx = /** @class */ (function (_super) { target._size = source._size; return result; }; - /** - * Performs a greedy clone that eagerly duplicates non-shared nodes to avoid marking the original tree as shared. - * @param force When true, clones even the nodes that are already marked as shared. - */ + /** See {@link BTree.greedyClone}. */ BTreeEx.prototype.greedyClone = function (force) { var source = this; var result = new BTreeEx(undefined, this._compare, this._maxNodeSize); diff --git a/extended/index.ts b/extended/index.ts index 579c18a..6cb5db4 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -37,10 +37,7 @@ export class BTreeEx extends BTree { return tree; } - /** - * Quickly clones the tree while preserving the `BTreeEx` prototype. - * The clone shares structure (copy-on-write) until either instance is mutated. - */ + /** See {@link BTree.clone}. 
*/ clone(): this { const source = this as unknown as BTreeWithInternals; source._root.isShared = true; @@ -51,10 +48,7 @@ export class BTreeEx extends BTree { return result as this; } - /** - * Performs a greedy clone that eagerly duplicates non-shared nodes to avoid marking the original tree as shared. - * @param force When true, clones even the nodes that are already marked as shared. - */ + /** See {@link BTree.greedyClone}. */ greedyClone(force?: boolean): this { const source = this as unknown as BTreeWithInternals; const result = new BTreeEx(undefined, this._compare, this._maxNodeSize); @@ -125,23 +119,15 @@ export class BTreeEx extends BTree { } export interface BTreeEx { - /** - * Variants of `BTree#with` that preserve the `BTreeEx` return type for fluent chaining. - */ + /** See {@link BTree.with}. */ with(key: K): BTreeEx; with(key: K, value: V2, overwrite?: boolean): BTreeEx; with(key: K, value?: V2, overwrite?: boolean): BTreeEx; - /** - * Equivalent to `BTree#withPairs`, but returns a `BTreeEx`. - */ + /** See {@link BTree.withPairs}. */ withPairs(pairs: [K, V | V2][], overwrite: boolean): BTreeEx; - /** - * Equivalent to `BTree#withKeys`, but returns a `BTreeEx`. - */ + /** See {@link BTree.withKeys}. */ withKeys(keys: K[], returnThisIfUnchanged?: boolean): BTreeEx; - /** - * Equivalent to `BTree#mapValues`, but returns a `BTreeEx` so the extended helpers remain available. - */ + /** See {@link BTree.mapValues}. 
*/ mapValues(callback: (v: V, k: K, counter: number) => R): BTreeEx; } From 557dd5e9a4ee174821bc1ca2f766391fd16dec6f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Fri, 14 Nov 2025 08:46:54 -0800 Subject: [PATCH 110/143] add new methods --- extended/index.d.ts | 30 ++++++++++++++++++++++++++++++ extended/index.js | 34 ++++++++++++++++++++++++++++++++++ extended/index.ts | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+) diff --git a/extended/index.d.ts b/extended/index.d.ts index 32ba3fa..3818ef7 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -59,6 +59,22 @@ export declare class BTreeEx extends BTree { forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => { break?: R; } | void): R | undefined; + /** + * Calls the supplied `callback` for each key/value pair that exists in this tree but not in `other` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other Keys present in this tree will be omitted from the callback. + * @param callback Invoked for keys unique to `this`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were created with different comparators. + */ + forEachKeyNotIn(other: BTree, callback: (key: K, value: V) => { + break?: R; + } | void): R | undefined; /** * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input. 
* @@ -72,6 +88,20 @@ export declare class BTreeEx extends BTree { * @throws Error if the trees were created with different comparators or max node sizes. */ union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx; + /** + * Returns a new tree containing only the keys that are present in this tree but not `other` (set subtraction). + * Neither input tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2) where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside this tree. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The tree whose keys will be removed from the result. + * @returns A new `BTreeEx` representing `this \ other`. + * @throws Error if the trees were created with different comparators or max node sizes. + */ + subtract(other: BTreeEx): BTreeEx; } export interface BTreeEx { /** See {@link BTree.with}. 
*/ diff --git a/extended/index.js b/extended/index.js index 2f0c46c..d623262 100644 --- a/extended/index.js +++ b/extended/index.js @@ -41,6 +41,8 @@ exports.BTreeEx = void 0; var b_tree_1 = __importStar(require("../b+tree")); var diffAgainst_1 = __importDefault(require("./diffAgainst")); var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); +var forEachKeyNotIn_1 = __importDefault(require("./forEachKeyNotIn")); +var subtract_1 = __importDefault(require("./subtract")); var union_1 = __importDefault(require("./union")); var bulkLoad_1 = require("./bulkLoad"); /** @@ -126,6 +128,22 @@ var BTreeEx = /** @class */ (function (_super) { BTreeEx.prototype.forEachKeyInBoth = function (other, callback) { return (0, forEachKeyInBoth_1.default)(this, other, callback); }; + /** + * Calls the supplied `callback` for each key/value pair that exists in this tree but not in `other` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other Keys present in this tree will be omitted from the callback. + * @param callback Invoked for keys unique to `this`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were created with different comparators. 
+ */ + BTreeEx.prototype.forEachKeyNotIn = function (other, callback) { + return (0, forEachKeyNotIn_1.default)(this, other, callback); + }; /** * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input. * @@ -141,6 +159,22 @@ var BTreeEx = /** @class */ (function (_super) { BTreeEx.prototype.union = function (other, combineFn) { return (0, union_1.default)(this, other, combineFn); }; + /** + * Returns a new tree containing only the keys that are present in this tree but not `other` (set subtraction). + * Neither input tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2) where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside this tree. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The tree whose keys will be removed from the result. + * @returns A new `BTreeEx` representing `this \ other`. + * @throws Error if the trees were created with different comparators or max node sizes. 
+ */ + BTreeEx.prototype.subtract = function (other) { + return (0, subtract_1.default)(this, other); + }; return BTreeEx; }(b_tree_1.default)); exports.BTreeEx = BTreeEx; diff --git a/extended/index.ts b/extended/index.ts index 6cb5db4..56bef42 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -2,6 +2,8 @@ import BTree, { defaultComparator } from '../b+tree'; import type { BTreeWithInternals } from './shared'; import diffAgainst from './diffAgainst'; import forEachKeyInBoth from './forEachKeyInBoth'; +import forEachKeyNotIn from './forEachKeyNotIn'; +import subtract from './subtract'; import union from './union'; import { bulkLoadRoot } from './bulkLoad'; @@ -101,6 +103,26 @@ export class BTreeEx extends BTree { return forEachKeyInBoth(this, other, callback); } + /** + * Calls the supplied `callback` for each key/value pair that exists in this tree but not in `other` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other Keys present in this tree will be omitted from the callback. + * @param callback Invoked for keys unique to `this`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were created with different comparators. 
+ */ + forEachKeyNotIn( + other: BTree, + callback: (key: K, value: V) => { break?: R } | void + ): R | undefined { + return forEachKeyNotIn(this, other, callback); + } + /** * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input. * @@ -116,6 +138,23 @@ export class BTreeEx extends BTree { union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx { return union, K, V>(this, other, combineFn); } + + /** + * Returns a new tree containing only the keys that are present in this tree but not `other` (set subtraction). + * Neither input tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2) where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside this tree. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The tree whose keys will be removed from the result. + * @returns A new `BTreeEx` representing `this \ other`. + * @throws Error if the trees were created with different comparators or max node sizes. 
+   */
+  subtract(other: BTreeEx<K, V>): BTreeEx<K, V> {
+    return subtract<BTreeEx<K, V>, K, V>(this, other);
+  }
 }
 
 export interface BTreeEx<K, V> {

From 68425d62b6d8dffd523fab62499983b070ef230e Mon Sep 17 00:00:00 2001
From: Taylor Williams
Date: Sat, 15 Nov 2025 12:32:16 -0800
Subject: [PATCH 111/143] add intersect to BTreeEx

---
 extended/index.d.ts | 14 ++++++++++++++
 extended/index.js   | 17 +++++++++++++++++
 extended/index.ts   | 18 ++++++++++++++++++
 3 files changed, 49 insertions(+)

diff --git a/extended/index.d.ts b/extended/index.d.ts
index 3818ef7..240c2d4 100644
--- a/extended/index.d.ts
+++ b/extended/index.d.ts
@@ -75,6 +75,20 @@ export declare class BTreeEx<K, V> extends BTree<K, V> {
     forEachKeyNotIn<R>(other: BTree<K, V>, callback: (key: K, value: V) => {
         break?: R;
     } | void): R | undefined;
+    /**
+     * Returns a new tree containing only keys present in both trees.
+     * Neither tree is modified.
+     *
+     * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D),
+     * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely.
+     * In practice, that means for keys of random distribution the performance is linear and for keys with significant
+     * numbers of non-overlapping key ranges it is much faster.
+     * @param other The other tree to intersect with this one.
+     * @param combineFn Called for keys that appear in both trees. Return the desired value.
+     * @returns A new `BTreeEx` populated with the intersection.
+     * @throws Error if the trees were created with different comparators.
+     */
+    intersect(other: BTreeEx<K, V>, combineFn: (key: K, leftValue: V, rightValue: V) => V): BTreeEx<K, V>;
     /**
      * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input.
* diff --git a/extended/index.js b/extended/index.js index d623262..4966bc9 100644 --- a/extended/index.js +++ b/extended/index.js @@ -42,6 +42,7 @@ var b_tree_1 = __importStar(require("../b+tree")); var diffAgainst_1 = __importDefault(require("./diffAgainst")); var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); var forEachKeyNotIn_1 = __importDefault(require("./forEachKeyNotIn")); +var intersect_1 = __importDefault(require("./intersect")); var subtract_1 = __importDefault(require("./subtract")); var union_1 = __importDefault(require("./union")); var bulkLoad_1 = require("./bulkLoad"); @@ -144,6 +145,22 @@ var BTreeEx = /** @class */ (function (_super) { BTreeEx.prototype.forEachKeyNotIn = function (other, callback) { return (0, forEachKeyNotIn_1.default)(this, other, callback); }; + /** + * Returns a new tree containing only keys present in both trees. + * Neither tree is modified. + * + * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D), + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The other tree to intersect with this one. + * @param combineFn Called for keys that appear in both trees. Return the desired value. + * @returns A new `BTreeEx` populated with the intersection. + * @throws Error if the trees were created with different comparators. + */ + BTreeEx.prototype.intersect = function (other, combineFn) { + return (0, intersect_1.default)(this, other, combineFn); + }; /** * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input. 
      *
diff --git a/extended/index.ts b/extended/index.ts
index 56bef42..a88636c 100644
--- a/extended/index.ts
+++ b/extended/index.ts
@@ -3,6 +3,7 @@ import type { BTreeWithInternals } from './shared';
 import diffAgainst from './diffAgainst';
 import forEachKeyInBoth from './forEachKeyInBoth';
 import forEachKeyNotIn from './forEachKeyNotIn';
+import intersect from './intersect';
 import subtract from './subtract';
 import union from './union';
 import { bulkLoadRoot } from './bulkLoad';
@@ -123,6 +124,23 @@ export class BTreeEx<K, V> extends BTree<K, V> {
     return forEachKeyNotIn(this, other, callback);
   }
 
+  /**
+   * Returns a new tree containing only keys present in both trees.
+   * Neither tree is modified.
+   *
+   * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D),
+   * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely.
+   * In practice, that means for keys of random distribution the performance is linear and for keys with significant
+   * numbers of non-overlapping key ranges it is much faster.
+   * @param other The other tree to intersect with this one.
+   * @param combineFn Called for keys that appear in both trees. Return the desired value.
+   * @returns A new `BTreeEx` populated with the intersection.
+   * @throws Error if the trees were created with different comparators.
+   */
+  intersect(other: BTreeEx<K, V>, combineFn: (key: K, leftValue: V, rightValue: V) => V): BTreeEx<K, V> {
+    return intersect<BTreeEx<K, V>, K, V>(this, other, combineFn);
+  }
+
   /**
    * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input.
* From b849d0535bd1eebcbb779794de294402f11b2934 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sat, 15 Nov 2025 12:37:35 -0800 Subject: [PATCH 112/143] add complex set operation fuzz test suite --- test/setOperationFuzz.ts | 89 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 test/setOperationFuzz.ts diff --git a/test/setOperationFuzz.ts b/test/setOperationFuzz.ts new file mode 100644 index 0000000..56eaf88 --- /dev/null +++ b/test/setOperationFuzz.ts @@ -0,0 +1,89 @@ +import BTreeEx from '../extended'; +import MersenneTwister from 'mersenne-twister'; +import { makeArray } from './shared'; + +const compare = (a: number, b: number) => a - b; + +describe('Complicated set operation fuzz tests', () => { + const FUZZ_SETTINGS = { + branchingFactors: [4, 5, 32], + ooms: [2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], + collisionChances: [0.05, 0.1, 0.3], + timeoutMs: 30_000 + } as const; + + FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { + if (fraction < 0 || fraction > 1) + throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); + }); + FUZZ_SETTINGS.collisionChances.forEach(chance => { + if (chance < 0 || chance > 1) + throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); + }); + + jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + + const rng = new MersenneTwister(0xC0FFEE); + + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { + describe(`branching factor ${maxNodeSize}`, () => { + for (const collisionChance of FUZZ_SETTINGS.collisionChances) { + for (const oom of FUZZ_SETTINGS.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { + const fractionB = 1 - fractionA; + const collisionLabel = collisionChance.toFixed(2); + + test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); 
+ const treeB = new BTreeEx([], compare, maxNodeSize); + + const keys = makeArray(size, true, 1, collisionChance, rng); + + for (const value of keys) { + const assignToA = rng.random() < fractionA; + const assignToB = rng.random() < fractionB; + + if (!assignToA && !assignToB) { + if (rng.random() < 0.5) + treeA.set(value, value); + else + treeB.set(value, value); + continue; + } + + if (assignToA) + treeA.set(value, value); + if (assignToB) + treeB.set(value, value); + } + + const keepValue = (_k: number, left: number, _right: number) => left; + const dropValue = () => undefined; + + const symmetricViaUnion = treeA.union(treeB, dropValue); + const fullUnion = treeA.union(treeB, keepValue); + const intersection = treeA.intersect(treeB, keepValue); + const symmetricViaSubtract = fullUnion.subtract(intersection); + + expect(symmetricViaUnion.toArray()).toEqual(symmetricViaSubtract.toArray()); + + const diffBA = treeB.subtract(treeA); + const diffAB = treeA.subtract(treeB); + const mergedDiffs = diffAB.union(diffBA, keepValue); + + expect(mergedDiffs.toArray()).toEqual(symmetricViaUnion.toArray()); + + symmetricViaUnion.checkValid(); + symmetricViaSubtract.checkValid(); + mergedDiffs.checkValid(); + treeA.checkValid(); + treeB.checkValid(); + }); + } + } + } + }); + } +}); From e54ef1479a9d5aaf7b43568d8335ed8eefa91a55 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sat, 15 Nov 2025 12:55:15 -0800 Subject: [PATCH 113/143] enable test --- test/setOperationFuzz.test.ts | 147 ++++++++++++++++++++++++++++++++++ test/setOperationFuzz.ts | 89 -------------------- 2 files changed, 147 insertions(+), 89 deletions(-) create mode 100644 test/setOperationFuzz.test.ts delete mode 100644 test/setOperationFuzz.ts diff --git a/test/setOperationFuzz.test.ts b/test/setOperationFuzz.test.ts new file mode 100644 index 0000000..da7c160 --- /dev/null +++ b/test/setOperationFuzz.test.ts @@ -0,0 +1,147 @@ +import BTreeEx from '../extended'; +import MersenneTwister from 
'mersenne-twister'; +import { makeArray } from './shared'; + +const compare = (a: number, b: number) => a - b; + +describe('Set operation fuzz tests', () => { + const FUZZ_SETTINGS = { + branchingFactors: [4, 5, 32], + ooms: [2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], + collisionChances: [0.05, 0.1, 0.3], + timeoutMs: 30_000 + } as const; + + FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { + if (fraction < 0 || fraction > 1) + throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); + }); + FUZZ_SETTINGS.collisionChances.forEach(chance => { + if (chance < 0 || chance > 1) + throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); + }); + + jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + + const rng = new MersenneTwister(0xC0FFEE); + + const count = (t: BTreeEx) => t.toArray().length; + + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { + describe(`branching factor ${maxNodeSize}`, () => { + for (const collisionChance of FUZZ_SETTINGS.collisionChances) { + for (const oom of FUZZ_SETTINGS.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { + const fractionB = 1 - fractionA; + const collisionLabel = collisionChance.toFixed(2); + + it(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + const treeC = new BTreeEx([], compare, maxNodeSize); + + const keys = makeArray(size, true, 1, collisionChance, rng); + + for (const value of keys) { + const assignToA = rng.random() < fractionA; + const assignToB = rng.random() < fractionB; + const assignToC = rng.random() < 0.5; + + if (assignToA) + treeA.set(value, value); + if (assignToB) + treeB.set(value, value); + if (assignToC) + treeC.set(value, value); + } + + const keepEither = (_k: number, left: number, _right: number) => 
left; + const dropValue = () => undefined; + const combineSum = (_k: number, left: number, right: number) => left + right; + + const unionDrop = treeA.union(treeB, dropValue); + const unionKeep = treeA.union(treeB, keepEither); + const intersection = treeA.intersect(treeB, keepEither); + const diffAB = treeA.subtract(treeB); + const diffBA = treeB.subtract(treeA); + + // 1. Partition of A: A = (A\B) โˆช (AโˆฉB) and parts are disjoint. + const partition = diffAB.union(intersection, keepEither); + expect(partition.toArray()).toEqual(treeA.toArray()); + expect(diffAB.intersect(intersection, keepEither).size).toBe(0); + + // 2. Recover B from union and A\B: (AโˆชB)\(A\B) = B. + expect(unionKeep.subtract(diffAB).toArray()).toEqual(treeB.toArray()); + + // 3. Symmetric difference two ways. + const symFromDiffs = diffAB.union(diffBA, keepEither); + const symFromUnion = unionKeep.subtract(intersection); + expect(symFromDiffs.toArray()).toEqual(symFromUnion.toArray()); + + // 4. Intersection via difference: AโˆฉB = A \ (A\B). + expect(intersection.toArray()).toEqual(treeA.subtract(diffAB).toArray()); + + // 5. Difference via intersection: A\B = A \ (AโˆฉB). + expect(diffAB.toArray()).toEqual(treeA.subtract(intersection).toArray()); + + // 6. Idempotence. + expect(treeA.union(treeA, keepEither).toArray()).toEqual(treeA.toArray()); + expect(treeA.intersect(treeA, keepEither).toArray()).toEqual(treeA.toArray()); + expect(treeA.subtract(diffAB).toArray()).toEqual(treeA.subtract(treeB).toArray()); + + // 7. Commutativity. + expect(intersection.toArray()).toEqual(treeB.intersect(treeA, keepEither).toArray()); + const commUT = treeA.union(treeB, combineSum); + const commTU = treeB.union(treeA, combineSum); + expect(commUT.toArray()).toEqual(commTU.toArray()); + + // 8. Associativity. 
+ const assocLeft = treeA.intersect(treeB, keepEither).intersect(treeC, keepEither); + const assocRight = treeA.intersect(treeB.intersect(treeC, keepEither), keepEither); + expect(assocLeft.toArray()).toEqual(assocRight.toArray()); + const assocSumLeft = treeA.union(treeB, combineSum).union(treeC, combineSum); + const assocSumRight = treeA.union(treeB.union(treeC, combineSum), combineSum); + expect(assocSumLeft.toArray()).toEqual(assocSumRight.toArray()); + + // 9. Absorption. + expect(treeA.intersect(treeA.union(treeB, keepEither), keepEither).toArray()).toEqual(treeA.toArray()); + expect(treeA.union(treeA.intersect(treeB, keepEither), keepEither).toArray()).toEqual(treeA.toArray()); + + // 10. Distributivity. + const distIntersect = treeA.intersect(treeB.union(treeC, keepEither), keepEither); + const distRight = treeA.intersect(treeB, keepEither).union(treeA.intersect(treeC, keepEither), keepEither); + expect(distIntersect.toArray()).toEqual(distRight.toArray()); + const distSubtract = treeA.subtract(treeB.union(treeC, keepEither)); + const distSubtractRight = treeA.subtract(treeB).subtract(treeC); + expect(distSubtract.toArray()).toEqual(distSubtractRight.toArray()); + const distIntersectDiff = treeA.intersect(treeB, keepEither).subtract(treeC); + const distDiffIntersect = treeA.subtract(treeC).intersect(treeB, keepEither); + expect(distIntersectDiff.toArray()).toEqual(distDiffIntersect.toArray()); + + // 11. Superset sanity. + expect(treeA.subtract(treeA.union(treeB, keepEither)).size).toBe(0); + expect(diffAB.intersect(treeB, keepEither).size).toBe(0); + + // 12. Cardinality relations. 
+ expect(count(unionKeep)).toBe(count(treeA) + count(treeB) - count(intersection)); + expect(count(diffAB)).toBe(count(treeA) - count(intersection)); + expect(count(treeA)).toBe(count(diffAB) + count(intersection)); + + partition.checkValid(); + unionDrop.checkValid(); + unionKeep.checkValid(); + intersection.checkValid(); + diffAB.checkValid(); + diffBA.checkValid(); + treeA.checkValid(); + treeB.checkValid(); + treeC.checkValid(); + }); + } + } + } + }); + } +}); diff --git a/test/setOperationFuzz.ts b/test/setOperationFuzz.ts deleted file mode 100644 index 56eaf88..0000000 --- a/test/setOperationFuzz.ts +++ /dev/null @@ -1,89 +0,0 @@ -import BTreeEx from '../extended'; -import MersenneTwister from 'mersenne-twister'; -import { makeArray } from './shared'; - -const compare = (a: number, b: number) => a - b; - -describe('Complicated set operation fuzz tests', () => { - const FUZZ_SETTINGS = { - branchingFactors: [4, 5, 32], - ooms: [2, 3], - fractionsPerOOM: [0.1, 0.25, 0.5], - collisionChances: [0.05, 0.1, 0.3], - timeoutMs: 30_000 - } as const; - - FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { - if (fraction < 0 || fraction > 1) - throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); - }); - FUZZ_SETTINGS.collisionChances.forEach(chance => { - if (chance < 0 || chance > 1) - throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); - }); - - jest.setTimeout(FUZZ_SETTINGS.timeoutMs); - - const rng = new MersenneTwister(0xC0FFEE); - - for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { - describe(`branching factor ${maxNodeSize}`, () => { - for (const collisionChance of FUZZ_SETTINGS.collisionChances) { - for (const oom of FUZZ_SETTINGS.ooms) { - const size = 5 * Math.pow(10, oom); - for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { - const fractionB = 1 - fractionA; - const collisionLabel = collisionChance.toFixed(2); - - test(`size ${size}, fractionA ${fractionA.toFixed(2)}, 
fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { - const treeA = new BTreeEx([], compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1, collisionChance, rng); - - for (const value of keys) { - const assignToA = rng.random() < fractionA; - const assignToB = rng.random() < fractionB; - - if (!assignToA && !assignToB) { - if (rng.random() < 0.5) - treeA.set(value, value); - else - treeB.set(value, value); - continue; - } - - if (assignToA) - treeA.set(value, value); - if (assignToB) - treeB.set(value, value); - } - - const keepValue = (_k: number, left: number, _right: number) => left; - const dropValue = () => undefined; - - const symmetricViaUnion = treeA.union(treeB, dropValue); - const fullUnion = treeA.union(treeB, keepValue); - const intersection = treeA.intersect(treeB, keepValue); - const symmetricViaSubtract = fullUnion.subtract(intersection); - - expect(symmetricViaUnion.toArray()).toEqual(symmetricViaSubtract.toArray()); - - const diffBA = treeB.subtract(treeA); - const diffAB = treeA.subtract(treeB); - const mergedDiffs = diffAB.union(diffBA, keepValue); - - expect(mergedDiffs.toArray()).toEqual(symmetricViaUnion.toArray()); - - symmetricViaUnion.checkValid(); - symmetricViaSubtract.checkValid(); - mergedDiffs.checkValid(); - treeA.checkValid(); - treeB.checkValid(); - }); - } - } - } - }); - } -}); From c42b19593e10ea4801ea67877e26c2628b397aeb Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sat, 15 Nov 2025 13:17:56 -0800 Subject: [PATCH 114/143] fix bug --- extended/decompose.js | 18 +++++++++--------- extended/decompose.ts | 19 +++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index 25d9cc3..1ed8351 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -16,6 +16,7 @@ var parallelWalk_1 = require("./parallelWalk"); */ function decompose(left, right, combineFn, 
ignoreRight) { if (ignoreRight === void 0) { ignoreRight = false; } + var maxNodeSize = left._maxNodeSize; var cmp = left._compare; (0, b_tree_1.check)(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); // Holds the disjoint nodes that result from decomposition. @@ -37,15 +38,8 @@ function decompose(left, right, combineFn, ignoreRight) { var onLeafCreation = function (leaf) { (0, shared_1.alternatingPush)(disjoint, 0, leaf); }; - var flushPendingEntries = function () { - var createdLeaves = (0, shared_1.flushToLeaves)(pending, left._maxNodeSize, onLeafCreation); - if (createdLeaves > 0) { - tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; - tallestHeight = 0; - } - }; var addSharedNodeToDisjointSet = function (node, height) { - flushPendingEntries(); + (0, shared_1.flushToLeaves)(pending, maxNodeSize, onLeafCreation); node.isShared = true; (0, shared_1.alternatingPush)(disjoint, height, node); if (height > tallestHeight) { @@ -270,7 +264,12 @@ function decompose(left, right, combineFn, ignoreRight) { } } // Ensure any trailing non-disjoint entries are added - flushPendingEntries(); + var createdLeaves = (0, shared_1.flushToLeaves)(pending, maxNodeSize, onLeafCreation); + // In fully interleaved cases, no leaves may be created until now + if (tallestHeight < 0 && createdLeaves > 0) { + tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; + tallestHeight = 0; + } return { disjoint: disjoint, tallestIndex: tallestIndex }; } exports.decompose = decompose; @@ -337,6 +336,7 @@ function processSide(branchingFactor, disjoint, spine, start, end, step, sideInd var currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf var subtree = (0, shared_1.alternatingGetSecond)(disjoint, i); var subtreeHeight = (0, shared_1.alternatingGetFirst)(disjoint, i); + (0, b_tree_1.check)(subtreeHeight <= currentHeight, "Subtree taller than spine during reconstruction."); var insertionDepth = currentHeight 
- (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); diff --git a/extended/decompose.ts b/extended/decompose.ts index 5ab1442..98353c4 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -29,6 +29,7 @@ export function decompose( combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined, ignoreRight: boolean = false ): DecomposeResult { + const maxNodeSize = left._maxNodeSize; const cmp = left._compare; check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); // Holds the disjoint nodes that result from decomposition. @@ -55,16 +56,8 @@ export function decompose( alternatingPush(disjoint, 0, leaf); } - const flushPendingEntries = () => { - const createdLeaves = flushToLeaves(pending, left._maxNodeSize, onLeafCreation); - if (createdLeaves > 0) { - tallestIndex = alternatingCount(disjoint) - 1; - tallestHeight = 0; - } - }; - const addSharedNodeToDisjointSet = (node: BNode, height: number) => { - flushPendingEntries(); + flushToLeaves(pending, maxNodeSize, onLeafCreation); node.isShared = true; alternatingPush(disjoint, height, node); if (height > tallestHeight) { @@ -351,7 +344,12 @@ export function decompose( } // Ensure any trailing non-disjoint entries are added - flushPendingEntries(); + const createdLeaves = flushToLeaves(pending, maxNodeSize, onLeafCreation); + // In fully interleaved cases, no leaves may be created until now + if (tallestHeight < 0 && createdLeaves > 0) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = 0; + } return { disjoint, tallestIndex }; } @@ -462,6 +460,7 @@ function processSide( const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf const subtree = alternatingGetSecond(disjoint, i); const subtreeHeight = alternatingGetFirst(disjoint, i); + check(subtreeHeight <= 
currentHeight, "Subtree taller than spine during reconstruction."); const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' // Ensure path is unshared before mutation From 9797206d2a0bf934d6533053c85f2faf1a6fc3c8 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sat, 15 Nov 2025 13:20:30 -0800 Subject: [PATCH 115/143] fix test --- test/setOperationFuzz.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/setOperationFuzz.test.ts b/test/setOperationFuzz.test.ts index da7c160..20bd45a 100644 --- a/test/setOperationFuzz.test.ts +++ b/test/setOperationFuzz.test.ts @@ -89,7 +89,7 @@ describe('Set operation fuzz tests', () => { // 6. Idempotence. expect(treeA.union(treeA, keepEither).toArray()).toEqual(treeA.toArray()); expect(treeA.intersect(treeA, keepEither).toArray()).toEqual(treeA.toArray()); - expect(treeA.subtract(diffAB).toArray()).toEqual(treeA.subtract(treeB).toArray()); + expect(diffAB.subtract(treeB).toArray()).toEqual(diffAB.toArray()); // 7. Commutativity. expect(intersection.toArray()).toEqual(treeB.intersect(treeA, keepEither).toArray()); From f7d07ce6f72db8150fa318a6a6acbd4111a346a4 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 16 Nov 2025 16:29:19 -0800 Subject: [PATCH 116/143] improve packing in bulk load --- extended/bulkLoad.d.ts | 5 ++- extended/bulkLoad.js | 15 +++++--- extended/bulkLoad.ts | 16 +++++--- extended/shared.js | 21 ++++++++--- extended/shared.ts | 22 ++++++++--- test/bulkLoad.test.ts | 83 +++++++++++++++++++++++++----------------- 6 files changed, 104 insertions(+), 58 deletions(-) diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts index b154d12..79dc063 100644 --- a/extended/bulkLoad.d.ts +++ b/extended/bulkLoad.d.ts @@ -7,7 +7,8 @@ import BTree from '../b+tree'; * the array is an alternating list of keys and values: [key0, value0, key1, value1, ...]. 
* @param maxNodeSize The branching factor (maximum node size) for the resulting tree. * @param compare Function to compare keys. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. * @returns A new BTree containing the given entries. - * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed). + * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed) or if the load factor is out of the allowed range. */ -export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare: (a: K, b: K) => number): BTree; +export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare: (a: K, b: K) => number, loadFactor?: number): BTree; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 1150ba1..616f38f 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -30,12 +30,14 @@ var shared_1 = require("./shared"); * the array is an alternating list of keys and values: [key0, value0, key1, value1, ...]. * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. * @param compare Function to compare keys. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. * @returns A new BTree containing the given entries. - * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed). + * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed) or if the load factor is out of the allowed range. 
*/ -function bulkLoad(entries, maxNodeSize, compare) { +function bulkLoad(entries, maxNodeSize, compare, loadFactor) { + if (loadFactor === void 0) { loadFactor = 0.8; } var alternatingEntries = entries; - var root = bulkLoadRoot(alternatingEntries, maxNodeSize, compare); + var root = bulkLoadRoot(alternatingEntries, maxNodeSize, compare, loadFactor); var tree = new b_tree_1.default(undefined, compare, maxNodeSize); var target = tree; target._root = root; @@ -47,7 +49,10 @@ exports.bulkLoad = bulkLoad; * Bulk loads, returns the root node of the resulting tree. * @internal */ -function bulkLoadRoot(entries, maxNodeSize, compare) { +function bulkLoadRoot(entries, maxNodeSize, compare, loadFactor) { + if (loadFactor === void 0) { loadFactor = 0.8; } + if (loadFactor < 0.5 || loadFactor > 1.0) + throw new Error("bulkLoad: loadFactor must be between 0.5 and 1.0"); var totalPairs = (0, shared_1.alternatingCount)(entries); if (totalPairs > 1) { var previousKey = (0, shared_1.alternatingGetFirst)(entries, 0); @@ -59,7 +64,7 @@ function bulkLoadRoot(entries, maxNodeSize, compare) { } } var leaves = []; - (0, shared_1.flushToLeaves)(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }); + (0, shared_1.flushToLeaves)(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }, loadFactor); if (leaves.length === 0) return new b_tree_1.BNode(); var currentLevel = leaves; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index 7e2b825..a1a55c4 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -9,16 +9,18 @@ import { alternatingCount, alternatingGetFirst, flushToLeaves, type AlternatingL * the array is an alternating list of keys and values: [key0, value0, key1, value1, ...]. * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. * @param compare Function to compare keys. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. 
* @returns A new BTree containing the given entries. - * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed). + * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed) or if the load factor is out of the allowed range. */ export function bulkLoad( entries: (K | V)[], maxNodeSize: number, - compare: (a: K, b: K) => number + compare: (a: K, b: K) => number, + loadFactor = 0.8 ): BTree { const alternatingEntries = entries as AlternatingList; - const root = bulkLoadRoot(alternatingEntries, maxNodeSize, compare); + const root = bulkLoadRoot(alternatingEntries, maxNodeSize, compare, loadFactor); const tree = new BTree(undefined, compare, maxNodeSize); const target = tree as unknown as BTreeWithInternals; target._root = root; @@ -33,8 +35,12 @@ export function bulkLoad( export function bulkLoadRoot( entries: AlternatingList, maxNodeSize: number, - compare: (a: K, b: K) => number + compare: (a: K, b: K) => number, + loadFactor = 0.8 ): BNode { + if (loadFactor < 0.5 || loadFactor > 1.0) + throw new Error("bulkLoad: loadFactor must be between 0.5 and 1.0"); + const totalPairs = alternatingCount(entries); if (totalPairs > 1) { let previousKey = alternatingGetFirst(entries, 0); @@ -47,7 +53,7 @@ export function bulkLoadRoot( } const leaves: BNode[] = []; - flushToLeaves(entries, maxNodeSize, (leaf) => leaves.push(leaf)); + flushToLeaves(entries, maxNodeSize, (leaf) => leaves.push(leaf), loadFactor); if (leaves.length === 0) return new BNode(); diff --git a/extended/shared.js b/extended/shared.js index 8e6749c..9608dc3 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -4,18 +4,27 @@ exports.checkCanDoSetOperation = exports.branchingFactorErrorMsg = exports.compa var b_tree_1 = require("../b+tree"); /** * Flushes entries from an alternating list into leaf nodes. 
- * The leaf nodes are packed as tightly as possible while ensuring all - * nodes are at least 50% full (if more than one leaf is created). + * The supplied load factor will be respected if possible, but may be exceeded + * to ensure the 50% full rule is maintained. + * Note: if < maxNodeSize entries are provided, only one leaf will be created, which may be underfilled. + * @param alternatingList The list of entries to flush. This list will be cleared. + * @param maxNodeSize The maximum node size (branching factor) for the resulting leaves. + * @param onLeafCreation Called when a new leaf is created. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. + * @returns The number of leaves created. * @internal */ -function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation) { +function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation, loadFactor) { + if (loadFactor === void 0) { loadFactor = 0.8; } var totalPairs = alternatingCount(alternatingList); if (totalPairs === 0) return 0; + var targetSize = Math.ceil(maxNodeSize * loadFactor); + // Ensure we don't make any underfilled nodes unless we have to. + var targetLeafCount = totalPairs <= maxNodeSize ? 1 : Math.ceil(totalPairs / targetSize); // This method creates as many evenly filled leaves as possible from // the pending entries. All will be > 50% full if we are creating more than one leaf. 
- var leafCount = Math.ceil(totalPairs / maxNodeSize); - var remainingLeaves = leafCount; + var remainingLeaves = targetLeafCount; var remaining = totalPairs; var pairIndex = 0; while (remainingLeaves > 0) { @@ -33,7 +42,7 @@ function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation) { onLeafCreation(leaf); } alternatingList.length = 0; - return leafCount; + return targetLeafCount; } exports.flushToLeaves = flushToLeaves; ; diff --git a/extended/shared.ts b/extended/shared.ts index 83ad838..7a960d5 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -20,23 +20,33 @@ export type AlternatingList = Array; /** * Flushes entries from an alternating list into leaf nodes. - * The leaf nodes are packed as tightly as possible while ensuring all - * nodes are at least 50% full (if more than one leaf is created). + * The supplied load factor will be respected if possible, but may be exceeded + * to ensure the 50% full rule is maintained. + * Note: if < maxNodeSize entries are provided, only one leaf will be created, which may be underfilled. + * @param alternatingList The list of entries to flush. This list will be cleared. + * @param maxNodeSize The maximum node size (branching factor) for the resulting leaves. + * @param onLeafCreation Called when a new leaf is created. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. + * @returns The number of leaves created. * @internal */ export function flushToLeaves( alternatingList: AlternatingList, maxNodeSize: number, - onLeafCreation: (node: BNode) => void + onLeafCreation: (node: BNode) => void, + loadFactor = 0.8 ): number { const totalPairs = alternatingCount(alternatingList); if (totalPairs === 0) return 0; + const targetSize = Math.ceil(maxNodeSize * loadFactor); + // Ensure we don't make any underfilled nodes unless we have to. + const targetLeafCount = totalPairs <= maxNodeSize ? 
1 : Math.ceil(totalPairs / targetSize); + // This method creates as many evenly filled leaves as possible from // the pending entries. All will be > 50% full if we are creating more than one leaf. - const leafCount = Math.ceil(totalPairs / maxNodeSize); - let remainingLeaves = leafCount; + let remainingLeaves = targetLeafCount; let remaining = totalPairs; let pairIndex = 0; while (remainingLeaves > 0) { @@ -54,7 +64,7 @@ export function flushToLeaves( onLeafCreation(leaf); } alternatingList.length = 0; - return leafCount; + return targetLeafCount; }; // ------- Alternating list helpers ------- diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts index f981b62..2cd0ec1 100644 --- a/test/bulkLoad.test.ts +++ b/test/bulkLoad.test.ts @@ -29,9 +29,9 @@ function toAlternating(pairs: Pair[]): number[] { return alternating; } -function buildTreeFromPairs(maxNodeSize: number, pairs: Pair[]) { +function buildTreeFromPairs(maxNodeSize: number, pairs: Pair[], loadFactor: number) { const alternating = toAlternating(pairs); - const tree = bulkLoad(alternating, maxNodeSize, compareNumbers); + const tree = bulkLoad(alternating, maxNodeSize, compareNumbers, loadFactor); const root = tree['_root'] as BNode; return { tree, root }; } @@ -74,7 +74,7 @@ describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { }); test('empty input produces empty tree', () => { - const { tree, root } = buildTreeFromPairs(maxNodeSize, []); + const { tree, root } = buildTreeFromPairs(maxNodeSize, [], 1.0); expect(root?.isLeaf).toBe(true); expect(root?.keys.length ?? 
0).toBe(0); expectTreeMatches(tree, []); @@ -82,7 +82,7 @@ describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { test('single entry stays in one leaf', () => { const pairs = sequentialPairs(1, 5); - const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 1.0); expectTreeMatches(tree, pairs); const root = tree['_root'] as BNode; expect(root.isLeaf).toBe(true); @@ -91,17 +91,35 @@ describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { test('fills a single leaf up to capacity', () => { const pairs = sequentialPairs(maxNodeSize, 0, 2); - const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 1.0); expectTreeMatches(tree, pairs); const root = tree['_root'] as BNode; expect(root.isLeaf).toBe(true); expect(root.keys.length).toBe(maxNodeSize); }); + test('does not produce underfilled nodes if possible', () => { + const pairs = sequentialPairs(maxNodeSize, 0, 2); + // despite asking for only 60% load factor, we should still get a full node + // because splitting into > 1 leaf would cause underfilled nodes + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 0.6); + expectTreeMatches(tree, pairs); + const root = tree['_root'] as BNode; + expect(root.isLeaf).toBe(true); + expect(root.keys.length).toBe(maxNodeSize); + }); + + test('throws when load factor is too low or too high', () => { + const pairs = sequentialPairs(maxNodeSize, 0, 2); + const alternating = toAlternating(pairs); + expect(() => bulkLoad(alternating, maxNodeSize, compareNumbers, 0.3)).toThrow(); + expect(() => bulkLoad(alternating, maxNodeSize, compareNumbers, 1.1)).toThrow(); + }); + test('distributes keys nearly evenly across leaves when not divisible by fanout', () => { const inputSize = maxNodeSize * 3 + Math.floor(maxNodeSize / 2) + 1; const pairs = sequentialPairs(inputSize, 10, 3); - const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + 
const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 0.8); expectTreeMatches(tree, pairs); const leaves = collectLeaves(tree['_root'] as BNode); const leafSizes = leaves.map((leaf) => leaf.keys.length); @@ -113,7 +131,7 @@ describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { test('creates multiple internal layers when leaf count exceeds branching factor', () => { const inputSize = maxNodeSize * maxNodeSize + Math.floor(maxNodeSize / 2) + 1; const pairs = sequentialPairs(inputSize, 0, 1); - const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 0.8); expectTreeMatches(tree, pairs); const root = tree['_root'] as BNode; expect(root.isLeaf).toBe(false); @@ -123,7 +141,7 @@ describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { test('loads 10000 entries and preserves all data', () => { const keys = makeArray(10000, false, 3); const pairs = pairsFromKeys(keys); - const { tree } = buildTreeFromPairs(maxNodeSize, pairs); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 1.0); expectTreeMatches(tree, pairs); const leaves = collectLeaves(tree['_root'] as BNode); expect(leaves.length).toBe(Math.ceil(pairs.length / maxNodeSize)); @@ -148,6 +166,7 @@ describe('bulkLoad fuzz tests', () => { iterationsPerOOM: 3, spacings: [1, 2, 3, 5, 8, 13], payloadMods: [1, 2, 5, 11, 17], + loadFactors: [0.5, 0.8, 1.0], timeoutMs: 30_000, } as const; @@ -160,32 +179,28 @@ describe('bulkLoad fuzz tests', () => { for (const oom of FUZZ_SETTINGS.ooms) { const baseSize = 5 * Math.pow(10, oom); for (let iteration = 0; iteration < FUZZ_SETTINGS.iterationsPerOOM; iteration++) { - const spacing = FUZZ_SETTINGS.spacings[randomInt(rng, FUZZ_SETTINGS.spacings.length)]; - const payloadMod = FUZZ_SETTINGS.payloadMods[randomInt(rng, FUZZ_SETTINGS.payloadMods.length)]; - const sizeJitter = randomInt(rng, baseSize); - const size = baseSize + sizeJitter; - - test(`size ${size}, spacing ${spacing}, 
payload ${payloadMod}, iteration ${iteration}`, () => { - const keys = makeArray(size, false, spacing, 0, rng); - const pairs = pairsFromKeys(keys).map(([key, value], index) => [key, value * payloadMod + index] as Pair); - const { tree, root } = buildTreeFromPairs(maxNodeSize, pairs); - expectTreeMatches(tree, pairs); - - const leaves = collectLeaves(root); - const leafSizes = leaves.map((leaf) => leaf.keys.length); - const expectedLeafCount = Math.ceil(pairs.length / maxNodeSize); - expect(leaves.length).toBe(expectedLeafCount); - const minLeaf = Math.min(...leafSizes); - const maxLeaf = Math.max(...leafSizes); - expect(maxLeaf - minLeaf).toBeLessThanOrEqual(1); - - if (!root.isLeaf) - assertInternalNodeFanout(root, maxNodeSize); - - const alternating = toAlternating(pairs); - const bulkLoadTree = BTreeEx.bulkLoad(alternating, maxNodeSize, compareNumbers); - expectTreeMatches(bulkLoadTree, pairs); - }); + for (const loadFactor of FUZZ_SETTINGS.loadFactors) { + const targetNodeSize = Math.ceil(maxNodeSize * loadFactor); + const spacing = FUZZ_SETTINGS.spacings[randomInt(rng, FUZZ_SETTINGS.spacings.length)]; + const payloadMod = FUZZ_SETTINGS.payloadMods[randomInt(rng, FUZZ_SETTINGS.payloadMods.length)]; + const sizeJitter = randomInt(rng, baseSize); + const size = baseSize + sizeJitter; + + test(`size ${size}, spacing ${spacing}, payload ${payloadMod}, iteration ${iteration}`, () => { + const keys = makeArray(size, false, spacing, 0, rng); + const pairs = pairsFromKeys(keys).map(([key, value], index) => [key, value * payloadMod + index] as Pair); + const { tree, root } = buildTreeFromPairs(maxNodeSize, pairs, loadFactor); + expectTreeMatches(tree, pairs); + + const leaves = collectLeaves(root); + const leafSizes = leaves.map((leaf) => leaf.keys.length); + const expectedLeafCount = Math.ceil(pairs.length / targetNodeSize); + expect(leaves.length).toBe(expectedLeafCount); + const minLeaf = Math.min(...leafSizes); + const maxLeaf = Math.max(...leafSizes); + 
expect(maxLeaf - minLeaf).toBeLessThanOrEqual(1); + }); + } } } }); From f148b39ff210d2ece07a3d9e525366b03b8da1fa Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 16 Nov 2025 16:34:17 -0800 Subject: [PATCH 117/143] bulk load doesn't mutate --- extended/bulkLoad.js | 2 +- extended/bulkLoad.ts | 4 ++-- extended/decompose.js | 6 ++++-- extended/decompose.ts | 8 +++++--- extended/shared.js | 11 +++++------ extended/shared.ts | 7 +++---- test/bulkLoad.test.ts | 6 ++++++ 7 files changed, 26 insertions(+), 18 deletions(-) diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 616f38f..6d4d8d9 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -64,7 +64,7 @@ function bulkLoadRoot(entries, maxNodeSize, compare, loadFactor) { } } var leaves = []; - (0, shared_1.flushToLeaves)(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }, loadFactor); + (0, shared_1.makeLeavesFrom)(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }, loadFactor); if (leaves.length === 0) return new b_tree_1.BNode(); var currentLevel = leaves; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index a1a55c4..fe0af6a 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -1,5 +1,5 @@ import BTree, { BNode, BNodeInternal, check, sumChildSizes } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, flushToLeaves, type AlternatingList, type BTreeWithInternals } from './shared'; +import { alternatingCount, alternatingGetFirst, makeLeavesFrom, type AlternatingList, type BTreeWithInternals } from './shared'; /** * Loads a B-Tree from a sorted list of entries in bulk. 
This is faster than inserting @@ -53,7 +53,7 @@ export function bulkLoadRoot( } const leaves: BNode[] = []; - flushToLeaves(entries, maxNodeSize, (leaf) => leaves.push(leaf), loadFactor); + makeLeavesFrom(entries, maxNodeSize, (leaf) => leaves.push(leaf), loadFactor); if (leaves.length === 0) return new BNode(); diff --git a/extended/decompose.js b/extended/decompose.js index 1ed8351..bc3fde3 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -39,7 +39,9 @@ function decompose(left, right, combineFn, ignoreRight) { (0, shared_1.alternatingPush)(disjoint, 0, leaf); }; var addSharedNodeToDisjointSet = function (node, height) { - (0, shared_1.flushToLeaves)(pending, maxNodeSize, onLeafCreation); + (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation); + // flush pending entries + pending.length = 0; node.isShared = true; (0, shared_1.alternatingPush)(disjoint, height, node); if (height > tallestHeight) { @@ -264,7 +266,7 @@ function decompose(left, right, combineFn, ignoreRight) { } } // Ensure any trailing non-disjoint entries are added - var createdLeaves = (0, shared_1.flushToLeaves)(pending, maxNodeSize, onLeafCreation); + var createdLeaves = (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation); // In fully interleaved cases, no leaves may be created until now if (tallestHeight < 0 && createdLeaves > 0) { tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; diff --git a/extended/decompose.ts b/extended/decompose.ts index 98353c4..dc983db 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -1,5 +1,5 @@ import BTree, { areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; -import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, BTreeConstructor, createAlternatingList, flushToLeaves, type AlternatingList, type BTreeWithInternals } from './shared'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, BTreeConstructor, 
createAlternatingList, makeLeavesFrom, type AlternatingList, type BTreeWithInternals } from './shared'; import { createCursor, getKey, Cursor, moveForwardOne, moveTo, noop } from "./parallelWalk"; /** @@ -57,7 +57,9 @@ export function decompose( } const addSharedNodeToDisjointSet = (node: BNode, height: number) => { - flushToLeaves(pending, maxNodeSize, onLeafCreation); + makeLeavesFrom(pending, maxNodeSize, onLeafCreation); + // flush pending entries + pending.length = 0; node.isShared = true; alternatingPush(disjoint, height, node); if (height > tallestHeight) { @@ -344,7 +346,7 @@ export function decompose( } // Ensure any trailing non-disjoint entries are added - const createdLeaves = flushToLeaves(pending, maxNodeSize, onLeafCreation); + const createdLeaves = makeLeavesFrom(pending, maxNodeSize, onLeafCreation); // In fully interleaved cases, no leaves may be created until now if (tallestHeight < 0 && createdLeaves > 0) { tallestIndex = alternatingCount(disjoint) - 1; diff --git a/extended/shared.js b/extended/shared.js index 9608dc3..cfa553e 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -1,20 +1,20 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.checkCanDoSetOperation = exports.branchingFactorErrorMsg = exports.comparatorErrorMsg = exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.createAlternatingList = exports.flushToLeaves = void 0; +exports.checkCanDoSetOperation = exports.branchingFactorErrorMsg = exports.comparatorErrorMsg = exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.createAlternatingList = exports.makeLeavesFrom = void 0; var b_tree_1 = require("../b+tree"); /** - * Flushes entries from an alternating list into leaf nodes. + * Builds leaves from the given alternating list of entries. 
* The supplied load factor will be respected if possible, but may be exceeded * to ensure the 50% full rule is maintained. * Note: if < maxNodeSize entries are provided, only one leaf will be created, which may be underfilled. - * @param alternatingList The list of entries to flush. This list will be cleared. + * @param alternatingList The list of entries to build leaves from. * @param maxNodeSize The maximum node size (branching factor) for the resulting leaves. * @param onLeafCreation Called when a new leaf is created. * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. * @returns The number of leaves created. * @internal */ -function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation, loadFactor) { +function makeLeavesFrom(alternatingList, maxNodeSize, onLeafCreation, loadFactor) { if (loadFactor === void 0) { loadFactor = 0.8; } var totalPairs = alternatingCount(alternatingList); if (totalPairs === 0) @@ -41,10 +41,9 @@ function flushToLeaves(alternatingList, maxNodeSize, onLeafCreation, loadFactor) var leaf = new b_tree_1.BNode(keys, vals); onLeafCreation(leaf); } - alternatingList.length = 0; return targetLeafCount; } -exports.flushToLeaves = flushToLeaves; +exports.makeLeavesFrom = makeLeavesFrom; ; // ------- Alternating list helpers ------- // These helpers manage a list that alternates between two types of entries. diff --git a/extended/shared.ts b/extended/shared.ts index 7a960d5..7a79980 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -19,18 +19,18 @@ export type BTreeWithInternals = BTree> = export type AlternatingList = Array; /** - * Flushes entries from an alternating list into leaf nodes. + * Builds leaves from the given alternating list of entries. * The supplied load factor will be respected if possible, but may be exceeded * to ensure the 50% full rule is maintained. * Note: if < maxNodeSize entries are provided, only one leaf will be created, which may be underfilled. 
- * @param alternatingList The list of entries to flush. This list will be cleared. + * @param alternatingList The list of entries to build leaves from. * @param maxNodeSize The maximum node size (branching factor) for the resulting leaves. * @param onLeafCreation Called when a new leaf is created. * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. * @returns The number of leaves created. * @internal */ -export function flushToLeaves( +export function makeLeavesFrom( alternatingList: AlternatingList, maxNodeSize: number, onLeafCreation: (node: BNode) => void, @@ -63,7 +63,6 @@ export function flushToLeaves( const leaf = new BNode(keys, vals); onLeafCreation(leaf); } - alternatingList.length = 0; return targetLeafCount; }; diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts index 2cd0ec1..e081272 100644 --- a/test/bulkLoad.test.ts +++ b/test/bulkLoad.test.ts @@ -109,6 +109,12 @@ describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { expect(root.keys.length).toBe(maxNodeSize); }); + test('does not mutate the supplied entry list', () => { + const pairs = sequentialPairs(maxNodeSize, 0, 2); + buildTreeFromPairs(maxNodeSize, pairs, 0.6); + expect(pairs.length).toBe(maxNodeSize); + }); + test('throws when load factor is too low or too high', () => { const pairs = sequentialPairs(maxNodeSize, 0, 2); const alternating = toAlternating(pairs); From e8aba4eccd1f22772beab978caea6532b8689e89 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 16 Nov 2025 17:06:06 -0800 Subject: [PATCH 118/143] tests --- extended/bulkLoad.js | 9 ++++++--- extended/bulkLoad.ts | 10 +++++++--- test/bulkLoad.test.ts | 9 +++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js index 6d4d8d9..3a23f94 100644 --- a/extended/bulkLoad.js +++ b/extended/bulkLoad.js @@ -67,14 +67,17 @@ function bulkLoadRoot(entries, maxNodeSize, compare, loadFactor) { (0, 
shared_1.makeLeavesFrom)(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }, loadFactor); if (leaves.length === 0) return new b_tree_1.BNode(); + var targetNodeSize = Math.ceil(maxNodeSize * loadFactor); + var exactlyHalf = targetNodeSize === maxNodeSize / 2; + var minSize = Math.floor(maxNodeSize / 2); var currentLevel = leaves; while (currentLevel.length > 1) { var nodeCount = currentLevel.length; - if (nodeCount <= maxNodeSize) { + if (nodeCount <= maxNodeSize && (nodeCount !== maxNodeSize || !exactlyHalf)) { currentLevel = [new b_tree_1.BNodeInternal(currentLevel, (0, b_tree_1.sumChildSizes)(currentLevel))]; break; } - var nextLevelCount = Math.ceil(nodeCount / maxNodeSize); + var nextLevelCount = Math.ceil(nodeCount / targetNodeSize); (0, b_tree_1.check)(nextLevelCount > 1); var nextLevel = new Array(nextLevelCount); var remainingNodes = nodeCount; @@ -93,7 +96,7 @@ function bulkLoadRoot(entries, maxNodeSize, compare, loadFactor) { remainingParents--; nextLevel[i] = new b_tree_1.BNodeInternal(children, size); } - var minSize = Math.floor(maxNodeSize / 2); + // If last node is underfilled, balance with left sibling var secondLastNode = nextLevel[nextLevelCount - 2]; var lastNode = nextLevel[nextLevelCount - 1]; while (lastNode.children.length < minSize) diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts index fe0af6a..2fe5986 100644 --- a/extended/bulkLoad.ts +++ b/extended/bulkLoad.ts @@ -57,15 +57,19 @@ export function bulkLoadRoot( if (leaves.length === 0) return new BNode(); + const targetNodeSize = Math.ceil(maxNodeSize * loadFactor); + const exactlyHalf = targetNodeSize === maxNodeSize / 2; + const minSize = Math.floor(maxNodeSize / 2); + let currentLevel: BNode[] = leaves; while (currentLevel.length > 1) { const nodeCount = currentLevel.length; - if (nodeCount <= maxNodeSize) { + if (nodeCount <= maxNodeSize && (nodeCount !== maxNodeSize || !exactlyHalf)) { currentLevel = [new BNodeInternal(currentLevel, 
sumChildSizes(currentLevel))]; break; } - const nextLevelCount = Math.ceil(nodeCount / maxNodeSize); + const nextLevelCount = Math.ceil(nodeCount / targetNodeSize); check(nextLevelCount > 1); const nextLevel = new Array>(nextLevelCount); let remainingNodes = nodeCount; @@ -86,7 +90,7 @@ export function bulkLoadRoot( nextLevel[i] = new BNodeInternal(children, size); } - const minSize = Math.floor(maxNodeSize / 2); + // If last node is underfilled, balance with left sibling const secondLastNode = nextLevel[nextLevelCount - 2] as BNodeInternal; const lastNode = nextLevel[nextLevelCount - 1] as BNodeInternal; while (lastNode.children.length < minSize) diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts index e081272..b45d685 100644 --- a/test/bulkLoad.test.ts +++ b/test/bulkLoad.test.ts @@ -153,6 +153,15 @@ describe.each(branchingFactors)('bulkLoad fanout %i', (maxNodeSize) => { expect(leaves.length).toBe(Math.ceil(pairs.length / maxNodeSize)); assertInternalNodeFanout(tree['_root'] as BNode, maxNodeSize); }); + + test('entries with 50% load factor, second layer with exactly half full nodes', () => { + // Create enough entries to require a second layer that has exactly two nodes when maxNodeSize is even. 
+ const entryCount = Math.ceil(maxNodeSize / 2) * maxNodeSize; + const keys = makeArray(entryCount, false, 3); + const pairs = pairsFromKeys(keys); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 0.5); + expectTreeMatches(tree, pairs); + }); }); describe('BTreeEx.bulkLoad', () => { From 3cca51d6251d7830f21ed3c48bf2d86cc49d2222 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Sun, 16 Nov 2025 17:15:03 -0800 Subject: [PATCH 119/143] test cleanup --- test/bulkLoad.test.ts | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts index b45d685..988e309 100644 --- a/test/bulkLoad.test.ts +++ b/test/bulkLoad.test.ts @@ -177,10 +177,8 @@ describe('BTreeEx.bulkLoad', () => { describe('bulkLoad fuzz tests', () => { const FUZZ_SETTINGS = { branchingFactors, - ooms: [2, 3], + ooms: [0, 2, 3], iterationsPerOOM: 3, - spacings: [1, 2, 3, 5, 8, 13], - payloadMods: [1, 2, 5, 11, 17], loadFactors: [0.5, 0.8, 1.0], timeoutMs: 30_000, } as const; @@ -196,21 +194,21 @@ describe('bulkLoad fuzz tests', () => { for (let iteration = 0; iteration < FUZZ_SETTINGS.iterationsPerOOM; iteration++) { for (const loadFactor of FUZZ_SETTINGS.loadFactors) { const targetNodeSize = Math.ceil(maxNodeSize * loadFactor); - const spacing = FUZZ_SETTINGS.spacings[randomInt(rng, FUZZ_SETTINGS.spacings.length)]; - const payloadMod = FUZZ_SETTINGS.payloadMods[randomInt(rng, FUZZ_SETTINGS.payloadMods.length)]; const sizeJitter = randomInt(rng, baseSize); const size = baseSize + sizeJitter; - test(`size ${size}, spacing ${spacing}, payload ${payloadMod}, iteration ${iteration}`, () => { - const keys = makeArray(size, false, spacing, 0, rng); - const pairs = pairsFromKeys(keys).map(([key, value], index) => [key, value * payloadMod + index] as Pair); + test(`size ${size}, iteration ${iteration}`, () => { + const keys = makeArray(size, false, 0, 0, rng); + const pairs = pairsFromKeys(keys).map(([key, value], index) => [key, 
value + index] as Pair); const { tree, root } = buildTreeFromPairs(maxNodeSize, pairs, loadFactor); expectTreeMatches(tree, pairs); const leaves = collectLeaves(root); const leafSizes = leaves.map((leaf) => leaf.keys.length); - const expectedLeafCount = Math.ceil(pairs.length / targetNodeSize); - expect(leaves.length).toBe(expectedLeafCount); + if (pairs.length >= maxNodeSize) { + const expectedLeafCount = Math.ceil(pairs.length / targetNodeSize); + expect(leaves.length).toBe(expectedLeafCount); + } const minLeaf = Math.min(...leafSizes); const maxLeaf = Math.max(...leafSizes); expect(maxLeaf - minLeaf).toBeLessThanOrEqual(1); From 999398b3001c35e0cd60a0c6a32a79e7bfc0baee Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Mon, 17 Nov 2025 08:16:34 -0800 Subject: [PATCH 120/143] remove default param --- extended/decompose.js | 6 ++++-- extended/decompose.ts | 7 +++++-- extended/shared.js | 1 - extended/shared.ts | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index bc3fde3..da159f1 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -4,6 +4,7 @@ exports.buildFromDecomposition = exports.decompose = void 0; var b_tree_1 = require("../b+tree"); var shared_1 = require("./shared"); var parallelWalk_1 = require("./parallelWalk"); +var decomposeLoadFactor = 0.7; /** * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. @@ -12,6 +13,7 @@ var parallelWalk_1 = require("./parallelWalk"); * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to * the first key at or after the trailing cursor's previous position. * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. 
+ * Note: some of the returned leaves may be underfilled. * @internal */ function decompose(left, right, combineFn, ignoreRight) { @@ -39,7 +41,7 @@ function decompose(left, right, combineFn, ignoreRight) { (0, shared_1.alternatingPush)(disjoint, 0, leaf); }; var addSharedNodeToDisjointSet = function (node, height) { - (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation); + (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); // flush pending entries pending.length = 0; node.isShared = true; @@ -266,7 +268,7 @@ function decompose(left, right, combineFn, ignoreRight) { } } // Ensure any trailing non-disjoint entries are added - var createdLeaves = (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation); + var createdLeaves = (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); // In fully interleaved cases, no leaves may be created until now if (tallestHeight < 0 && createdLeaves > 0) { tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; diff --git a/extended/decompose.ts b/extended/decompose.ts index dc983db..cbdb57c 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -13,6 +13,8 @@ export type DecomposeResult = { disjoint: AlternatingList( @@ -57,7 +60,7 @@ export function decompose( } const addSharedNodeToDisjointSet = (node: BNode, height: number) => { - makeLeavesFrom(pending, maxNodeSize, onLeafCreation); + makeLeavesFrom(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); // flush pending entries pending.length = 0; node.isShared = true; @@ -346,7 +349,7 @@ export function decompose( } // Ensure any trailing non-disjoint entries are added - const createdLeaves = makeLeavesFrom(pending, maxNodeSize, onLeafCreation); + const createdLeaves = makeLeavesFrom(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); // In fully interleaved cases, no leaves may be created until now if (tallestHeight < 0 && createdLeaves > 0) { 
tallestIndex = alternatingCount(disjoint) - 1; diff --git a/extended/shared.js b/extended/shared.js index cfa553e..ea4e2c8 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -15,7 +15,6 @@ var b_tree_1 = require("../b+tree"); * @internal */ function makeLeavesFrom(alternatingList, maxNodeSize, onLeafCreation, loadFactor) { - if (loadFactor === void 0) { loadFactor = 0.8; } var totalPairs = alternatingCount(alternatingList); if (totalPairs === 0) return 0; diff --git a/extended/shared.ts b/extended/shared.ts index 7a79980..d0f90d5 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -34,7 +34,7 @@ export function makeLeavesFrom( alternatingList: AlternatingList, maxNodeSize: number, onLeafCreation: (node: BNode) => void, - loadFactor = 0.8 + loadFactor: number ): number { const totalPairs = alternatingCount(alternatingList); if (totalPairs === 0) From 72e769545e32c0948964d932e27c587eeb2d1ce8 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Mon, 17 Nov 2025 09:39:28 -0800 Subject: [PATCH 121/143] comment fix --- extended/decompose.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extended/decompose.ts b/extended/decompose.ts index cbdb57c..a8c8ad8 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -518,7 +518,7 @@ function appendAndCascade( sideInsertionIndex: (node: BNodeInternal) => number, splitOffSide: (node: BNodeInternal) => BNodeInternal ): BNodeInternal | undefined { - // We must take care to avoid accidental propagation upward of the size of the inserted su + // We must take care to avoid accidental propagation upward of the size of the inserted subtree // To do this, we first split nodes upward from the insertion point until we find a node with capacity // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, // inserting at the end ensures no accidental propagation. 
From e8d94eaf56bbbe0cf89f6ae511329456b61d6957 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Mon, 17 Nov 2025 09:40:38 -0800 Subject: [PATCH 122/143] update js --- extended/decompose.js | 4 ++-- extended/decompose.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index da159f1..fe133a5 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -380,7 +380,7 @@ function processSide(branchingFactor, disjoint, spine, start, end, step, sideInd * Returns a new root if the root was split, otherwise undefined. */ function appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide) { - // We must take care to avoid accidental propagation upward of the size of the inserted su + // We must take care to avoid accidental propagation upward of the size of the inserted subtree // To do this, we first split nodes upward from the insertion point until we find a node with capacity // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, // inserting at the end ensures no accidental propagation. 
@@ -389,7 +389,7 @@ function appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideI var carry = undefined; // Determine initially where to insert after any splits var insertTarget = spine[insertionDepth]; - if (insertTarget.keys.length >= branchingFactor) { + if (insertTarget.keys.length === branchingFactor) { insertTarget = carry = splitOffSide(insertTarget); } var d = insertionDepth - 1; diff --git a/extended/decompose.ts b/extended/decompose.ts index a8c8ad8..1eb7ecd 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -528,7 +528,7 @@ function appendAndCascade( let carry: BNode | undefined = undefined; // Determine initially where to insert after any splits let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; - if (insertTarget.keys.length >= branchingFactor) { + if (insertTarget.keys.length === branchingFactor) { insertTarget = carry = splitOffSide(insertTarget); } From 4b2891e6c0cf5386cc1097a934b098a8b7aa0066 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 18 Nov 2025 07:53:48 -0800 Subject: [PATCH 123/143] wip --- extended/decompose.js | 94 ++++++++++++++++++++++------- extended/decompose.ts | 134 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 185 insertions(+), 43 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index fe133a5..78f514c 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -37,8 +37,10 @@ function decompose(left, right, combineFn, ignoreRight) { // This is done because we cannot know immediately whether we can add the node to the disjoint set // because its ancestor may also be disjoint and should be reused instead. var highestDisjoint = undefined; + var minSize = Math.floor(maxNodeSize / 2); var onLeafCreation = function (leaf) { - (0, shared_1.alternatingPush)(disjoint, 0, leaf); + var height = leaf.keys.length < minSize ? 
-1 : 0; + (0, shared_1.alternatingPush)(disjoint, height, leaf); }; var addSharedNodeToDisjointSet = function (node, height) { (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); @@ -297,14 +299,14 @@ function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, m // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjointEntryCount - 1) { updateFrontier(frontier, 0, getRightmostIndex); - processSide(branchingFactor, disjoint, frontier, tallestIndex + 1, disjointEntryCount, 1, getRightmostIndex, getRightInsertionIndex, splitOffRightSide, updateRightMax); + processSide(cmp, branchingFactor, disjoint, frontier, tallestIndex + 1, disjointEntryCount, 1, getRightmostIndex, getRightInsertionIndex, splitOffRightSide, updateRightMax, mergeRightEntries); } // Process all subtrees to the left of the current tree if (tallestIndex - 1 >= 0) { // Note we need to update the frontier here because the right-side processing may have grown the tree taller. updateFrontier(frontier, 0, getLeftmostIndex); - processSide(branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, getLeftmostIndex, getLeftmostIndex, splitOffLeftSide, parallelWalk_1.noop // left side appending doesn't update max keys - ); + processSide(cmp, branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, getLeftmostIndex, getLeftmostIndex, splitOffLeftSide, parallelWalk_1.noop, // left side appending doesn't update max keys, + mergeLeftEntries); } var reconstructed = new constructor(undefined, cmp, maxNodeSize); reconstructed._root = frontier[0]; @@ -317,7 +319,7 @@ exports.buildFromDecomposition = buildFromDecomposition; * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. 
* @internal */ -function processSide(branchingFactor, disjoint, spine, start, end, step, sideIndex, sideInsertionIndex, splitOffSide, updateMax) { +function processSide(cmp, branchingFactor, disjoint, spine, start, end, step, sideIndex, sideInsertionIndex, splitOffSide, updateMax, mergeLeaves) { // Determine the depth of the first shared node on the frontier. // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the @@ -340,27 +342,49 @@ function processSide(branchingFactor, disjoint, spine, start, end, step, sideInd var currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf var subtree = (0, shared_1.alternatingGetSecond)(disjoint, i); var subtreeHeight = (0, shared_1.alternatingGetFirst)(disjoint, i); + var isEntryInsertion = subtreeHeight === -1; (0, b_tree_1.check)(subtreeHeight <= currentHeight, "Subtree taller than spine during reconstruction."); - var insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' + // If subtree height is -1 (indicating underfilled leaf), then this indicates insertion into a leaf + // otherwise, it points to a node whose children have height === subtreeHeight + var insertionDepth = currentHeight - (subtreeHeight + 1); // Ensure path is unshared before mutation ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); + var insertionCount = void 0; // non-recursive + var insertionSize = void 0; // recursive + if (isEntryInsertion) { + insertionCount = insertionSize = subtree.keys.length; + } + else { + insertionCount = 1; + insertionSize = subtree.size(); + } // Calculate expansion depth (first ancestor with capacity) - var expansionDepth = Math.max(0, findCascadeEndDepth(spine, insertionDepth, branchingFactor)); + var expansionDepth = Math.max(0, // -1 indicates we will cascade to new root + 
findSplitCascadeEndDepth(spine, insertionDepth, insertionCount, branchingFactor)); // Update sizes on spine above the shared ancestor before we expand updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); - // Append and cascade splits upward - var newRoot = appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); + var newRoot = undefined; + var sizeChangeDepth = void 0; + if (isEntryInsertion) { + newRoot = splitUpwardsAndInsertEntries(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide, mergeLeaves); + // if we are inserting entries, we don't have to update a cached size on the leaf as they simply return count of keys + sizeChangeDepth = insertionDepth - 1; + } + else { + newRoot = splitUpwardsAndInsert(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); + sizeChangeDepth = insertionDepth; + } if (newRoot) { // Set the spine root to the highest up new node; the rest of the spine is updated below spine[0] = newRoot; unflushedSizes.forEach(function (count) { return (0, b_tree_1.check)(count === 0, "Unexpected unflushed size after root split."); }); unflushedSizes.push(0); // new root level - isSharedFrontierDepth = insertionDepth + 2; - unflushedSizes[insertionDepth + 1] += subtree.size(); + isSharedFrontierDepth = sizeChangeDepth + 2; + unflushedSizes[sizeChangeDepth + 1] += insertionSize; } else { - isSharedFrontierDepth = insertionDepth + 1; - unflushedSizes[insertionDepth] += subtree.size(); + isSharedFrontierDepth = sizeChangeDepth + 1; + unflushedSizes[sizeChangeDepth] += insertionSize; } // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, @@ -368,18 +392,20 @@ function processSide(branchingFactor, disjoint, spine, start, end, step, sideInd updateFrontier(spine, expansionDepth, sideIndex); (0, 
b_tree_1.check)(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); (0, b_tree_1.check)(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); + // updateSizeAndMax(spine, unflushedSizes, spine.length - 1, 0, updateMax); + // spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); } // Finally, propagate any remaining unflushed sizes upward and update max keys updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); } ; /** - * Append a subtree at a given depth on the chosen side; cascade splits upward if needed. + * Cascade splits upward if capacity needed, then append a subtree at a given depth on the chosen side. * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. * Returns a new root if the root was split, otherwise undefined. */ -function appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide) { +function splitUpwardsAndInsert(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide) { // We must take care to avoid accidental propagation upward of the size of the inserted subtree // To do this, we first split nodes upward from the insertion point until we find a node with capacity // or create a new root. 
Since all un-propagated sizes have already been applied to the spine up to this point, @@ -435,12 +461,24 @@ function appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideI } } ; +function splitUpwardsAndInsertEntries(spine, insertionDepth, branchingFactor, entryContainer, sideIndex, sideInsertionIndex, splitOffSide, mergeLeaves) { + var entryCount = entryContainer.keys.length; + var parent = spine[insertionDepth]; + var parentSize = parent.keys.length; + if (parentSize + entryCount <= branchingFactor) { + mergeLeaves(parent, entryContainer); + return undefined; + } + else { + return splitUpwardsAndInsert(spine, insertionDepth - 1, branchingFactor, entryContainer, sideIndex, sideInsertionIndex, splitOffSide); + } +} /** * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. * Short-circuits if first shared node is deeper than depthTo (the insertion depth). */ function ensureNotShared(spine, isSharedFrontierDepth, depthToInclusive, sideIndex) { - if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) + if (depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this // Clone root if needed first (depth 0) if (isSharedFrontierDepth === 0) { @@ -504,10 +542,18 @@ function updateFrontier(frontier, depthLastValid, sideIndex) { /** * Find the first ancestor (starting at insertionDepth) with capacity. 
*/ -function findCascadeEndDepth(spine, insertionDepth, branchingFactor) { - for (var depth = insertionDepth; depth >= 0; depth--) { - if (spine[depth].keys.length < branchingFactor) +function findSplitCascadeEndDepth(spine, insertionDepth, insertionCount, branchingFactor) { + if (insertionDepth >= 0) { + var depth = insertionDepth; + if (spine[depth].keys.length + insertionCount <= branchingFactor) { return depth; + } + depth--; + while (depth >= 0) { + if (spine[depth].keys.length < branchingFactor) + return depth; + depth--; + } } return -1; // no capacity, will need a new root } @@ -536,5 +582,13 @@ function splitOffLeftSide(node) { return node.splitOffLeftSide(); } function updateRightMax(node, maxBelow) { - node.keys[node.keys.length - 1] = maxBelow; + node.keys[node.keys.length - 1] = node.children[node.children.length - 1].maxKey(); +} +function mergeRightEntries(leaf, entries) { + leaf.keys.push.apply(leaf.keys, entries.keys); + leaf.values.push.apply(leaf.values, entries.values); +} +function mergeLeftEntries(leaf, entries) { + leaf.keys.unshift.apply(leaf.keys, entries.keys); + leaf.values.unshift.apply(leaf.values, entries.values); } diff --git a/extended/decompose.ts b/extended/decompose.ts index 1eb7ecd..2476f5e 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -4,6 +4,7 @@ import { createCursor, getKey, Cursor, moveForwardOne, moveTo, noop } from "./pa /** * A set of disjoint nodes, their heights, and the index of the tallest node. + * A height of -1 indicates an underfilled node that must be merged. * @internal */ export type DecomposeResult = { disjoint: AlternatingList>, tallestIndex: number }; @@ -55,8 +56,10 @@ export function decompose( // Have to do this as cast to convince TS it's ever assigned = undefined as { node: BNode, height: number } | undefined; + const minSize = Math.floor(maxNodeSize / 2); const onLeafCreation = (leaf: BNode) => { - alternatingPush(disjoint, 0, leaf); + let height = leaf.keys.length < minSize ? 
-1 : 0; + alternatingPush(disjoint, height, leaf); } const addSharedNodeToDisjointSet = (node: BNode, height: number) => { @@ -387,6 +390,7 @@ export function buildFromDecomposition, K, V>( if (tallestIndex + 1 <= disjointEntryCount - 1) { updateFrontier(frontier, 0, getRightmostIndex); processSide( + cmp, branchingFactor, disjoint, frontier, @@ -395,7 +399,8 @@ export function buildFromDecomposition, K, V>( getRightmostIndex, getRightInsertionIndex, splitOffRightSide, - updateRightMax + updateRightMax, + mergeRightEntries ); } @@ -404,6 +409,7 @@ export function buildFromDecomposition, K, V>( // Note we need to update the frontier here because the right-side processing may have grown the tree taller. updateFrontier(frontier, 0, getLeftmostIndex); processSide( + cmp, branchingFactor, disjoint, frontier, @@ -413,7 +419,8 @@ export function buildFromDecomposition, K, V>( getLeftmostIndex, getLeftmostIndex, splitOffLeftSide, - noop // left side appending doesn't update max keys + noop, // left side appending doesn't update max keys, + mergeLeftEntries ); } @@ -430,6 +437,7 @@ export function buildFromDecomposition, K, V>( * @internal */ function processSide( + cmp: (a: K, b: K) => number, branchingFactor: number, disjoint: AlternatingList>, spine: BNode[], @@ -439,7 +447,8 @@ function processSide( sideIndex: (node: BNodeInternal) => number, sideInsertionIndex: (node: BNodeInternal) => number, splitOffSide: (node: BNodeInternal) => BNodeInternal, - updateMax: (node: BNodeInternal, maxBelow: K) => void + updateMax: (node: BNodeInternal, maxBelow: K) => void, + mergeLeaves: (leaf: BNode, entries: BNode) => void ): void { // Determine the depth of the first shared node on the frontier. 
// Appending subtrees to the frontier must respect the copy-on-write semantics by cloning @@ -465,30 +474,54 @@ function processSide( const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf const subtree = alternatingGetSecond(disjoint, i); const subtreeHeight = alternatingGetFirst(disjoint, i); + const isEntryInsertion = subtreeHeight === -1; check(subtreeHeight <= currentHeight, "Subtree taller than spine during reconstruction."); - const insertionDepth = currentHeight - (subtreeHeight + 1); // node at this depth has children of height 'subtreeHeight' + // If subtree height is -1 (indicating underfilled leaf), then this indicates insertion into a leaf + // otherwise, it points to a node whose children have height === subtreeHeight + const insertionDepth = currentHeight - (subtreeHeight + 1); // Ensure path is unshared before mutation ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); + let insertionCount: number; // non-recursive + let insertionSize: number; // recursive + if (isEntryInsertion) { + insertionCount = insertionSize = subtree.keys.length; + } else { + insertionCount = 1; + insertionSize = subtree.size(); + } + // Calculate expansion depth (first ancestor with capacity) - const expansionDepth = Math.max(0, findCascadeEndDepth(spine, insertionDepth, branchingFactor)); + const expansionDepth = Math.max( + 0, // -1 indicates we will cascade to new root + findSplitCascadeEndDepth(spine, insertionDepth, insertionCount, branchingFactor) + ); // Update sizes on spine above the shared ancestor before we expand updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); - // Append and cascade splits upward - const newRoot = appendAndCascade(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); + let newRoot: BNodeInternal | undefined = undefined; + let sizeChangeDepth: number; + if (isEntryInsertion) { + newRoot = 
splitUpwardsAndInsertEntries(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide, mergeLeaves, updateMax); + // if we are inserting entries, we don't have to update a cached size on the leaf as they simply return count of keys + sizeChangeDepth = insertionDepth - 1; + } else { + newRoot = splitUpwardsAndInsert(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide, updateMax); + sizeChangeDepth = insertionDepth; + } + if (newRoot) { // Set the spine root to the highest up new node; the rest of the spine is updated below spine[0] = newRoot; unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); unflushedSizes.push(0); // new root level - isSharedFrontierDepth = insertionDepth + 2; - unflushedSizes[insertionDepth + 1] += subtree.size(); + isSharedFrontierDepth = sizeChangeDepth + 2; + unflushedSizes[sizeChangeDepth + 1] += insertionSize; } else { - isSharedFrontierDepth = insertionDepth + 1; - unflushedSizes[insertionDepth] += subtree.size(); + isSharedFrontierDepth = sizeChangeDepth + 1; + unflushedSizes[sizeChangeDepth] += insertionSize; } // Finally, update the frontier from the highest new node downward @@ -497,6 +530,8 @@ function processSide( updateFrontier(spine, expansionDepth, sideIndex); check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); + // updateSizeAndMax(spine, unflushedSizes, spine.length - 1, 0, updateMax); + // spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); } // Finally, propagate any remaining unflushed sizes upward and update max keys @@ -504,19 +539,20 @@ function processSide( }; /** - * Append a subtree at a given depth on the chosen side; cascade splits upward if needed. 
+ * Cascade splits upward if capacity needed, then append a subtree at a given depth on the chosen side. * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. * Returns a new root if the root was split, otherwise undefined. */ -function appendAndCascade( +function splitUpwardsAndInsert( spine: BNode[], insertionDepth: number, branchingFactor: number, subtree: BNode, sideIndex: (node: BNodeInternal) => number, sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal + splitOffSide: (node: BNodeInternal) => BNodeInternal, + updateMax: (node: BNodeInternal, maxBelow: K) => void ): BNodeInternal | undefined { // We must take care to avoid accidental propagation upward of the size of the inserted subtree // To do this, we first split nodes upward from the insertion point until we find a node with capacity @@ -535,9 +571,9 @@ function appendAndCascade( let d = insertionDepth - 1; while (carry && d >= 0) { const parent = spine[d] as BNodeInternal; - const idx = sideIndex(parent); + const sideChildIndex = sideIndex(parent); // Refresh last key since child was split - parent.keys[idx] = parent.children[idx].maxKey(); + updateMax(parent, parent.children[sideChildIndex].maxKey()); if (parent.keys.length < branchingFactor) { // We have reached the end of the cascade insertNoCount(parent, sideInsertionIndex(parent), carry); @@ -575,6 +611,39 @@ function appendAndCascade( } }; +function splitUpwardsAndInsertEntries( + spine: BNode[], + insertionDepth: number, + branchingFactor: number, + entryContainer: BNode, + sideIndex: (node: BNodeInternal) => number, + sideInsertionIndex: (node: BNodeInternal) => number, + splitOffSide: (node: BNodeInternal) => BNodeInternal, + mergeLeaves: (leaf: BNode, entries: BNode) => void, + updateMax: (node: BNodeInternal, 
maxBelow: K) => void +): BNodeInternal | undefined { + const entryCount = entryContainer.keys.length; + const parent = spine[insertionDepth]; + const parentSize = parent.keys.length; + if (parentSize + entryCount <= branchingFactor) { + mergeLeaves(parent, entryContainer); + return undefined; + } else { + const minSize = Math.floor(branchingFactor / 2); + const toTake = minSize - entryCount; + return splitUpwardsAndInsert( + spine, + insertionDepth - 1, + branchingFactor, + entryContainer, + sideIndex, + sideInsertionIndex, + splitOffSide, + updateMax + ); + } +} + /** * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. * Short-circuits if first shared node is deeper than depthTo (the insertion depth). @@ -584,13 +653,13 @@ function ensureNotShared( isSharedFrontierDepth: number, depthToInclusive: number, sideIndex: (node: BNodeInternal) => number) { - if (spine.length === 1 /* only a leaf */ || depthToInclusive < 0 /* new root case */) + if (depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this // Clone root if needed first (depth 0) if (isSharedFrontierDepth === 0) { const root = spine[0]; - spine[0] = root.clone() as BNodeInternal; + spine[0] = root.clone(); } // Clone downward along the frontier to 'depthToInclusive' @@ -599,7 +668,7 @@ function ensureNotShared( const childIndex = sideIndex(parent); const clone = parent.children[childIndex].clone(); parent.children[childIndex] = clone; - spine[depth] = clone as BNodeInternal; + spine[depth] = clone; } }; @@ -655,10 +724,18 @@ function updateFrontier(frontier: BNode[], depthLastValid: number, s /** * Find the first ancestor (starting at insertionDepth) with capacity. 
*/ -function findCascadeEndDepth(spine: BNode[], insertionDepth: number, branchingFactor: number): number { - for (let depth = insertionDepth; depth >= 0; depth--) { - if (spine[depth].keys.length < branchingFactor) +function findSplitCascadeEndDepth(spine: BNode[], insertionDepth: number, insertionCount: number, branchingFactor: number): number { + if (insertionDepth >= 0) { + let depth = insertionDepth; + if (spine[depth].keys.length + insertionCount <= branchingFactor) { return depth; + } + depth--; + while (depth >= 0) { + if (spine[depth].keys.length < branchingFactor) + return depth; + depth-- + } } return -1; // no capacity, will need a new root }; @@ -700,3 +777,14 @@ function splitOffLeftSide(node: BNodeInternal): BNodeInternal function updateRightMax(node: BNodeInternal, maxBelow: K): void { node.keys[node.keys.length - 1] = maxBelow; } + +function mergeRightEntries(leaf: BNode, entries: BNode): void { + leaf.keys.push.apply(leaf.keys, entries.keys); + leaf.values.push.apply(leaf.values, entries.values); +} + +function mergeLeftEntries(leaf: BNode, entries: BNode): void{ + leaf.keys.unshift.apply(leaf.keys, entries.keys); + leaf.values.unshift.apply(leaf.values, entries.values); +} + From 61652df408f9d5c3305b489a08b17db80df518b6 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 18 Nov 2025 08:07:32 -0800 Subject: [PATCH 124/143] context obj --- extended/decompose.js | 74 +++++++++++++++++--------- extended/decompose.ts | 120 +++++++++++++++++++++--------------------- 2 files changed, 109 insertions(+), 85 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index 78f514c..1ababd8 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -296,17 +296,34 @@ function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, m // current frontier because we start from the tallest subtree and work outward. 
var initialRoot = (0, shared_1.alternatingGetSecond)(disjoint, tallestIndex); var frontier = [initialRoot]; + var rightContext = { + branchingFactor: branchingFactor, + spine: frontier, + sideIndex: getRightmostIndex, + sideInsertionIndex: getRightInsertionIndex, + splitOffSide: splitOffRightSide, + updateMax: updateRightMax, + mergeLeaves: mergeRightEntries + }; // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjointEntryCount - 1) { - updateFrontier(frontier, 0, getRightmostIndex); - processSide(cmp, branchingFactor, disjoint, frontier, tallestIndex + 1, disjointEntryCount, 1, getRightmostIndex, getRightInsertionIndex, splitOffRightSide, updateRightMax, mergeRightEntries); + updateFrontier(rightContext, 0); + processSide(cmp, disjoint, tallestIndex + 1, disjointEntryCount, 1, rightContext); } + var leftContext = { + branchingFactor: branchingFactor, + spine: frontier, + sideIndex: getLeftmostIndex, + sideInsertionIndex: getLeftmostIndex, + splitOffSide: splitOffLeftSide, + updateMax: parallelWalk_1.noop, + mergeLeaves: mergeLeftEntries + }; // Process all subtrees to the left of the current tree if (tallestIndex - 1 >= 0) { // Note we need to update the frontier here because the right-side processing may have grown the tree taller. - updateFrontier(frontier, 0, getLeftmostIndex); - processSide(cmp, branchingFactor, disjoint, frontier, tallestIndex - 1, -1, -1, getLeftmostIndex, getLeftmostIndex, splitOffLeftSide, parallelWalk_1.noop, // left side appending doesn't update max keys, - mergeLeftEntries); + updateFrontier(leftContext, 0); + processSide(cmp, disjoint, tallestIndex - 1, -1, -1, leftContext); } var reconstructed = new constructor(undefined, cmp, maxNodeSize); reconstructed._root = frontier[0]; @@ -319,7 +336,8 @@ exports.buildFromDecomposition = buildFromDecomposition; * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. 
* @internal */ -function processSide(cmp, branchingFactor, disjoint, spine, start, end, step, sideIndex, sideInsertionIndex, splitOffSide, updateMax, mergeLeaves) { +function processSide(cmp, disjoint, start, end, step, context) { + var spine = context.spine, sideIndex = context.sideIndex; // Determine the depth of the first shared node on the frontier. // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the @@ -348,7 +366,7 @@ function processSide(cmp, branchingFactor, disjoint, spine, start, end, step, si // otherwise, it points to a node whose children have height === subtreeHeight var insertionDepth = currentHeight - (subtreeHeight + 1); // Ensure path is unshared before mutation - ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); + ensureNotShared(context, isSharedFrontierDepth, insertionDepth); var insertionCount = void 0; // non-recursive var insertionSize = void 0; // recursive if (isEntryInsertion) { @@ -360,18 +378,18 @@ function processSide(cmp, branchingFactor, disjoint, spine, start, end, step, si } // Calculate expansion depth (first ancestor with capacity) var expansionDepth = Math.max(0, // -1 indicates we will cascade to new root - findSplitCascadeEndDepth(spine, insertionDepth, insertionCount, branchingFactor)); + findSplitCascadeEndDepth(context, insertionDepth, insertionCount)); // Update sizes on spine above the shared ancestor before we expand - updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); + updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, expansionDepth); var newRoot = undefined; var sizeChangeDepth = void 0; if (isEntryInsertion) { - newRoot = splitUpwardsAndInsertEntries(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide, mergeLeaves); + newRoot = 
splitUpwardsAndInsertEntries(context, insertionDepth, subtree); // if we are inserting entries, we don't have to update a cached size on the leaf as they simply return count of keys sizeChangeDepth = insertionDepth - 1; } else { - newRoot = splitUpwardsAndInsert(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide); + newRoot = splitUpwardsAndInsert(context, insertionDepth, subtree); sizeChangeDepth = insertionDepth; } if (newRoot) { @@ -389,14 +407,14 @@ function processSide(cmp, branchingFactor, disjoint, spine, start, end, step, si // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. - updateFrontier(spine, expansionDepth, sideIndex); + updateFrontier(context, expansionDepth); (0, b_tree_1.check)(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); (0, b_tree_1.check)(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); - // updateSizeAndMax(spine, unflushedSizes, spine.length - 1, 0, updateMax); + // updateSizeAndMax(context, unflushedSizes, spine.length - 1, 0); // spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); } // Finally, propagate any remaining unflushed sizes upward and update max keys - updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); + updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, 0); } ; /** @@ -405,7 +423,8 @@ function processSide(cmp, branchingFactor, disjoint, spine, start, end, step, si * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. * Returns a new root if the root was split, otherwise undefined. 
*/ -function splitUpwardsAndInsert(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide) { +function splitUpwardsAndInsert(context, insertionDepth, subtree) { + var spine = context.spine, branchingFactor = context.branchingFactor, sideIndex = context.sideIndex, sideInsertionIndex = context.sideInsertionIndex, splitOffSide = context.splitOffSide, updateMax = context.updateMax; // We must take care to avoid accidental propagation upward of the size of the inserted subtree // To do this, we first split nodes upward from the insertion point until we find a node with capacity // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, @@ -421,9 +440,9 @@ function splitUpwardsAndInsert(spine, insertionDepth, branchingFactor, subtree, var d = insertionDepth - 1; while (carry && d >= 0) { var parent = spine[d]; - var idx = sideIndex(parent); + var sideChildIndex = sideIndex(parent); // Refresh last key since child was split - parent.keys[idx] = parent.children[idx].maxKey(); + updateMax(parent, parent.children[sideChildIndex].maxKey()); if (parent.keys.length < branchingFactor) { // We have reached the end of the cascade insertNoCount(parent, sideInsertionIndex(parent), carry); @@ -461,7 +480,8 @@ function splitUpwardsAndInsert(spine, insertionDepth, branchingFactor, subtree, } } ; -function splitUpwardsAndInsertEntries(spine, insertionDepth, branchingFactor, entryContainer, sideIndex, sideInsertionIndex, splitOffSide, mergeLeaves) { +function splitUpwardsAndInsertEntries(context, insertionDepth, entryContainer) { + var branchingFactor = context.branchingFactor, spine = context.spine, mergeLeaves = context.mergeLeaves; var entryCount = entryContainer.keys.length; var parent = spine[insertionDepth]; var parentSize = parent.keys.length; @@ -470,14 +490,17 @@ function splitUpwardsAndInsertEntries(spine, insertionDepth, branchingFactor, en return undefined; } else { - return 
splitUpwardsAndInsert(spine, insertionDepth - 1, branchingFactor, entryContainer, sideIndex, sideInsertionIndex, splitOffSide); + var minSize = Math.floor(branchingFactor / 2); + var toTake = minSize - entryCount; + return splitUpwardsAndInsert(context, insertionDepth - 1, entryContainer); } } /** * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. * Short-circuits if first shared node is deeper than depthTo (the insertion depth). */ -function ensureNotShared(spine, isSharedFrontierDepth, depthToInclusive, sideIndex) { +function ensureNotShared(context, isSharedFrontierDepth, depthToInclusive) { + var spine = context.spine, sideIndex = context.sideIndex; if (depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this // Clone root if needed first (depth 0) @@ -498,7 +521,8 @@ function ensureNotShared(spine, isSharedFrontierDepth, depthToInclusive, sideInd /** * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ -function updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive, updateMax) { +function updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive) { + var spine = context.spine, updateMax = context.updateMax; // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because // the insertion point is inside a shared node which will always have correct sizes var maxKey = spine[isSharedFrontierDepth].maxKey(); @@ -522,7 +546,8 @@ function updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, depthUpT * Update a spine (frontier) from a specific depth down, inclusive. * Extends the frontier array if it is not already as long as the frontier. 
*/ -function updateFrontier(frontier, depthLastValid, sideIndex) { +function updateFrontier(context, depthLastValid) { + var frontier = context.spine, sideIndex = context.sideIndex; (0, b_tree_1.check)(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); var startingAncestor = frontier[depthLastValid]; if (startingAncestor.isLeaf) @@ -542,7 +567,8 @@ function updateFrontier(frontier, depthLastValid, sideIndex) { /** * Find the first ancestor (starting at insertionDepth) with capacity. */ -function findSplitCascadeEndDepth(spine, insertionDepth, insertionCount, branchingFactor) { +function findSplitCascadeEndDepth(context, insertionDepth, insertionCount) { + var spine = context.spine, branchingFactor = context.branchingFactor; if (insertionDepth >= 0) { var depth = insertionDepth; if (spine[depth].keys.length + insertionCount <= branchingFactor) { @@ -582,7 +608,7 @@ function splitOffLeftSide(node) { return node.splitOffLeftSide(); } function updateRightMax(node, maxBelow) { - node.keys[node.keys.length - 1] = node.children[node.children.length - 1].maxKey(); + node.keys[node.keys.length - 1] = maxBelow; } function mergeRightEntries(leaf, entries) { leaf.keys.push.apply(leaf.keys, entries.keys); diff --git a/extended/decompose.ts b/extended/decompose.ts index 2476f5e..9f13609 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -386,41 +386,47 @@ export function buildFromDecomposition, K, V>( const initialRoot = alternatingGetSecond(disjoint, tallestIndex); const frontier: BNode[] = [initialRoot]; + const rightContext: SideMergeContext = { + branchingFactor, + spine: frontier, + sideIndex: getRightmostIndex, + sideInsertionIndex: getRightInsertionIndex, + splitOffSide: splitOffRightSide, + updateMax: updateRightMax, + mergeLeaves: mergeRightEntries + }; // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjointEntryCount - 1) { - updateFrontier(frontier, 0, getRightmostIndex); + 
updateFrontier(rightContext, 0); processSide( cmp, - branchingFactor, disjoint, - frontier, tallestIndex + 1, disjointEntryCount, 1, - getRightmostIndex, - getRightInsertionIndex, - splitOffRightSide, - updateRightMax, - mergeRightEntries + rightContext ); } + const leftContext: SideMergeContext = { + branchingFactor, + spine: frontier, + sideIndex: getLeftmostIndex, + sideInsertionIndex: getLeftmostIndex, + splitOffSide: splitOffLeftSide, + updateMax: noop, // left side appending doesn't update max keys, + mergeLeaves: mergeLeftEntries + }; // Process all subtrees to the left of the current tree if (tallestIndex - 1 >= 0) { // Note we need to update the frontier here because the right-side processing may have grown the tree taller. - updateFrontier(frontier, 0, getLeftmostIndex); + updateFrontier(leftContext, 0); processSide( cmp, - branchingFactor, disjoint, - frontier, tallestIndex - 1, -1, -1, - getLeftmostIndex, - getLeftmostIndex, - splitOffLeftSide, - noop, // left side appending doesn't update max keys, - mergeLeftEntries + leftContext ); } @@ -438,18 +444,13 @@ export function buildFromDecomposition, K, V>( */ function processSide( cmp: (a: K, b: K) => number, - branchingFactor: number, disjoint: AlternatingList>, - spine: BNode[], start: number, end: number, step: number, - sideIndex: (node: BNodeInternal) => number, - sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal, - updateMax: (node: BNodeInternal, maxBelow: K) => void, - mergeLeaves: (leaf: BNode, entries: BNode) => void + context: SideMergeContext ): void { + const { spine, sideIndex } = context; // Determine the depth of the first shared node on the frontier. // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning // any shared nodes down to the insertion point. 
We track it by depth to avoid a log(n) walk of the @@ -481,7 +482,7 @@ function processSide( const insertionDepth = currentHeight - (subtreeHeight + 1); // Ensure path is unshared before mutation - ensureNotShared(spine, isSharedFrontierDepth, insertionDepth, sideIndex); + ensureNotShared(context, isSharedFrontierDepth, insertionDepth); let insertionCount: number; // non-recursive let insertionSize: number; // recursive @@ -495,20 +496,20 @@ function processSide( // Calculate expansion depth (first ancestor with capacity) const expansionDepth = Math.max( 0, // -1 indicates we will cascade to new root - findSplitCascadeEndDepth(spine, insertionDepth, insertionCount, branchingFactor) + findSplitCascadeEndDepth(context, insertionDepth, insertionCount) ); // Update sizes on spine above the shared ancestor before we expand - updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, expansionDepth, updateMax); + updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, expansionDepth); let newRoot: BNodeInternal | undefined = undefined; let sizeChangeDepth: number; if (isEntryInsertion) { - newRoot = splitUpwardsAndInsertEntries(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide, mergeLeaves, updateMax); + newRoot = splitUpwardsAndInsertEntries(context, insertionDepth, subtree); // if we are inserting entries, we don't have to update a cached size on the leaf as they simply return count of keys sizeChangeDepth = insertionDepth - 1; } else { - newRoot = splitUpwardsAndInsert(spine, insertionDepth, branchingFactor, subtree, sideIndex, sideInsertionIndex, splitOffSide, updateMax); + newRoot = splitUpwardsAndInsert(context, insertionDepth, subtree); sizeChangeDepth = insertionDepth; } @@ -527,15 +528,15 @@ function processSide( // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. 
- updateFrontier(spine, expansionDepth, sideIndex); + updateFrontier(context, expansionDepth); check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); - // updateSizeAndMax(spine, unflushedSizes, spine.length - 1, 0, updateMax); + // updateSizeAndMax(context, unflushedSizes, spine.length - 1, 0); // spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); } // Finally, propagate any remaining unflushed sizes upward and update max keys - updateSizeAndMax(spine, unflushedSizes, isSharedFrontierDepth, 0, updateMax); + updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, 0); }; /** @@ -545,15 +546,11 @@ function processSide( * Returns a new root if the root was split, otherwise undefined. */ function splitUpwardsAndInsert( - spine: BNode[], + context: SideMergeContext, insertionDepth: number, - branchingFactor: number, - subtree: BNode, - sideIndex: (node: BNodeInternal) => number, - sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal, - updateMax: (node: BNodeInternal, maxBelow: K) => void + subtree: BNode ): BNodeInternal | undefined { + const { spine, branchingFactor, sideIndex, sideInsertionIndex, splitOffSide, updateMax } = context; // We must take care to avoid accidental propagation upward of the size of the inserted subtree // To do this, we first split nodes upward from the insertion point until we find a node with capacity // or create a new root. 
Since all un-propagated sizes have already been applied to the spine up to this point, @@ -612,16 +609,11 @@ function splitUpwardsAndInsert( }; function splitUpwardsAndInsertEntries( - spine: BNode[], + context: SideMergeContext, insertionDepth: number, - branchingFactor: number, - entryContainer: BNode, - sideIndex: (node: BNodeInternal) => number, - sideInsertionIndex: (node: BNodeInternal) => number, - splitOffSide: (node: BNodeInternal) => BNodeInternal, - mergeLeaves: (leaf: BNode, entries: BNode) => void, - updateMax: (node: BNodeInternal, maxBelow: K) => void + entryContainer: BNode ): BNodeInternal | undefined { + const { branchingFactor, spine, mergeLeaves } = context; const entryCount = entryContainer.keys.length; const parent = spine[insertionDepth]; const parentSize = parent.keys.length; @@ -632,14 +624,9 @@ function splitUpwardsAndInsertEntries( const minSize = Math.floor(branchingFactor / 2); const toTake = minSize - entryCount; return splitUpwardsAndInsert( - spine, + context, insertionDepth - 1, - branchingFactor, - entryContainer, - sideIndex, - sideInsertionIndex, - splitOffSide, - updateMax + entryContainer ); } } @@ -649,10 +636,10 @@ function splitUpwardsAndInsertEntries( * Short-circuits if first shared node is deeper than depthTo (the insertion depth). 
*/ function ensureNotShared( - spine: BNode[], + context: SideMergeContext, isSharedFrontierDepth: number, - depthToInclusive: number, - sideIndex: (node: BNodeInternal) => number) { + depthToInclusive: number) { + const { spine, sideIndex } = context; if (depthToInclusive < 0 /* new root case */) return; // nothing to clone when root is a leaf; equal-height case will handle this @@ -676,11 +663,11 @@ function ensureNotShared( * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ function updateSizeAndMax( - spine: BNode[], + context: SideMergeContext, unflushedSizes: number[], isSharedFrontierDepth: number, - depthUpToInclusive: number, - updateMax: (node: BNodeInternal, maxBelow: K) => void) { + depthUpToInclusive: number) { + const { spine, updateMax } = context; // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because // the insertion point is inside a shared node which will always have correct sizes const maxKey = spine[isSharedFrontierDepth].maxKey(); @@ -704,7 +691,8 @@ function updateSizeAndMax( * Update a spine (frontier) from a specific depth down, inclusive. * Extends the frontier array if it is not already as long as the frontier. */ -function updateFrontier(frontier: BNode[], depthLastValid: number, sideIndex: (node: BNodeInternal) => number): void { +function updateFrontier(context: SideMergeContext, depthLastValid: number): void { + const { spine: frontier, sideIndex } = context; check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); const startingAncestor = frontier[depthLastValid]; if (startingAncestor.isLeaf) @@ -724,7 +712,8 @@ function updateFrontier(frontier: BNode[], depthLastValid: number, s /** * Find the first ancestor (starting at insertionDepth) with capacity. 
*/ -function findSplitCascadeEndDepth(spine: BNode[], insertionDepth: number, insertionCount: number, branchingFactor: number): number { +function findSplitCascadeEndDepth(context: SideMergeContext, insertionDepth: number, insertionCount: number): number { + const { spine, branchingFactor } = context; if (insertionDepth >= 0) { let depth = insertionDepth; if (spine[depth].keys.length + insertionCount <= branchingFactor) { @@ -788,3 +777,12 @@ function mergeLeftEntries(leaf: BNode, entries: BNode): void{ leaf.values.unshift.apply(leaf.values, entries.values); } +type SideMergeContext = { + branchingFactor: number; + spine: BNode[]; + sideIndex: (node: BNodeInternal) => number; + sideInsertionIndex: (node: BNodeInternal) => number; + splitOffSide: (node: BNodeInternal) => BNodeInternal; + updateMax: (node: BNodeInternal, maxBelow: K) => void; + mergeLeaves: (leaf: BNode, entries: BNode) => void; +}; From 0e306796732c0ee246267ee26078f055141e5f5f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Tue, 18 Nov 2025 08:07:57 -0800 Subject: [PATCH 125/143] rename --- extended/decompose.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/extended/decompose.ts b/extended/decompose.ts index 9f13609..a7574ad 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -386,7 +386,7 @@ export function buildFromDecomposition, K, V>( const initialRoot = alternatingGetSecond(disjoint, tallestIndex); const frontier: BNode[] = [initialRoot]; - const rightContext: SideMergeContext = { + const rightContext: SideContext = { branchingFactor, spine: frontier, sideIndex: getRightmostIndex, @@ -407,7 +407,7 @@ export function buildFromDecomposition, K, V>( ); } - const leftContext: SideMergeContext = { + const leftContext: SideContext = { branchingFactor, spine: frontier, sideIndex: getLeftmostIndex, @@ -448,7 +448,7 @@ function processSide( start: number, end: number, step: number, - context: SideMergeContext + context: SideContext ): void { 
const { spine, sideIndex } = context; // Determine the depth of the first shared node on the frontier. @@ -546,7 +546,7 @@ function processSide( * Returns a new root if the root was split, otherwise undefined. */ function splitUpwardsAndInsert( - context: SideMergeContext, + context: SideContext, insertionDepth: number, subtree: BNode ): BNodeInternal | undefined { @@ -609,7 +609,7 @@ function splitUpwardsAndInsert( }; function splitUpwardsAndInsertEntries( - context: SideMergeContext, + context: SideContext, insertionDepth: number, entryContainer: BNode ): BNodeInternal | undefined { @@ -636,7 +636,7 @@ function splitUpwardsAndInsertEntries( * Short-circuits if first shared node is deeper than depthTo (the insertion depth). */ function ensureNotShared( - context: SideMergeContext, + context: SideContext, isSharedFrontierDepth: number, depthToInclusive: number) { const { spine, sideIndex } = context; @@ -663,7 +663,7 @@ function ensureNotShared( * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) */ function updateSizeAndMax( - context: SideMergeContext, + context: SideContext, unflushedSizes: number[], isSharedFrontierDepth: number, depthUpToInclusive: number) { @@ -691,7 +691,7 @@ function updateSizeAndMax( * Update a spine (frontier) from a specific depth down, inclusive. * Extends the frontier array if it is not already as long as the frontier. */ -function updateFrontier(context: SideMergeContext, depthLastValid: number): void { +function updateFrontier(context: SideContext, depthLastValid: number): void { const { spine: frontier, sideIndex } = context; check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); const startingAncestor = frontier[depthLastValid]; @@ -712,7 +712,7 @@ function updateFrontier(context: SideMergeContext, depthLastValid: n /** * Find the first ancestor (starting at insertionDepth) with capacity. 
*/ -function findSplitCascadeEndDepth(context: SideMergeContext, insertionDepth: number, insertionCount: number): number { +function findSplitCascadeEndDepth(context: SideContext, insertionDepth: number, insertionCount: number): number { const { spine, branchingFactor } = context; if (insertionDepth >= 0) { let depth = insertionDepth; @@ -777,7 +777,7 @@ function mergeLeftEntries(leaf: BNode, entries: BNode): void{ leaf.values.unshift.apply(leaf.values, entries.values); } -type SideMergeContext = { +type SideContext = { branchingFactor: number; spine: BNode[]; sideIndex: (node: BNodeInternal) => number; From 5cfd06033c478ccb854e1d8ed861a6533ecd62a5 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 19 Nov 2025 11:21:22 -0800 Subject: [PATCH 126/143] all tests passing --- extended/decompose.js | 52 ++++++++++++++++++++++++++++---------- extended/decompose.ts | 59 +++++++++++++++++++++++++++++++------------ 2 files changed, 82 insertions(+), 29 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index 1ababd8..c4a610c 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -302,13 +302,14 @@ function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, m sideIndex: getRightmostIndex, sideInsertionIndex: getRightInsertionIndex, splitOffSide: splitOffRightSide, + balanceLeaves: balanceLeavesRight, updateMax: updateRightMax, mergeLeaves: mergeRightEntries }; // Process all subtrees to the right of the tallest subtree if (tallestIndex + 1 <= disjointEntryCount - 1) { updateFrontier(rightContext, 0); - processSide(cmp, disjoint, tallestIndex + 1, disjointEntryCount, 1, rightContext); + processSide(disjoint, tallestIndex + 1, disjointEntryCount, 1, rightContext); } var leftContext = { branchingFactor: branchingFactor, @@ -316,6 +317,7 @@ function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, m sideIndex: getLeftmostIndex, sideInsertionIndex: getLeftmostIndex, splitOffSide: splitOffLeftSide, + 
balanceLeaves: balanceLeavesLeft, updateMax: parallelWalk_1.noop, mergeLeaves: mergeLeftEntries }; @@ -323,7 +325,7 @@ function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, m if (tallestIndex - 1 >= 0) { // Note we need to update the frontier here because the right-side processing may have grown the tree taller. updateFrontier(leftContext, 0); - processSide(cmp, disjoint, tallestIndex - 1, -1, -1, leftContext); + processSide(disjoint, tallestIndex - 1, -1, -1, leftContext); } var reconstructed = new constructor(undefined, cmp, maxNodeSize); reconstructed._root = frontier[0]; @@ -336,7 +338,7 @@ exports.buildFromDecomposition = buildFromDecomposition; * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. * @internal */ -function processSide(cmp, disjoint, start, end, step, context) { +function processSide(disjoint, start, end, step, context) { var spine = context.spine, sideIndex = context.sideIndex; // Determine the depth of the first shared node on the frontier. 
// Appending subtrees to the frontier must respect the copy-on-write semantics by cloning @@ -376,9 +378,10 @@ function processSide(cmp, disjoint, start, end, step, context) { insertionCount = 1; insertionSize = subtree.size(); } + var cascadeEndDepth = findSplitCascadeEndDepth(context, insertionDepth, insertionCount); // Calculate expansion depth (first ancestor with capacity) var expansionDepth = Math.max(0, // -1 indicates we will cascade to new root - findSplitCascadeEndDepth(context, insertionDepth, insertionCount)); + cascadeEndDepth); // Update sizes on spine above the shared ancestor before we expand updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, expansionDepth); var newRoot = undefined; @@ -389,7 +392,7 @@ function processSide(cmp, disjoint, start, end, step, context) { sizeChangeDepth = insertionDepth - 1; } else { - newRoot = splitUpwardsAndInsert(context, insertionDepth, subtree); + newRoot = splitUpwardsAndInsert(context, insertionDepth, subtree)[0]; sizeChangeDepth = insertionDepth; } if (newRoot) { @@ -410,8 +413,9 @@ function processSide(cmp, disjoint, start, end, step, context) { updateFrontier(context, expansionDepth); (0, b_tree_1.check)(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); (0, b_tree_1.check)(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); - // updateSizeAndMax(context, unflushedSizes, spine.length - 1, 0); - // spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); + // Useful for debugging: + //updateSizeAndMax(context, unflushedSizes, spine.length - 1, 0); + //spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); } // Finally, propagate any remaining unflushed sizes upward and update max keys updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, 0); @@ -421,7 +425,7 @@ function processSide(cmp, disjoint, start, end, step, context) { * Cascade splits 
upward if capacity needed, then append a subtree at a given depth on the chosen side. * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. - * Returns a new root if the root was split, otherwise undefined. + * Returns a new root if the root was split, otherwise undefined, and the node into which the subtree was inserted. */ function splitUpwardsAndInsert(context, insertionDepth, subtree) { var spine = context.spine, branchingFactor = context.branchingFactor, sideIndex = context.sideIndex, sideInsertionIndex = context.sideInsertionIndex, splitOffSide = context.splitOffSide, updateMax = context.updateMax; @@ -453,7 +457,7 @@ function splitUpwardsAndInsert(context, insertionDepth, subtree) { // Example: a node is at max capacity 4, with children each of size 4 for 16 total. // We split the node into two nodes of 2 children each, but this does *not* modify the size // of its parent. Therefore when we insert the carry into the torn-off node, we must not - // increase its size or we will double-count the size of the carry su + // increase its size or we will double-count the size of the carry subtree. 
var tornOff = splitOffSide(parent); insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); carry = tornOff; @@ -469,19 +473,19 @@ function splitUpwardsAndInsert(context, insertionDepth, subtree) { } // Finally, insert the subtree at the insertion point insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); - return newRoot; + return [newRoot, insertTarget]; } else { // Insertion of subtree with equal height to current tree var oldRoot = spine[0]; var newRoot = new b_tree_1.BNodeInternal([oldRoot], oldRoot.size()); insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); - return newRoot; + return [newRoot, newRoot]; } } ; function splitUpwardsAndInsertEntries(context, insertionDepth, entryContainer) { - var branchingFactor = context.branchingFactor, spine = context.spine, mergeLeaves = context.mergeLeaves; + var branchingFactor = context.branchingFactor, spine = context.spine, balanceLeaves = context.balanceLeaves, mergeLeaves = context.mergeLeaves; var entryCount = entryContainer.keys.length; var parent = spine[insertionDepth]; var parentSize = parent.keys.length; @@ -490,9 +494,13 @@ function splitUpwardsAndInsertEntries(context, insertionDepth, entryContainer) { return undefined; } else { + // As with the internal node splitUpwardsAndInsert method, this method also must make all structural changes + // to the tree before inserting any new content. This is to avoid accidental propagation of sizes upward. 
+ var _a = splitUpwardsAndInsert(context, insertionDepth - 1, entryContainer), newRoot = _a[0], grandparent = _a[1]; var minSize = Math.floor(branchingFactor / 2); var toTake = minSize - entryCount; - return splitUpwardsAndInsert(context, insertionDepth - 1, entryContainer); + balanceLeaves(grandparent, entryContainer, toTake); + return newRoot; } } /** @@ -607,6 +615,24 @@ function splitOffRightSide(node) { function splitOffLeftSide(node) { return node.splitOffLeftSide(); } +function balanceLeavesRight(parent, underfilled, toTake) { + var siblingIndex = parent.children.length - 2; + var sibling = parent.children[siblingIndex]; + var index = sibling.keys.length - toTake; + var movedKeys = sibling.keys.splice(index); + var movedValues = sibling.values.splice(index); + underfilled.keys.unshift.apply(underfilled.keys, movedKeys); + underfilled.values.unshift.apply(underfilled.values, movedValues); + parent.keys[siblingIndex] = sibling.maxKey(); +} +function balanceLeavesLeft(parent, underfilled, toTake) { + var sibling = parent.children[1]; + var movedKeys = sibling.keys.splice(0, toTake); + var movedValues = sibling.values.splice(0, toTake); + underfilled.keys.push.apply(underfilled.keys, movedKeys); + underfilled.values.push.apply(underfilled.values, movedValues); + parent.keys[0] = underfilled.maxKey(); +} function updateRightMax(node, maxBelow) { node.keys[node.keys.length - 1] = maxBelow; } diff --git a/extended/decompose.ts b/extended/decompose.ts index a7574ad..41d5414 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -392,6 +392,7 @@ export function buildFromDecomposition, K, V>( sideIndex: getRightmostIndex, sideInsertionIndex: getRightInsertionIndex, splitOffSide: splitOffRightSide, + balanceLeaves: balanceLeavesRight, updateMax: updateRightMax, mergeLeaves: mergeRightEntries }; @@ -399,7 +400,6 @@ export function buildFromDecomposition, K, V>( if (tallestIndex + 1 <= disjointEntryCount - 1) { updateFrontier(rightContext, 0); processSide( - 
cmp, disjoint, tallestIndex + 1, disjointEntryCount, 1, @@ -413,6 +413,7 @@ export function buildFromDecomposition, K, V>( sideIndex: getLeftmostIndex, sideInsertionIndex: getLeftmostIndex, splitOffSide: splitOffLeftSide, + balanceLeaves: balanceLeavesLeft, updateMax: noop, // left side appending doesn't update max keys, mergeLeaves: mergeLeftEntries }; @@ -421,7 +422,6 @@ export function buildFromDecomposition, K, V>( // Note we need to update the frontier here because the right-side processing may have grown the tree taller. updateFrontier(leftContext, 0); processSide( - cmp, disjoint, tallestIndex - 1, -1, @@ -443,7 +443,6 @@ export function buildFromDecomposition, K, V>( * @internal */ function processSide( - cmp: (a: K, b: K) => number, disjoint: AlternatingList>, start: number, end: number, @@ -493,10 +492,12 @@ function processSide( insertionSize = subtree.size(); } + const cascadeEndDepth = findSplitCascadeEndDepth(context, insertionDepth, insertionCount); + // Calculate expansion depth (first ancestor with capacity) const expansionDepth = Math.max( 0, // -1 indicates we will cascade to new root - findSplitCascadeEndDepth(context, insertionDepth, insertionCount) + cascadeEndDepth ); // Update sizes on spine above the shared ancestor before we expand @@ -509,7 +510,7 @@ function processSide( // if we are inserting entries, we don't have to update a cached size on the leaf as they simply return count of keys sizeChangeDepth = insertionDepth - 1; } else { - newRoot = splitUpwardsAndInsert(context, insertionDepth, subtree); + [newRoot] = splitUpwardsAndInsert(context, insertionDepth, subtree); sizeChangeDepth = insertionDepth; } @@ -531,8 +532,9 @@ function processSide( updateFrontier(context, expansionDepth); check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); - // 
updateSizeAndMax(context, unflushedSizes, spine.length - 1, 0); - // spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); + // Useful for debugging: + //updateSizeAndMax(context, unflushedSizes, spine.length - 1, 0); + //spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); } // Finally, propagate any remaining unflushed sizes upward and update max keys @@ -543,13 +545,13 @@ function processSide( * Cascade splits upward if capacity needed, then append a subtree at a given depth on the chosen side. * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. - * Returns a new root if the root was split, otherwise undefined. + * Returns a new root if the root was split, otherwise undefined, and the node into which the subtree was inserted. */ function splitUpwardsAndInsert( context: SideContext, insertionDepth: number, subtree: BNode -): BNodeInternal | undefined { +): [newRoot: BNodeInternal | undefined, insertTarget: BNodeInternal] { const { spine, branchingFactor, sideIndex, sideInsertionIndex, splitOffSide, updateMax } = context; // We must take care to avoid accidental propagation upward of the size of the inserted subtree // To do this, we first split nodes upward from the insertion point until we find a node with capacity @@ -580,7 +582,7 @@ function splitUpwardsAndInsert( // Example: a node is at max capacity 4, with children each of size 4 for 16 total. // We split the node into two nodes of 2 children each, but this does *not* modify the size // of its parent. Therefore when we insert the carry into the torn-off node, we must not - // increase its size or we will double-count the size of the carry su + // increase its size or we will double-count the size of the carry subtree. 
const tornOff = splitOffSide(parent); insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); carry = tornOff; @@ -598,13 +600,13 @@ function splitUpwardsAndInsert( // Finally, insert the subtree at the insertion point insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); - return newRoot; + return [newRoot, insertTarget]; } else { // Insertion of subtree with equal height to current tree const oldRoot = spine[0] as BNodeInternal; const newRoot = new BNodeInternal([oldRoot], oldRoot.size()); insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); - return newRoot; + return [newRoot, newRoot]; } }; @@ -613,7 +615,7 @@ function splitUpwardsAndInsertEntries( insertionDepth: number, entryContainer: BNode ): BNodeInternal | undefined { - const { branchingFactor, spine, mergeLeaves } = context; + const { branchingFactor, spine, balanceLeaves, mergeLeaves } = context; const entryCount = entryContainer.keys.length; const parent = spine[insertionDepth]; const parentSize = parent.keys.length; @@ -621,13 +623,17 @@ function splitUpwardsAndInsertEntries( mergeLeaves(parent, entryContainer); return undefined; } else { - const minSize = Math.floor(branchingFactor / 2); - const toTake = minSize - entryCount; - return splitUpwardsAndInsert( + // As with the internal node splitUpwardsAndInsert method, this method also must make all structural changes + // to the tree before inserting any new content. This is to avoid accidental propagation of sizes upward. 
+ const [newRoot, grandparent] = splitUpwardsAndInsert( context, insertionDepth - 1, entryContainer ); + const minSize = Math.floor(branchingFactor / 2); + const toTake = minSize - entryCount; + balanceLeaves(grandparent, entryContainer, toTake); + return newRoot; } } @@ -763,6 +769,26 @@ function splitOffLeftSide(node: BNodeInternal): BNodeInternal return node.splitOffLeftSide(); } +function balanceLeavesRight(parent: BNodeInternal, underfilled: BNode, toTake: number): void { + const siblingIndex = parent.children.length - 2; + const sibling = parent.children[siblingIndex]; + const index = sibling.keys.length - toTake; + const movedKeys = sibling.keys.splice(index); + const movedValues = sibling.values.splice(index); + underfilled.keys.unshift.apply(underfilled.keys, movedKeys); + underfilled.values.unshift.apply(underfilled.values, movedValues); + parent.keys[siblingIndex] = sibling.maxKey(); +} + +function balanceLeavesLeft(parent: BNodeInternal, underfilled: BNode, toTake: number): void { + const sibling = parent.children[1]; + const movedKeys = sibling.keys.splice(0, toTake); + const movedValues = sibling.values.splice(0, toTake); + underfilled.keys.push.apply(underfilled.keys, movedKeys); + underfilled.values.push.apply(underfilled.values, movedValues); + parent.keys[0] = underfilled.maxKey(); +} + function updateRightMax(node: BNodeInternal, maxBelow: K): void { node.keys[node.keys.length - 1] = maxBelow; } @@ -785,4 +811,5 @@ type SideContext = { splitOffSide: (node: BNodeInternal) => BNodeInternal; updateMax: (node: BNodeInternal, maxBelow: K) => void; mergeLeaves: (leaf: BNode, entries: BNode) => void; + balanceLeaves: (parent: BNodeInternal, underfilled: BNode, toTake: number) => void; }; From 639780b6e0217efbbe9cb1585ec960fffd5b509f Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 19 Nov 2025 12:56:28 -0800 Subject: [PATCH 127/143] add underfilled node stat --- benchmarks.ts | 201 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file 
changed, 188 insertions(+), 13 deletions(-) diff --git a/benchmarks.ts b/benchmarks.ts index 551d5b5..1215caa 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -48,29 +48,43 @@ function measure(message: (t:T) => string, callback: () => T, minMillise return result; } -function countTreeNodeStats(tree: BTree) { +type TreeNodeStats = { total: number, shared: number, underfilled: number }; + +function countTreeNodeStats(tree: BTree): TreeNodeStats { const root = (tree as any)._root; - if (!root) - return { total: 0, shared: 0 }; + if (!root || tree.size === 0) + return { total: 0, shared: 0, underfilled: 0 }; + + const maxNodeSize = tree.maxNodeSize ?? ((tree as any)._maxNodeSize ?? 0); + const minNodeSize = Math.max(1, Math.floor(maxNodeSize / 2)); - const visit = (node: any, ancestorShared: boolean): { total: number, shared: number } => { + const visit = (node: any, ancestorShared: boolean, isRoot: boolean): TreeNodeStats => { if (!node) - return { total: 0, shared: 0 }; + return { total: 0, shared: 0, underfilled: 0 }; const selfShared = node.isShared === true || ancestorShared; + const children: any[] | undefined = node.children; + const occupancy = children ? children.length : node.keys.length; + const isUnderfilled = !isRoot && occupancy < minNodeSize; let shared = selfShared ? 1 : 0; let total = 1; - const children: any[] | undefined = node.children; + let underfilled = isUnderfilled ? 
1 : 0; if (children) { for (const child of children) { - const stats = visit(child, selfShared); + const stats = visit(child, selfShared, false); total += stats.total; shared += stats.shared; + underfilled += stats.underfilled; } } - return { total, shared }; + return { total, shared, underfilled }; }; - return visit(root, false); + return visit(root, false, true); +} + +function logTreeNodeStats(label: string, stats: TreeNodeStats) { + console.log(`\tShared nodes (${label}): ${stats.shared}/${stats.total}`); + console.log(`\tUnderfilled nodes (${label}): ${stats.underfilled}/${stats.total}`); } function intersectBySorting( @@ -450,7 +464,7 @@ console.log("### Union between B+ trees"); return result; }); const baselineStats = countTreeNodeStats(baselineResult); - console.log(`\tShared nodes (baseline): ${baselineStats.shared}/${baselineStats.total}`); + logTreeNodeStats('baseline', baselineStats); }; const preferLeftUnion = (_k: number, leftValue: any, _rightValue: any) => leftValue; @@ -467,7 +481,7 @@ console.log("### Union between B+ trees"); return tree1.union(tree2, prefer); }); const unionStats = countTreeNodeStats(unionResult); - console.log(`\tShared nodes (union): ${unionStats.shared}/${unionStats.total}`); + logTreeNodeStats('union', unionStats); timeBaselineMerge(`${baseTitle} using ${baselineLabel}`, tree1, tree2); }; @@ -637,7 +651,7 @@ console.log("### Subtract between B+ trees"); return result; }); const stats = countTreeNodeStats(baselineResult); - console.log(`\tShared nodes (baseline): ${stats.shared}/${stats.total}`); + logTreeNodeStats('baseline', stats); }; const timeSubtractVsBaseline = ( @@ -651,7 +665,7 @@ console.log("### Subtract between B+ trees"); return subtract, number, number>(includeTree, excludeTree); }); const subtractStats = countTreeNodeStats(subtractResult); - console.log(`\tShared nodes (subtract): ${subtractStats.shared}/${subtractStats.total}`); + logTreeNodeStats('subtract', subtractStats); timeBaselineSubtract(`${baseTitle} 
using ${baselineLabel}`, includeTree, excludeTree); }; @@ -775,6 +789,167 @@ console.log("### Subtract between B+ trees"); } } +console.log(); +console.log("### Intersection between B+ trees"); +{ + console.log(); + const sizes = [100, 1000, 10000, 100000]; + const preferLeftIntersection = (_k: number, leftValue: number, _rightValue: number) => leftValue; + + const timeBaselineIntersect = ( + title: string, + tree1: BTreeEx, + tree2: BTreeEx, + combine: (key: number, leftValue: number, rightValue: number) => number + ) => { + const baselineResult = measure(() => title, () => { + const result = new BTreeEx(undefined, tree1._compare, tree1._maxNodeSize); + intersectBySorting(tree1, tree2, (key, leftValue, rightValue) => { + const mergedValue = combine(key, leftValue, rightValue); + result.set(key, mergedValue); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + logTreeNodeStats('baseline', baselineStats); + }; + + const timeIntersectVsBaseline = ( + baseTitle: string, + tree1: BTreeEx, + tree2: BTreeEx, + combine = preferLeftIntersection, + intersectLabel = 'intersect()', + baselineLabel = 'sort merge (baseline)' + ) => { + const intersectResult = measure(() => `${baseTitle} using ${intersectLabel}`, () => { + return tree1.intersect(tree2, combine); + }); + const intersectStats = countTreeNodeStats(intersectResult); + logTreeNodeStats('intersect', intersectStats); + + timeBaselineIntersect(`${baseTitle} using ${baselineLabel}`, tree1, tree2, combine); + }; + + console.log("# Non-overlapping ranges (no shared keys)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + const offset = size * 3; + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(offset + i, offset + i); + } + + const baseTitle = `Intersect ${size}+${size} disjoint trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Partial overlap (middle segment shared)"); + 
sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + const overlapStart = Math.floor(size / 3); + const overlapEnd = overlapStart + Math.floor(size / 2); + for (let i = 0; i < size; i++) { + tree1.set(i, i); + if (i >= overlapStart && i < overlapEnd) + tree2.set(i, i * 10); + } + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} partially overlapping trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Interleaved keys (every other key shared)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + for (let i = 0; i < size * 2; i++) { + tree1.set(i, i); + if (i % 2 === 0) + tree2.set(i, i * 3); + } + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} interleaved trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Complete overlap (all keys shared)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i, i * 5); + } + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} identical trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Random overlaps (~10% shared keys)"); + sizes.forEach((size) => { + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); + const overlapCount = Math.max(1, Math.floor(size * 0.1)); + for (let i = 0; i < overlapCount && i < keys1.length && i < keys2.length; i++) { + keys2[i] = keys1[i]; + } + + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + for (const key of keys1) + tree1.set(key, key * 5); + for (const key of keys2) + tree2.set(key, key * 7); + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} random trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Intersection with empty tree"); + 
sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + for (let i = 0; i < size; i++) + tree1.set(i, i); + + const baseTitle = `Intersect ${tree1.size}-key tree with empty tree`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const tree1 = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + tree1.set(i, i); + } + + const tree2 = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + tree2.set(i, i * 3); + } else { + tree2.set(totalKeys + i, totalKeys + i); + } + } + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} sparse-overlap trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + } +} + console.log(); console.log("### forEachKeyInBoth"); { From e2bae727047760bcaca7674c5eb8500ad4135c8a Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 19 Nov 2025 13:33:59 -0800 Subject: [PATCH 128/143] cleanup --- benchmarks.ts | 15 ++++++++------- test/setOperationFuzz.test.ts | 8 ++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/benchmarks.ts b/benchmarks.ts index 1215caa..d28817e 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -1,5 +1,5 @@ #!/usr/bin/env ts-node -import BTree from '.'; +import BTree, { BNode, BNodeInternal } from '.'; import BTreeEx from './extended'; import SortedArray from './sorted-array'; import forEachKeyNotIn from './extended/forEachKeyNotIn'; @@ -9,6 +9,7 @@ import subtract from './extended/subtract'; // randomized data, but it becomes extremely slow when filled with sorted // data, so it's not usually a good choice. 
import {RBTree} from 'bintrees'; +import { BTreeWithInternals } from './extended/shared'; const SortedSet = require("collections/sorted-set"); // Bad type definition: missing 'length' const SortedMap = require("collections/sorted-map"); // No type definitions available const functionalTree = require("functional-red-black-tree"); // No type definitions available @@ -51,18 +52,18 @@ function measure(message: (t:T) => string, callback: () => T, minMillise type TreeNodeStats = { total: number, shared: number, underfilled: number }; function countTreeNodeStats(tree: BTree): TreeNodeStats { - const root = (tree as any)._root; - if (!root || tree.size === 0) + const root = (tree as unknown as BTreeWithInternals)._root; + if (tree.size === 0) return { total: 0, shared: 0, underfilled: 0 }; - const maxNodeSize = tree.maxNodeSize ?? ((tree as any)._maxNodeSize ?? 0); - const minNodeSize = Math.max(1, Math.floor(maxNodeSize / 2)); + const maxNodeSize = tree.maxNodeSize; + const minNodeSize = Math.floor(maxNodeSize / 2); - const visit = (node: any, ancestorShared: boolean, isRoot: boolean): TreeNodeStats => { + const visit = (node: BNode, ancestorShared: boolean, isRoot: boolean): TreeNodeStats => { if (!node) return { total: 0, shared: 0, underfilled: 0 }; const selfShared = node.isShared === true || ancestorShared; - const children: any[] | undefined = node.children; + const children: BNode[] | undefined = (node as BNodeInternal).children; const occupancy = children ? children.length : node.keys.length; const isUnderfilled = !isRoot && occupancy < minNodeSize; let shared = selfShared ? 
1 : 0; diff --git a/test/setOperationFuzz.test.ts b/test/setOperationFuzz.test.ts index 20bd45a..2618d2a 100644 --- a/test/setOperationFuzz.test.ts +++ b/test/setOperationFuzz.test.ts @@ -57,6 +57,10 @@ describe('Set operation fuzz tests', () => { treeC.set(value, value); } + const treeAInitial = treeA.toArray(); + const treeBInitial = treeB.toArray(); + const treeCInitial = treeC.toArray(); + const keepEither = (_k: number, left: number, _right: number) => left; const dropValue = () => undefined; const combineSum = (_k: number, left: number, right: number) => left + right; @@ -138,6 +142,10 @@ describe('Set operation fuzz tests', () => { treeA.checkValid(); treeB.checkValid(); treeC.checkValid(); + + expect(treeA.toArray()).toEqual(treeAInitial); + expect(treeB.toArray()).toEqual(treeBInitial); + expect(treeC.toArray()).toEqual(treeCInitial); }); } } From 94436502e747d24177c0ce6dfba597929b5b36b3 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 19 Nov 2025 14:38:03 -0800 Subject: [PATCH 129/143] wip underfilled removal --- benchmarks.ts | 43 +------------ extended/shared.ts | 3 +- test/forEachKeyInBoth.test.ts | 4 +- test/forEachKeyNotIn.test.ts | 4 +- test/shared.d.ts | 16 +++++ test/shared.js | 112 ++++++++++++++++++++++++++++++++++ test/shared.ts | 66 +++++++++++++++++++- test/union.test.ts | 4 +- 8 files changed, 205 insertions(+), 47 deletions(-) create mode 100644 test/shared.d.ts create mode 100644 test/shared.js diff --git a/benchmarks.ts b/benchmarks.ts index d28817e..c565cc9 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -1,5 +1,5 @@ #!/usr/bin/env ts-node -import BTree, { BNode, BNodeInternal } from '.'; +import BTree from '.'; import BTreeEx from './extended'; import SortedArray from './sorted-array'; import forEachKeyNotIn from './extended/forEachKeyNotIn'; @@ -9,7 +9,7 @@ import subtract from './extended/subtract'; // randomized data, but it becomes extremely slow when filled with sorted // data, so it's not usually a good choice. 
import {RBTree} from 'bintrees'; -import { BTreeWithInternals } from './extended/shared'; +import { countTreeNodeStats, logTreeNodeStats } from './test/shared'; const SortedSet = require("collections/sorted-set"); // Bad type definition: missing 'length' const SortedMap = require("collections/sorted-map"); // No type definitions available const functionalTree = require("functional-red-black-tree"); // No type definitions available @@ -49,45 +49,6 @@ function measure(message: (t:T) => string, callback: () => T, minMillise return result; } -type TreeNodeStats = { total: number, shared: number, underfilled: number }; - -function countTreeNodeStats(tree: BTree): TreeNodeStats { - const root = (tree as unknown as BTreeWithInternals)._root; - if (tree.size === 0) - return { total: 0, shared: 0, underfilled: 0 }; - - const maxNodeSize = tree.maxNodeSize; - const minNodeSize = Math.floor(maxNodeSize / 2); - - const visit = (node: BNode, ancestorShared: boolean, isRoot: boolean): TreeNodeStats => { - if (!node) - return { total: 0, shared: 0, underfilled: 0 }; - const selfShared = node.isShared === true || ancestorShared; - const children: BNode[] | undefined = (node as BNodeInternal).children; - const occupancy = children ? children.length : node.keys.length; - const isUnderfilled = !isRoot && occupancy < minNodeSize; - let shared = selfShared ? 1 : 0; - let total = 1; - let underfilled = isUnderfilled ? 
1 : 0; - if (children) { - for (const child of children) { - const stats = visit(child, selfShared, false); - total += stats.total; - shared += stats.shared; - underfilled += stats.underfilled; - } - } - return { total, shared, underfilled }; - }; - - return visit(root, false, true); -} - -function logTreeNodeStats(label: string, stats: TreeNodeStats) { - console.log(`\tShared nodes (${label}): ${stats.shared}/${stats.total}`); - console.log(`\tUnderfilled nodes (${label}): ${stats.underfilled}/${stats.total}`); -} - function intersectBySorting( tree1: BTree, tree2: BTree, diff --git a/extended/shared.ts b/extended/shared.ts index d0f90d5..4ca123e 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -1,5 +1,4 @@ -import { BNode } from '../b+tree'; -import BTree from '../b+tree'; +import BTree, { BNode } from '../b+tree'; /** * BTree with access to internal properties. diff --git a/test/forEachKeyInBoth.test.ts b/test/forEachKeyInBoth.test.ts index 3764e78..54adaf4 100644 --- a/test/forEachKeyInBoth.test.ts +++ b/test/forEachKeyInBoth.test.ts @@ -2,7 +2,7 @@ import BTreeEx from '../extended'; import intersect from '../extended/intersect'; import { comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; -import { makeArray } from './shared'; +import { countTreeNodeStats, makeArray } from './shared'; var test: (name: string, f: () => void) => void = it; @@ -27,6 +27,8 @@ const runIntersectionImplementations = ( const expectedEntries = intersectionCalls.map(({ key, leftValue }) => [key, leftValue] as [number, number]); expect(resultTree.toArray()).toEqual(expectedEntries); resultTree.checkValid(); + const stats = countTreeNodeStats(resultTree); + expect(stats.newUnderfilled).toBe(0); assertion(intersectionCalls); }; diff --git a/test/forEachKeyNotIn.test.ts b/test/forEachKeyNotIn.test.ts index 919a178..543c8d1 100644 --- a/test/forEachKeyNotIn.test.ts +++ b/test/forEachKeyNotIn.test.ts @@ -3,7 +3,7 @@ import 
forEachKeyNotIn from '../extended/forEachKeyNotIn'; import subtract from '../extended/subtract'; import { comparatorErrorMsg, branchingFactorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; -import { makeArray } from './shared'; +import { countTreeNodeStats, makeArray } from './shared'; type NotInCall = { key: number, value: number }; @@ -22,6 +22,8 @@ const runSubtractionImplementations = ( const subtractCalls = resultTree.toArray().map(([key, value]) => ({ key, value })); expect(subtractCalls).toEqual(forEachCalls); resultTree.checkValid(); + const stats = countTreeNodeStats(resultTree); + expect(stats.newUnderfilled).toBe(0); assertion(subtractCalls); }; diff --git a/test/shared.d.ts b/test/shared.d.ts new file mode 100644 index 0000000..cea5443 --- /dev/null +++ b/test/shared.d.ts @@ -0,0 +1,16 @@ +import BTree, { IMap } from '../b+tree'; +import SortedArray from '../sorted-array'; +import MersenneTwister from 'mersenne-twister'; +export declare type TreeNodeStats = { + total: number; + shared: number; + newUnderfilled: number; + averageLoadFactor: number; +}; +export declare function countTreeNodeStats(tree: BTree): TreeNodeStats; +export declare function logTreeNodeStats(label: string, stats: TreeNodeStats): void; +export declare function randInt(max: number): number; +export declare function expectTreeEqualTo(tree: BTree, list: SortedArray): void; +export declare function addToBoth(a: IMap, b: IMap, k: K, v: V): void; +export declare function makeArray(size: number, randomOrder: boolean, spacing?: number, collisionChance?: number, rng?: MersenneTwister): number[]; +export declare const randomInt: (rng: MersenneTwister, maxExclusive: number) => number; diff --git a/test/shared.js b/test/shared.js new file mode 100644 index 0000000..251b534 --- /dev/null +++ b/test/shared.js @@ -0,0 +1,112 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? 
mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.randomInt = exports.makeArray = exports.addToBoth = exports.expectTreeEqualTo = exports.randInt = exports.logTreeNodeStats = exports.countTreeNodeStats = void 0; +var mersenne_twister_1 = __importDefault(require("mersenne-twister")); +var rand = new mersenne_twister_1.default(1234); +function countTreeNodeStats(tree) { + var root = tree._root; + if (tree.size === 0 || !root) + return { total: 0, shared: 0, newUnderfilled: 0, averageLoadFactor: 0 }; + var maxNodeSize = tree.maxNodeSize; + var minNodeSize = Math.floor(maxNodeSize / 2); + var visit = function (node, ancestorShared, isRoot) { + if (!node) + return { total: 0, shared: 0, newUnderfilled: 0, loadFactorSum: 0 }; + var selfShared = node.isShared === true || ancestorShared; + var children = node.children; + var occupancy = children ? children.length : node.keys.length; + var isUnderfilled = !isRoot && occupancy < minNodeSize; + var loadFactor = occupancy / maxNodeSize; + var shared = selfShared ? 1 : 0; + var total = 1; + var newUnderfilled = !selfShared && isUnderfilled ? 1 : 0; + var loadFactorSum = loadFactor; + if (children) { + for (var _i = 0, children_1 = children; _i < children_1.length; _i++) { + var child = children_1[_i]; + var stats = visit(child, selfShared, false); + total += stats.total; + shared += stats.shared; + newUnderfilled += stats.newUnderfilled; + loadFactorSum += stats.loadFactorSum; + } + } + return { total: total, shared: shared, newUnderfilled: newUnderfilled, loadFactorSum: loadFactorSum }; + }; + var result = visit(root, false, true); + var averageLoadFactor = result.total === 0 ? 
0 : result.loadFactorSum / result.total; + return { + total: result.total, + shared: result.shared, + newUnderfilled: result.newUnderfilled, + averageLoadFactor: averageLoadFactor + }; +} +exports.countTreeNodeStats = countTreeNodeStats; +function logTreeNodeStats(label, stats) { + console.log("\tShared nodes (".concat(label, "): ").concat(stats.shared, "/").concat(stats.total)); + console.log("\tUnderfilled nodes (".concat(label, "): ").concat(stats.newUnderfilled, "/").concat(stats.total)); + var percent = (stats.averageLoadFactor * 100).toFixed(2); + console.log("\tAverage load factor (".concat(label, "): ").concat(percent, "%")); +} +exports.logTreeNodeStats = logTreeNodeStats; +function randInt(max) { + return rand.random_int() % max; +} +exports.randInt = randInt; +function expectTreeEqualTo(tree, list) { + tree.checkValid(); + expect(tree.toArray()).toEqual(list.getArray()); +} +exports.expectTreeEqualTo = expectTreeEqualTo; +function addToBoth(a, b, k, v) { + expect(a.set(k, v)).toEqual(b.set(k, v)); +} +exports.addToBoth = addToBoth; +function makeArray(size, randomOrder, spacing, collisionChance, rng) { + if (spacing === void 0) { spacing = 10; } + if (collisionChance === void 0) { collisionChance = 0; } + var randomizer = rng !== null && rng !== void 0 ? 
rng : rand; + var useGlobalRand = rng === undefined; + var randomFloat = function () { + if (typeof randomizer.random === 'function') + return randomizer.random(); + return Math.random(); + }; + var randomIntWithMax = function (max) { + if (max <= 0) + return 0; + if (useGlobalRand) + return randInt(max); + return Math.floor(randomFloat() * max); + }; + var keys = []; + var current = 0; + for (var i = 0; i < size; i++) { + if (i > 0 && collisionChance > 0 && randomFloat() < collisionChance) { + keys[i] = keys[i - 1]; + } + else { + current += 1 + randomIntWithMax(spacing); + keys[i] = current; + } + } + if (randomOrder) { + for (var i = 0; i < size; i++) + swap(keys, i, randomIntWithMax(size)); + } + return keys; +} +exports.makeArray = makeArray; +var randomInt = function (rng, maxExclusive) { + return Math.floor(rng.random() * maxExclusive); +}; +exports.randomInt = randomInt; +function swap(keys, i, j) { + var tmp = keys[i]; + keys[i] = keys[j]; + keys[j] = tmp; +} diff --git a/test/shared.ts b/test/shared.ts index 99f96dc..a4f996a 100644 --- a/test/shared.ts +++ b/test/shared.ts @@ -1,9 +1,73 @@ -import BTree, { IMap } from '../b+tree'; +import BTree, { BNode, BNodeInternal, IMap } from '../b+tree'; import SortedArray from '../sorted-array'; import MersenneTwister from 'mersenne-twister'; +import type { BTreeWithInternals } from '../extended/shared'; const rand = new MersenneTwister(1234); +export type TreeNodeStats = { + total: number; + shared: number; + newUnderfilled: number; + averageLoadFactor: number; +}; + +export function countTreeNodeStats(tree: BTree): TreeNodeStats { + const root = (tree as unknown as BTreeWithInternals)._root; + if (tree.size === 0 || !root) + return { total: 0, shared: 0, newUnderfilled: 0, averageLoadFactor: 0 }; + + const maxNodeSize = tree.maxNodeSize; + const minNodeSize = Math.floor(maxNodeSize / 2); + + type StatsAccumulator = { + total: number; + shared: number; + newUnderfilled: number; + loadFactorSum: number; + }; + + 
const visit = (node: BNode, ancestorShared: boolean, isRoot: boolean): StatsAccumulator => { + if (!node) + return { total: 0, shared: 0, newUnderfilled: 0, loadFactorSum: 0 }; + const selfShared = node.isShared === true || ancestorShared; + const children: BNode[] | undefined = (node as BNodeInternal).children; + const occupancy = children ? children.length : node.keys.length; + const isUnderfilled = !isRoot && occupancy < minNodeSize; + const loadFactor = occupancy / maxNodeSize; + let shared = selfShared ? 1 : 0; + let total = 1; + let newUnderfilled = !selfShared && isUnderfilled ? 1 : 0; + let loadFactorSum = loadFactor; + if (children) { + for (const child of children) { + const stats = visit(child, selfShared, false); + total += stats.total; + shared += stats.shared; + newUnderfilled += stats.newUnderfilled; + loadFactorSum += stats.loadFactorSum; + } + } + return { total, shared, newUnderfilled, loadFactorSum }; + }; + + const result = visit(root, false, true); + const averageLoadFactor = result.total === 0 ? 
0 : result.loadFactorSum / result.total; + return { + total: result.total, + shared: result.shared, + newUnderfilled: result.newUnderfilled, + averageLoadFactor + }; +} + +export function logTreeNodeStats(label: string, stats: TreeNodeStats): void { + console.log(`\tShared nodes (${label}): ${stats.shared}/${stats.total}`); + console.log(`\tUnderfilled nodes (${label}): ${stats.newUnderfilled}/${stats.total}`); + const percent = (stats.averageLoadFactor * 100).toFixed(2); + console.log(`\tAverage load factor (${label}): ${percent}%`); +} + export function randInt(max: number): number { return rand.random_int() % max; } diff --git a/test/union.test.ts b/test/union.test.ts index fd865df..8fdba70 100644 --- a/test/union.test.ts +++ b/test/union.test.ts @@ -3,7 +3,7 @@ import BTreeEx from '../extended'; import union from '../extended/union'; import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; -import { makeArray, randomInt } from './shared'; +import { countTreeNodeStats, makeArray, randomInt } from './shared'; var test: (name: string, f: () => void) => void = it; @@ -819,6 +819,8 @@ describe('BTree union fuzz tests', () => { // Check for shared mutability issues expect(treeA.toArray()).toEqual(aArray); expect(treeB.toArray()).toEqual(bArray); + const unionStats = countTreeNodeStats(unioned); + expect(unionStats.newUnderfilled).toBe(0); }); } } From f87865c90909a8c9ec556f8ee9876708171954f2 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 19 Nov 2025 16:54:46 -0800 Subject: [PATCH 130/143] subtraction stuff --- test/setOperationFuzz.test.ts | 271 ++++++++++++++++++++-------------- 1 file changed, 159 insertions(+), 112 deletions(-) diff --git a/test/setOperationFuzz.test.ts b/test/setOperationFuzz.test.ts index 2618d2a..6fa3a9d 100644 --- a/test/setOperationFuzz.test.ts +++ b/test/setOperationFuzz.test.ts @@ -1,6 +1,6 @@ import BTreeEx from '../extended'; import MersenneTwister from 
'mersenne-twister'; -import { makeArray } from './shared'; +import { countTreeNodeStats, makeArray } from './shared'; const compare = (a: number, b: number) => a - b; @@ -10,6 +10,7 @@ describe('Set operation fuzz tests', () => { ooms: [2, 3], fractionsPerOOM: [0.1, 0.25, 0.5], collisionChances: [0.05, 0.1, 0.3], + removalChances: [0.001, 0.01, 0.1], timeoutMs: 30_000 } as const; @@ -21,6 +22,10 @@ describe('Set operation fuzz tests', () => { if (chance < 0 || chance > 1) throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); }); + FUZZ_SETTINGS.removalChances.forEach(chance => { + if (chance < 0 || chance > 1) + throw new Error('FUZZ_SETTINGS.removalChances must contain values between 0 and 1'); + }); jest.setTimeout(FUZZ_SETTINGS.timeoutMs); @@ -28,6 +33,29 @@ describe('Set operation fuzz tests', () => { const count = (t: BTreeEx) => t.toArray().length; + const applyRemovalRuns = (tree: BTreeEx, removalChance: number, branchingFactor: number) => { + if (removalChance <= 0 || tree.size === 0) + return; + const keys = tree.toArray().map(([key]) => key); + let index = 0; + while (index < keys.length) { + const key = keys[index]; + if (rng.random() < removalChance) { + tree.delete(key); + index++; + while (index < keys.length) { + const candidateKey = keys[index]; + if (rng.random() < (1 / branchingFactor)) + break; + tree.delete(candidateKey); + index++; + } + } else { + index++; + } + } + }; + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { describe(`branching factor ${maxNodeSize}`, () => { for (const collisionChance of FUZZ_SETTINGS.collisionChances) { @@ -36,117 +64,136 @@ describe('Set operation fuzz tests', () => { for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { const fractionB = 1 - fractionA; const collisionLabel = collisionChance.toFixed(2); - - it(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { - const treeA = new BTreeEx([], 
compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); - const treeC = new BTreeEx([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1, collisionChance, rng); - - for (const value of keys) { - const assignToA = rng.random() < fractionA; - const assignToB = rng.random() < fractionB; - const assignToC = rng.random() < 0.5; - - if (assignToA) - treeA.set(value, value); - if (assignToB) - treeB.set(value, value); - if (assignToC) - treeC.set(value, value); - } - - const treeAInitial = treeA.toArray(); - const treeBInitial = treeB.toArray(); - const treeCInitial = treeC.toArray(); - - const keepEither = (_k: number, left: number, _right: number) => left; - const dropValue = () => undefined; - const combineSum = (_k: number, left: number, right: number) => left + right; - - const unionDrop = treeA.union(treeB, dropValue); - const unionKeep = treeA.union(treeB, keepEither); - const intersection = treeA.intersect(treeB, keepEither); - const diffAB = treeA.subtract(treeB); - const diffBA = treeB.subtract(treeA); - - // 1. Partition of A: A = (A\B) โˆช (AโˆฉB) and parts are disjoint. - const partition = diffAB.union(intersection, keepEither); - expect(partition.toArray()).toEqual(treeA.toArray()); - expect(diffAB.intersect(intersection, keepEither).size).toBe(0); - - // 2. Recover B from union and A\B: (AโˆชB)\(A\B) = B. - expect(unionKeep.subtract(diffAB).toArray()).toEqual(treeB.toArray()); - - // 3. Symmetric difference two ways. - const symFromDiffs = diffAB.union(diffBA, keepEither); - const symFromUnion = unionKeep.subtract(intersection); - expect(symFromDiffs.toArray()).toEqual(symFromUnion.toArray()); - - // 4. Intersection via difference: AโˆฉB = A \ (A\B). - expect(intersection.toArray()).toEqual(treeA.subtract(diffAB).toArray()); - - // 5. Difference via intersection: A\B = A \ (AโˆฉB). - expect(diffAB.toArray()).toEqual(treeA.subtract(intersection).toArray()); - - // 6. Idempotence. 
- expect(treeA.union(treeA, keepEither).toArray()).toEqual(treeA.toArray()); - expect(treeA.intersect(treeA, keepEither).toArray()).toEqual(treeA.toArray()); - expect(diffAB.subtract(treeB).toArray()).toEqual(diffAB.toArray()); - - // 7. Commutativity. - expect(intersection.toArray()).toEqual(treeB.intersect(treeA, keepEither).toArray()); - const commUT = treeA.union(treeB, combineSum); - const commTU = treeB.union(treeA, combineSum); - expect(commUT.toArray()).toEqual(commTU.toArray()); - - // 8. Associativity. - const assocLeft = treeA.intersect(treeB, keepEither).intersect(treeC, keepEither); - const assocRight = treeA.intersect(treeB.intersect(treeC, keepEither), keepEither); - expect(assocLeft.toArray()).toEqual(assocRight.toArray()); - const assocSumLeft = treeA.union(treeB, combineSum).union(treeC, combineSum); - const assocSumRight = treeA.union(treeB.union(treeC, combineSum), combineSum); - expect(assocSumLeft.toArray()).toEqual(assocSumRight.toArray()); - - // 9. Absorption. - expect(treeA.intersect(treeA.union(treeB, keepEither), keepEither).toArray()).toEqual(treeA.toArray()); - expect(treeA.union(treeA.intersect(treeB, keepEither), keepEither).toArray()).toEqual(treeA.toArray()); - - // 10. Distributivity. - const distIntersect = treeA.intersect(treeB.union(treeC, keepEither), keepEither); - const distRight = treeA.intersect(treeB, keepEither).union(treeA.intersect(treeC, keepEither), keepEither); - expect(distIntersect.toArray()).toEqual(distRight.toArray()); - const distSubtract = treeA.subtract(treeB.union(treeC, keepEither)); - const distSubtractRight = treeA.subtract(treeB).subtract(treeC); - expect(distSubtract.toArray()).toEqual(distSubtractRight.toArray()); - const distIntersectDiff = treeA.intersect(treeB, keepEither).subtract(treeC); - const distDiffIntersect = treeA.subtract(treeC).intersect(treeB, keepEither); - expect(distIntersectDiff.toArray()).toEqual(distDiffIntersect.toArray()); - - // 11. Superset sanity. 
- expect(treeA.subtract(treeA.union(treeB, keepEither)).size).toBe(0); - expect(diffAB.intersect(treeB, keepEither).size).toBe(0); - - // 12. Cardinality relations. - expect(count(unionKeep)).toBe(count(treeA) + count(treeB) - count(intersection)); - expect(count(diffAB)).toBe(count(treeA) - count(intersection)); - expect(count(treeA)).toBe(count(diffAB) + count(intersection)); - - partition.checkValid(); - unionDrop.checkValid(); - unionKeep.checkValid(); - intersection.checkValid(); - diffAB.checkValid(); - diffBA.checkValid(); - treeA.checkValid(); - treeB.checkValid(); - treeC.checkValid(); - - expect(treeA.toArray()).toEqual(treeAInitial); - expect(treeB.toArray()).toEqual(treeBInitial); - expect(treeC.toArray()).toEqual(treeCInitial); - }); + for (const removalChance of FUZZ_SETTINGS.removalChances) { + const removalLabel = removalChance.toFixed(3); + + it(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + const treeC = new BTreeEx([], compare, maxNodeSize); + + const keys = makeArray(size, true, 1, collisionChance, rng); + + for (const value of keys) { + const assignToA = rng.random() < fractionA; + const assignToB = rng.random() < fractionB; + const assignToC = rng.random() < 0.5; + + if (assignToA) + treeA.set(value, value); + if (assignToB) + treeB.set(value, value); + if (assignToC) + treeC.set(value, value); + } + + applyRemovalRuns(treeA, removalChance, maxNodeSize); + applyRemovalRuns(treeB, removalChance, maxNodeSize); + + const treeAInitial = treeA.toArray(); + const treeBInitial = treeB.toArray(); + const treeCInitial = treeC.toArray(); + + const keepEither = (_k: number, left: number, _right: number) => left; + const dropValue = () => undefined; + const combineSum = (_k: number, left: number, right: number) => left + right; + + const 
unionDrop = treeA.union(treeB, dropValue); + const unionKeep = treeA.union(treeB, keepEither); + const intersection = treeA.intersect(treeB, keepEither); + const diffAB = treeA.subtract(treeB); + const diffBA = treeB.subtract(treeA); + + // 1. Partition of A: A = (A\B) โˆช (AโˆฉB) and parts are disjoint. + const partition = diffAB.union(intersection, keepEither); + expect(partition.toArray()).toEqual(treeA.toArray()); + expect(diffAB.intersect(intersection, keepEither).size).toBe(0); + + // 2. Recover B from union and A\B: (AโˆชB)\(A\B) = B. + expect(unionKeep.subtract(diffAB).toArray()).toEqual(treeB.toArray()); + + // 3. Symmetric difference two ways. + const symFromDiffs = diffAB.union(diffBA, keepEither); + const symFromUnion = unionKeep.subtract(intersection); + expect(symFromDiffs.toArray()).toEqual(symFromUnion.toArray()); + + // 4. Intersection via difference: AโˆฉB = A \ (A\B). + expect(intersection.toArray()).toEqual(treeA.subtract(diffAB).toArray()); + + // 5. Difference via intersection: A\B = A \ (AโˆฉB). + expect(diffAB.toArray()).toEqual(treeA.subtract(intersection).toArray()); + + // 6. Idempotence. + expect(treeA.union(treeA, keepEither).toArray()).toEqual(treeA.toArray()); + expect(treeA.intersect(treeA, keepEither).toArray()).toEqual(treeA.toArray()); + expect(diffAB.subtract(treeB).toArray()).toEqual(diffAB.toArray()); + + // 7. Commutativity. + expect(intersection.toArray()).toEqual(treeB.intersect(treeA, keepEither).toArray()); + const commUT = treeA.union(treeB, combineSum); + const commTU = treeB.union(treeA, combineSum); + expect(commUT.toArray()).toEqual(commTU.toArray()); + + // 8. Associativity. 
+ const assocLeft = treeA.intersect(treeB, keepEither).intersect(treeC, keepEither); + const assocRight = treeA.intersect(treeB.intersect(treeC, keepEither), keepEither); + expect(assocLeft.toArray()).toEqual(assocRight.toArray()); + const assocSumLeft = treeA.union(treeB, combineSum).union(treeC, combineSum); + const assocSumRight = treeA.union(treeB.union(treeC, combineSum), combineSum); + expect(assocSumLeft.toArray()).toEqual(assocSumRight.toArray()); + + // 9. Absorption. + expect(treeA.intersect(treeA.union(treeB, keepEither), keepEither).toArray()).toEqual(treeA.toArray()); + expect(treeA.union(treeA.intersect(treeB, keepEither), keepEither).toArray()).toEqual(treeA.toArray()); + + // 10. Distributivity. + const distIntersect = treeA.intersect(treeB.union(treeC, keepEither), keepEither); + const distRight = treeA.intersect(treeB, keepEither).union(treeA.intersect(treeC, keepEither), keepEither); + expect(distIntersect.toArray()).toEqual(distRight.toArray()); + const distSubtract = treeA.subtract(treeB.union(treeC, keepEither)); + const distSubtractRight = treeA.subtract(treeB).subtract(treeC); + expect(distSubtract.toArray()).toEqual(distSubtractRight.toArray()); + const distIntersectDiff = treeA.intersect(treeB, keepEither).subtract(treeC); + const distDiffIntersect = treeA.subtract(treeC).intersect(treeB, keepEither); + expect(distIntersectDiff.toArray()).toEqual(distDiffIntersect.toArray()); + + // 11. Superset sanity. + expect(treeA.subtract(treeA.union(treeB, keepEither)).size).toBe(0); + expect(diffAB.intersect(treeB, keepEither).size).toBe(0); + + // 12. Cardinality relations. 
+ expect(count(unionKeep)).toBe(count(treeA) + count(treeB) - count(intersection)); + expect(count(diffAB)).toBe(count(treeA) - count(intersection)); + expect(count(treeA)).toBe(count(diffAB) + count(intersection)); + + partition.checkValid(); + unionDrop.checkValid(); + unionKeep.checkValid(); + intersection.checkValid(); + diffAB.checkValid(); + diffBA.checkValid(); + treeA.checkValid(); + treeB.checkValid(); + treeC.checkValid(); + + expect(treeA.toArray()).toEqual(treeAInitial); + expect(treeB.toArray()).toEqual(treeBInitial); + expect(treeC.toArray()).toEqual(treeCInitial); + + const unionStats = countTreeNodeStats(unionKeep); + const intersectionStats = countTreeNodeStats(intersection); + const diffABStats = countTreeNodeStats(diffAB); + const diffBAStats = countTreeNodeStats(diffBA); + const unionDropStats = countTreeNodeStats(unionDrop); + const partitionStats = countTreeNodeStats(partition); + expect(unionStats.newUnderfilled).toBe(0); + expect(intersectionStats.newUnderfilled).toBe(0); + expect(diffABStats.newUnderfilled).toBe(0); + expect(diffBAStats.newUnderfilled).toBe(0); + expect(unionDropStats.newUnderfilled).toBe(0); + expect(partitionStats.newUnderfilled).toBe(0); + }); + } } } } From f3ba889463abad69a63eda0135162226ad27b694 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Wed, 19 Nov 2025 17:02:03 -0800 Subject: [PATCH 131/143] optimize --- test/setOperationFuzz.test.ts | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/setOperationFuzz.test.ts b/test/setOperationFuzz.test.ts index 6fa3a9d..796d577 100644 --- a/test/setOperationFuzz.test.ts +++ b/test/setOperationFuzz.test.ts @@ -31,20 +31,20 @@ describe('Set operation fuzz tests', () => { const rng = new MersenneTwister(0xC0FFEE); - const count = (t: BTreeEx) => t.toArray().length; + const count = (t: BTreeEx) => t.size; const applyRemovalRuns = (tree: BTreeEx, removalChance: number, branchingFactor: number) => { if (removalChance <= 0 || 
tree.size === 0) return; - const keys = tree.toArray().map(([key]) => key); + const entries = tree.toArray(); let index = 0; - while (index < keys.length) { - const key = keys[index]; + while (index < entries.length) { + const key = entries[index][0]; if (rng.random() < removalChance) { tree.delete(key); index++; - while (index < keys.length) { - const candidateKey = keys[index]; + while (index < entries.length) { + const candidateKey = entries[index][0]; if (rng.random() < (1 / branchingFactor)) break; tree.delete(candidateKey); @@ -106,11 +106,11 @@ describe('Set operation fuzz tests', () => { // 1. Partition of A: A = (A\B) โˆช (AโˆฉB) and parts are disjoint. const partition = diffAB.union(intersection, keepEither); - expect(partition.toArray()).toEqual(treeA.toArray()); + expect(partition.toArray()).toEqual(treeAInitial); expect(diffAB.intersect(intersection, keepEither).size).toBe(0); // 2. Recover B from union and A\B: (AโˆชB)\(A\B) = B. - expect(unionKeep.subtract(diffAB).toArray()).toEqual(treeB.toArray()); + expect(unionKeep.subtract(diffAB).toArray()).toEqual(treeBInitial); // 3. Symmetric difference two ways. const symFromDiffs = diffAB.union(diffBA, keepEither); @@ -124,8 +124,8 @@ describe('Set operation fuzz tests', () => { expect(diffAB.toArray()).toEqual(treeA.subtract(intersection).toArray()); // 6. Idempotence. - expect(treeA.union(treeA, keepEither).toArray()).toEqual(treeA.toArray()); - expect(treeA.intersect(treeA, keepEither).toArray()).toEqual(treeA.toArray()); + expect(treeA.union(treeA, keepEither).toArray()).toEqual(treeAInitial); + expect(treeA.intersect(treeA, keepEither).toArray()).toEqual(treeAInitial); expect(diffAB.subtract(treeB).toArray()).toEqual(diffAB.toArray()); // 7. Commutativity. @@ -143,8 +143,8 @@ describe('Set operation fuzz tests', () => { expect(assocSumLeft.toArray()).toEqual(assocSumRight.toArray()); // 9. Absorption. 
- expect(treeA.intersect(treeA.union(treeB, keepEither), keepEither).toArray()).toEqual(treeA.toArray()); - expect(treeA.union(treeA.intersect(treeB, keepEither), keepEither).toArray()).toEqual(treeA.toArray()); + expect(treeA.intersect(treeA.union(treeB, keepEither), keepEither).toArray()).toEqual(treeAInitial); + expect(treeA.union(treeA.intersect(treeB, keepEither), keepEither).toArray()).toEqual(treeAInitial); // 10. Distributivity. const distIntersect = treeA.intersect(treeB.union(treeC, keepEither), keepEither); From 447ab7f88255aa7bd8f69c0500380713492eab34 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 20 Nov 2025 10:47:12 -0800 Subject: [PATCH 132/143] test cleanup --- test/setOperationFuzz.test.ts | 48 ++++++++++------------------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/test/setOperationFuzz.test.ts b/test/setOperationFuzz.test.ts index 796d577..671c605 100644 --- a/test/setOperationFuzz.test.ts +++ b/test/setOperationFuzz.test.ts @@ -1,6 +1,6 @@ import BTreeEx from '../extended'; import MersenneTwister from 'mersenne-twister'; -import { countTreeNodeStats, makeArray } from './shared'; +import { makeArray } from './shared'; const compare = (a: number, b: number) => a - b; @@ -9,8 +9,7 @@ describe('Set operation fuzz tests', () => { branchingFactors: [4, 5, 32], ooms: [2, 3], fractionsPerOOM: [0.1, 0.25, 0.5], - collisionChances: [0.05, 0.1, 0.3], - removalChances: [0.001, 0.01, 0.1], + removalChances: [0.01, 0.1], timeoutMs: 30_000 } as const; @@ -18,10 +17,6 @@ describe('Set operation fuzz tests', () => { if (fraction < 0 || fraction > 1) throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); }); - FUZZ_SETTINGS.collisionChances.forEach(chance => { - if (chance < 0 || chance > 1) - throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); - }); FUZZ_SETTINGS.removalChances.forEach(chance => { if (chance < 0 || chance > 1) throw new 
Error('FUZZ_SETTINGS.removalChances must contain values between 0 and 1'); @@ -31,8 +26,6 @@ describe('Set operation fuzz tests', () => { const rng = new MersenneTwister(0xC0FFEE); - const count = (t: BTreeEx) => t.size; - const applyRemovalRuns = (tree: BTreeEx, removalChance: number, branchingFactor: number) => { if (removalChance <= 0 || tree.size === 0) return; @@ -58,21 +51,21 @@ describe('Set operation fuzz tests', () => { for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { describe(`branching factor ${maxNodeSize}`, () => { - for (const collisionChance of FUZZ_SETTINGS.collisionChances) { - for (const oom of FUZZ_SETTINGS.ooms) { - const size = 5 * Math.pow(10, oom); - for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { - const fractionB = 1 - fractionA; - const collisionLabel = collisionChance.toFixed(2); + for (const oom of FUZZ_SETTINGS.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { + const fractionB = 1 - fractionA; + for (const removalChance of FUZZ_SETTINGS.removalChances) { + const removalLabel = removalChance.toFixed(3); for (const removalChance of FUZZ_SETTINGS.removalChances) { const removalLabel = removalChance.toFixed(3); - it(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}, removal ${removalLabel}`, () => { - const treeA = new BTreeEx([], compare, maxNodeSize); + it(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); const treeB = new BTreeEx([], compare, maxNodeSize); const treeC = new BTreeEx([], compare, maxNodeSize); - const keys = makeArray(size, true, 1, collisionChance, rng); + const keys = makeArray(size, true, 1, 0, rng); for (const value of keys) { const assignToA = rng.random() < fractionA; @@ -162,9 +155,9 @@ describe('Set operation fuzz tests', () => { 
expect(diffAB.intersect(treeB, keepEither).size).toBe(0); // 12. Cardinality relations. - expect(count(unionKeep)).toBe(count(treeA) + count(treeB) - count(intersection)); - expect(count(diffAB)).toBe(count(treeA) - count(intersection)); - expect(count(treeA)).toBe(count(diffAB) + count(intersection)); + expect(unionKeep.size).toBe(treeA.size + treeB.size - intersection.size); + expect(diffAB.size).toBe(treeA.size - intersection.size); + expect(treeA.size).toBe(diffAB.size + intersection.size); partition.checkValid(); unionDrop.checkValid(); @@ -179,19 +172,6 @@ describe('Set operation fuzz tests', () => { expect(treeA.toArray()).toEqual(treeAInitial); expect(treeB.toArray()).toEqual(treeBInitial); expect(treeC.toArray()).toEqual(treeCInitial); - - const unionStats = countTreeNodeStats(unionKeep); - const intersectionStats = countTreeNodeStats(intersection); - const diffABStats = countTreeNodeStats(diffAB); - const diffBAStats = countTreeNodeStats(diffBA); - const unionDropStats = countTreeNodeStats(unionDrop); - const partitionStats = countTreeNodeStats(partition); - expect(unionStats.newUnderfilled).toBe(0); - expect(intersectionStats.newUnderfilled).toBe(0); - expect(diffABStats.newUnderfilled).toBe(0); - expect(diffBAStats.newUnderfilled).toBe(0); - expect(unionDropStats.newUnderfilled).toBe(0); - expect(partitionStats.newUnderfilled).toBe(0); }); } } From 5be34bb18c3bfc8988bdd8750135388ec4339f12 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 20 Nov 2025 11:17:26 -0800 Subject: [PATCH 133/143] all tests passing --- extended/decompose.js | 17 +++++++------- extended/decompose.ts | 42 +++++++++++++++++++---------------- test/forEachKeyInBoth.test.ts | 4 +--- test/forEachKeyNotIn.test.ts | 4 +--- test/union.test.ts | 4 +--- 5 files changed, 35 insertions(+), 36 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index c4a610c..6fe5a7a 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -372,6 +372,7 @@ function 
processSide(disjoint, start, end, step, context) { var insertionCount = void 0; // non-recursive var insertionSize = void 0; // recursive if (isEntryInsertion) { + (0, b_tree_1.check)(subtree.isShared !== true); insertionCount = insertionSize = subtree.keys.length; } else { @@ -398,15 +399,11 @@ function processSide(disjoint, start, end, step, context) { if (newRoot) { // Set the spine root to the highest up new node; the rest of the spine is updated below spine[0] = newRoot; - unflushedSizes.forEach(function (count) { return (0, b_tree_1.check)(count === 0, "Unexpected unflushed size after root split."); }); - unflushedSizes.push(0); // new root level - isSharedFrontierDepth = sizeChangeDepth + 2; - unflushedSizes[sizeChangeDepth + 1] += insertionSize; - } - else { - isSharedFrontierDepth = sizeChangeDepth + 1; - unflushedSizes[sizeChangeDepth] += insertionSize; + unflushedSizes.push(0); // new root level, keep unflushed sizes in sync + sizeChangeDepth++; // account for the spine lengthening } + isSharedFrontierDepth = sizeChangeDepth + 1; + unflushedSizes[sizeChangeDepth] += insertionSize; // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. @@ -484,12 +481,16 @@ function splitUpwardsAndInsert(context, insertionDepth, subtree) { } } ; +/** + * Inserts an underfilled leaf (entryContainer), merging with its sibling if possible and splitting upward if not. 
+ */ function splitUpwardsAndInsertEntries(context, insertionDepth, entryContainer) { var branchingFactor = context.branchingFactor, spine = context.spine, balanceLeaves = context.balanceLeaves, mergeLeaves = context.mergeLeaves; var entryCount = entryContainer.keys.length; var parent = spine[insertionDepth]; var parentSize = parent.keys.length; if (parentSize + entryCount <= branchingFactor) { + // Sibling has capacity, just merge into it mergeLeaves(parent, entryContainer); return undefined; } diff --git a/extended/decompose.ts b/extended/decompose.ts index 41d5414..b37543c 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -4,7 +4,8 @@ import { createCursor, getKey, Cursor, moveForwardOne, moveTo, noop } from "./pa /** * A set of disjoint nodes, their heights, and the index of the tallest node. - * A height of -1 indicates an underfilled node that must be merged. + * A height of -1 indicates an underfilled non-shared node that must be merged. + * Any shared nodes (including underfilled leaves) must have height >= 0. 
* @internal */ export type DecomposeResult = { disjoint: AlternatingList>, tallestIndex: number }; @@ -486,6 +487,7 @@ function processSide( let insertionCount: number; // non-recursive let insertionSize: number; // recursive if (isEntryInsertion) { + check(subtree.isShared !== true); insertionCount = insertionSize = subtree.keys.length; } else { insertionCount = 1; @@ -517,15 +519,13 @@ function processSide( if (newRoot) { // Set the spine root to the highest up new node; the rest of the spine is updated below spine[0] = newRoot; - unflushedSizes.forEach((count) => check(count === 0, "Unexpected unflushed size after root split.")); - unflushedSizes.push(0); // new root level - isSharedFrontierDepth = sizeChangeDepth + 2; - unflushedSizes[sizeChangeDepth + 1] += insertionSize; - } else { - isSharedFrontierDepth = sizeChangeDepth + 1; - unflushedSizes[sizeChangeDepth] += insertionSize; + unflushedSizes.push(0); // new root level, keep unflushed sizes in sync + sizeChangeDepth++; // account for the spine lengthening } + isSharedFrontierDepth = sizeChangeDepth + 1; + unflushedSizes[sizeChangeDepth] += insertionSize; + // Finally, update the frontier from the highest new node downward // Note that this is often the point where the new subtree is attached, // but in the case of cascaded splits it may be higher up. @@ -610,6 +610,9 @@ function splitUpwardsAndInsert( } }; +/** + * Inserts an underfilled leaf (entryContainer), merging with its sibling if possible and splitting upward if not. 
+ */ function splitUpwardsAndInsertEntries( context: SideContext, insertionDepth: number, @@ -620,6 +623,7 @@ function splitUpwardsAndInsertEntries( const parent = spine[insertionDepth]; const parentSize = parent.keys.length; if (parentSize + entryCount <= branchingFactor) { + // Sibling has capacity, just merge into it mergeLeaves(parent, entryContainer); return undefined; } else { @@ -747,6 +751,17 @@ function insertNoCount( parent.keys.splice(index, 0, child.maxKey()); } +type SideContext = { + branchingFactor: number; + spine: BNode[]; + sideIndex: (node: BNodeInternal) => number; + sideInsertionIndex: (node: BNodeInternal) => number; + splitOffSide: (node: BNodeInternal) => BNodeInternal; + updateMax: (node: BNodeInternal, maxBelow: K) => void; + mergeLeaves: (leaf: BNode, entries: BNode) => void; + balanceLeaves: (parent: BNodeInternal, underfilled: BNode, toTake: number) => void; +}; + // ---- Side-specific delegates for merging subtrees into a frontier ---- function getLeftmostIndex(): number { @@ -802,14 +817,3 @@ function mergeLeftEntries(leaf: BNode, entries: BNode): void{ leaf.keys.unshift.apply(leaf.keys, entries.keys); leaf.values.unshift.apply(leaf.values, entries.values); } - -type SideContext = { - branchingFactor: number; - spine: BNode[]; - sideIndex: (node: BNodeInternal) => number; - sideInsertionIndex: (node: BNodeInternal) => number; - splitOffSide: (node: BNodeInternal) => BNodeInternal; - updateMax: (node: BNodeInternal, maxBelow: K) => void; - mergeLeaves: (leaf: BNode, entries: BNode) => void; - balanceLeaves: (parent: BNodeInternal, underfilled: BNode, toTake: number) => void; -}; diff --git a/test/forEachKeyInBoth.test.ts b/test/forEachKeyInBoth.test.ts index 54adaf4..3764e78 100644 --- a/test/forEachKeyInBoth.test.ts +++ b/test/forEachKeyInBoth.test.ts @@ -2,7 +2,7 @@ import BTreeEx from '../extended'; import intersect from '../extended/intersect'; import { comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 
'mersenne-twister'; -import { countTreeNodeStats, makeArray } from './shared'; +import { makeArray } from './shared'; var test: (name: string, f: () => void) => void = it; @@ -27,8 +27,6 @@ const runIntersectionImplementations = ( const expectedEntries = intersectionCalls.map(({ key, leftValue }) => [key, leftValue] as [number, number]); expect(resultTree.toArray()).toEqual(expectedEntries); resultTree.checkValid(); - const stats = countTreeNodeStats(resultTree); - expect(stats.newUnderfilled).toBe(0); assertion(intersectionCalls); }; diff --git a/test/forEachKeyNotIn.test.ts b/test/forEachKeyNotIn.test.ts index 543c8d1..919a178 100644 --- a/test/forEachKeyNotIn.test.ts +++ b/test/forEachKeyNotIn.test.ts @@ -3,7 +3,7 @@ import forEachKeyNotIn from '../extended/forEachKeyNotIn'; import subtract from '../extended/subtract'; import { comparatorErrorMsg, branchingFactorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; -import { countTreeNodeStats, makeArray } from './shared'; +import { makeArray } from './shared'; type NotInCall = { key: number, value: number }; @@ -22,8 +22,6 @@ const runSubtractionImplementations = ( const subtractCalls = resultTree.toArray().map(([key, value]) => ({ key, value })); expect(subtractCalls).toEqual(forEachCalls); resultTree.checkValid(); - const stats = countTreeNodeStats(resultTree); - expect(stats.newUnderfilled).toBe(0); assertion(subtractCalls); }; diff --git a/test/union.test.ts b/test/union.test.ts index 8fdba70..fd865df 100644 --- a/test/union.test.ts +++ b/test/union.test.ts @@ -3,7 +3,7 @@ import BTreeEx from '../extended'; import union from '../extended/union'; import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; -import { countTreeNodeStats, makeArray, randomInt } from './shared'; +import { makeArray, randomInt } from './shared'; var test: (name: string, f: () => void) => void = it; @@ -819,8 +819,6 @@ 
describe('BTree union fuzz tests', () => { // Check for shared mutability issues expect(treeA.toArray()).toEqual(aArray); expect(treeB.toArray()).toEqual(bArray); - const unionStats = countTreeNodeStats(unioned); - expect(unionStats.newUnderfilled).toBe(0); }); } } From 206b8b7d547b356e80a1ecf9f17bbd88d57a926c Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 20 Nov 2025 11:29:16 -0800 Subject: [PATCH 134/143] test refactor --- ...achKeyInBoth.test.ts => intersect.test.ts} | 58 ++++++++-------- ...rEachKeyNotIn.test.ts => subtract.test.ts} | 68 +++++++++---------- 2 files changed, 63 insertions(+), 63 deletions(-) rename test/{forEachKeyInBoth.test.ts => intersect.test.ts} (82%) rename test/{forEachKeyNotIn.test.ts => subtract.test.ts} (80%) diff --git a/test/forEachKeyInBoth.test.ts b/test/intersect.test.ts similarity index 82% rename from test/forEachKeyInBoth.test.ts rename to test/intersect.test.ts index 3764e78..5b0331f 100644 --- a/test/forEachKeyInBoth.test.ts +++ b/test/intersect.test.ts @@ -8,7 +8,7 @@ var test: (name: string, f: () => void) => void = it; type SharedCall = { key: number, leftValue: number, rightValue: number }; -const runIntersectionImplementations = ( +const runForEachKeyInBothAndIntersect = ( left: BTreeEx, right: BTreeEx, assertion: (calls: SharedCall[]) => void @@ -30,12 +30,12 @@ const runIntersectionImplementations = ( assertion(intersectionCalls); }; -const expectIntersectionCalls = ( +const expectForEachKeyInBothAndIntersectCalls = ( left: BTreeEx, right: BTreeEx, expected: SharedCall[] ) => { - runIntersectionImplementations(left, right, (calls) => { + runForEachKeyInBothAndIntersect(left, right, (calls) => { expect(calls).toEqual(expected); }); }; @@ -45,9 +45,9 @@ const tuplesToRecords = (entries: Array<[number, number, number]>): SharedCall[] const tuples = (...pairs: Array<[number, number]>) => pairs; -describe('BTree forEachKeyInBoth tests with fanout 32', testForEachKeyInBoth.bind(null, 32)); -describe('BTree 
forEachKeyInBoth tests with fanout 10', testForEachKeyInBoth.bind(null, 10)); -describe('BTree forEachKeyInBoth tests with fanout 4', testForEachKeyInBoth.bind(null, 4)); +describe('BTree forEachKeyInBoth/intersect tests with fanout 32', testForEachKeyInBoth.bind(null, 32)); +describe('BTree forEachKeyInBoth/intersect tests with fanout 10', testForEachKeyInBoth.bind(null, 10)); +describe('BTree forEachKeyInBoth/intersect tests with fanout 4', testForEachKeyInBoth.bind(null, 4)); function testForEachKeyInBoth(maxNodeSize: number) { const compare = (a: number, b: number) => a - b; @@ -55,39 +55,39 @@ function testForEachKeyInBoth(maxNodeSize: number) { const buildTree = (entries: Array<[number, number]>) => new BTreeEx(entries, compare, maxNodeSize); - test('forEachKeyInBoth two empty trees', () => { + test('forEachKeyInBoth/intersect two empty trees', () => { const tree1 = buildTree([]); const tree2 = buildTree([]); - expectIntersectionCalls(tree1, tree2, []); + expectForEachKeyInBothAndIntersectCalls(tree1, tree2, []); }); - test('forEachKeyInBoth empty tree with non-empty tree', () => { + test('forEachKeyInBoth/intersect empty tree with non-empty tree', () => { const tree1 = buildTree([]); const tree2 = buildTree(tuples([1, 10], [2, 20], [3, 30])); - expectIntersectionCalls(tree1, tree2, []); - expectIntersectionCalls(tree2, tree1, []); + expectForEachKeyInBothAndIntersectCalls(tree1, tree2, []); + expectForEachKeyInBothAndIntersectCalls(tree2, tree1, []); }); - test('forEachKeyInBoth with no overlapping keys', () => { + test('forEachKeyInBoth/intersect with no overlapping keys', () => { const tree1 = buildTree(tuples([1, 10], [3, 30], [5, 50])); const tree2 = buildTree(tuples([2, 20], [4, 40], [6, 60])); - expectIntersectionCalls(tree1, tree2, []); + expectForEachKeyInBothAndIntersectCalls(tree1, tree2, []); }); - test('forEachKeyInBoth with single overlapping key', () => { + test('forEachKeyInBoth/intersect with single overlapping key', () => { const tree1 = 
buildTree(tuples([1, 10], [2, 20], [3, 30])); const tree2 = buildTree(tuples([0, 100], [2, 200], [4, 400])); - expectIntersectionCalls(tree1, tree2, [{ key: 2, leftValue: 20, rightValue: 200 }]); + expectForEachKeyInBothAndIntersectCalls(tree1, tree2, [{ key: 2, leftValue: 20, rightValue: 200 }]); }); - test('forEachKeyInBoth with multiple overlapping keys maintains tree contents', () => { + test('forEachKeyInBoth/intersect with multiple overlapping keys maintains tree contents', () => { const leftEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; const rightEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400], [6, 600]]; const tree1 = buildTree(leftEntries); const tree2 = buildTree(rightEntries); const leftBefore = tree1.toArray(); const rightBefore = tree2.toArray(); - expectIntersectionCalls(tree1, tree2, [ + expectForEachKeyInBothAndIntersectCalls(tree1, tree2, [ { key: 2, leftValue: 20, rightValue: 200 }, { key: 4, leftValue: 40, rightValue: 400 }, ]); @@ -97,17 +97,17 @@ function testForEachKeyInBoth(maxNodeSize: number) { tree2.checkValid(); }); - test('forEachKeyInBoth with contiguous overlap yields sorted keys', () => { + test('forEachKeyInBoth/intersect with contiguous overlap yields sorted keys', () => { const tree1 = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); const tree2 = buildTree(tuples([3, 30], [4, 40], [5, 50], [6, 60], [7, 70])); - runIntersectionImplementations(tree1, tree2, ( calls ) => { + runForEachKeyInBothAndIntersect(tree1, tree2, ( calls ) => { expect(calls.map(c => c.key)).toEqual([3, 4, 5, 6]); expect(calls.map(c => c.leftValue)).toEqual([3, 4, 5, 6]); expect(calls.map(c => c.rightValue)).toEqual([30, 40, 50, 60]); }); }); - test('forEachKeyInBoth large overlapping range counts each shared key once', () => { + test('forEachKeyInBoth/intersect large overlapping range counts each shared key once', () => { const size = 1000; const overlapStart = 500; const leftEntries = 
Array.from({ length: size }, (_, i) => [i, i * 3] as [number, number]); @@ -117,7 +117,7 @@ function testForEachKeyInBoth(maxNodeSize: number) { }); const tree1 = buildTree(leftEntries); const tree2 = buildTree(rightEntries); - runIntersectionImplementations(tree1, tree2, (calls) => { + runForEachKeyInBothAndIntersect(tree1, tree2, (calls) => { expect(calls.length).toBe(size - overlapStart); expect(calls[0]).toEqual({ key: overlapStart, @@ -131,10 +131,10 @@ function testForEachKeyInBoth(maxNodeSize: number) { }); }); - test('forEachKeyInBoth tree with itself visits each key once', () => { + test('forEachKeyInBoth/intersect tree with itself visits each key once', () => { const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); const tree = buildTree(entries); - runIntersectionImplementations(tree, tree, (calls) => { + runForEachKeyInBothAndIntersect(tree, tree, (calls) => { expect(calls.length).toBe(entries.length); for (let i = 0; i < entries.length; i++) { const [key, value] = entries[i]; @@ -143,14 +143,14 @@ function testForEachKeyInBoth(maxNodeSize: number) { }); }); - test('forEachKeyInBoth arguments determine left/right values', () => { + test('forEachKeyInBoth/intersect arguments determine left/right values', () => { const tree1 = buildTree(tuples([1, 100], [2, 200], [4, 400])); const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); - expectIntersectionCalls(tree1, tree2, [ + expectForEachKeyInBothAndIntersectCalls(tree1, tree2, [ { key: 2, leftValue: 200, rightValue: 20 }, { key: 4, leftValue: 400, rightValue: 40 }, ]); - expectIntersectionCalls(tree2, tree1, [ + expectForEachKeyInBothAndIntersectCalls(tree2, tree1, [ { key: 2, leftValue: 20, rightValue: 200 }, { key: 4, leftValue: 40, rightValue: 400 }, ]); @@ -210,7 +210,7 @@ describe('BTree forEachKeyInBoth and intersect input/output validation', () => { }); }); -describe('BTree forEachKeyInBoth fuzz tests', () => { +describe('BTree forEachKeyInBoth/intersect fuzz 
tests', () => { const compare = (a: number, b: number) => a - b; const FUZZ_SETTINGS = { branchingFactors: [4, 5, 32], @@ -277,13 +277,13 @@ describe('BTree forEachKeyInBoth fuzz tests', () => { } const expectedRecords = tuplesToRecords(expected); - expectIntersectionCalls(treeA, treeB, expectedRecords); + expectForEachKeyInBothAndIntersectCalls(treeA, treeB, expectedRecords); const swappedExpected = expectedRecords.map(({ key, leftValue, rightValue }) => ({ key, leftValue: rightValue, rightValue: leftValue, })); - expectIntersectionCalls(treeB, treeA, swappedExpected); + expectForEachKeyInBothAndIntersectCalls(treeB, treeA, swappedExpected); expect(treeA.toArray()).toEqual(aArray); expect(treeB.toArray()).toEqual(bArray); diff --git a/test/forEachKeyNotIn.test.ts b/test/subtract.test.ts similarity index 80% rename from test/forEachKeyNotIn.test.ts rename to test/subtract.test.ts index 919a178..25d5f1a 100644 --- a/test/forEachKeyNotIn.test.ts +++ b/test/subtract.test.ts @@ -7,7 +7,7 @@ import { makeArray } from './shared'; type NotInCall = { key: number, value: number }; -const runSubtractionImplementations = ( +const runForEachKeyNotInAndSubtract = ( include: BTreeEx, exclude: BTreeEx, assertion: (calls: NotInCall[]) => void @@ -25,12 +25,12 @@ const runSubtractionImplementations = ( assertion(subtractCalls); }; -const expectSubtractionCalls = ( +const expectForEachKeyNotInAndSubtractCalls = ( include: BTreeEx, exclude: BTreeEx, expected: NotInCall[] ) => { - runSubtractionImplementations(include, exclude, (calls) => { + runForEachKeyNotInAndSubtract(include, exclude, (calls) => { expect(calls).toEqual(expected); }); }; @@ -40,9 +40,9 @@ const tuplesToRecords = (entries: Array<[number, number]>): NotInCall[] => const tuples = (...pairs: Array<[number, number]>) => pairs; -describe('BTree forEachKeyNotIn tests with fanout 32', testForEachKeyNotIn.bind(null, 32)); -describe('BTree forEachKeyNotIn tests with fanout 10', testForEachKeyNotIn.bind(null, 10)); 
-describe('BTree forEachKeyNotIn tests with fanout 4', testForEachKeyNotIn.bind(null, 4)); +describe('BTree forEachKeyNotIn/subtract tests with fanout 32', testForEachKeyNotIn.bind(null, 32)); +describe('BTree forEachKeyNotIn/subtract tests with fanout 10', testForEachKeyNotIn.bind(null, 10)); +describe('BTree forEachKeyNotIn/subtract tests with fanout 4', testForEachKeyNotIn.bind(null, 4)); function testForEachKeyNotIn(maxNodeSize: number) { const compare = (a: number, b: number) => a - b; @@ -50,62 +50,62 @@ function testForEachKeyNotIn(maxNodeSize: number) { const buildTree = (entries: Array<[number, number]>) => new BTreeEx(entries, compare, maxNodeSize); - it('forEachKeyNotIn two empty trees', () => { + it('forEachKeyNotIn/subtract two empty trees', () => { const includeTree = buildTree([]); const excludeTree = buildTree([]); - expectSubtractionCalls(includeTree, excludeTree, []); + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, []); }); - it('forEachKeyNotIn include empty tree with non-empty tree', () => { + it('forEachKeyNotIn/subtract include empty tree with non-empty tree', () => { const includeTree = buildTree([]); const excludeTree = buildTree(tuples([1, 10], [2, 20], [3, 30])); - expectSubtractionCalls(includeTree, excludeTree, []); + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, []); }); - it('forEachKeyNotIn exclude tree empty yields all include keys', () => { + it('forEachKeyNotIn/subtract exclude tree empty yields all include keys', () => { const includeEntries: Array<[number, number]> = [[1, 10], [3, 30], [5, 50]]; const includeTree = buildTree(includeEntries); const excludeTree = buildTree([]); const expected = tuplesToRecords(includeEntries); - expectSubtractionCalls(includeTree, excludeTree, expected); + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, expected); }); - it('forEachKeyNotIn with no overlapping keys returns include tree contents', () => { + it('forEachKeyNotIn/subtract with no 
overlapping keys returns include tree contents', () => { const includeEntries: Array<[number, number]> = [[1, 10], [3, 30], [5, 50]]; const excludeEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400]]; const includeTree = buildTree(includeEntries); const excludeTree = buildTree(excludeEntries); const expected = tuplesToRecords(includeEntries); - expectSubtractionCalls(includeTree, excludeTree, expected); + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, expected); }); - it('forEachKeyNotIn with overlapping keys excludes matches', () => { + it('forEachKeyNotIn/subtract with overlapping keys excludes matches', () => { const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40], [5, 50])); const excludeTree = buildTree(tuples([0, 100], [2, 200], [4, 400], [6, 600])); - expectSubtractionCalls(includeTree, excludeTree, [ + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, [ { key: 1, value: 10 }, { key: 3, value: 30 }, { key: 5, value: 50 }, ]); }); - it('forEachKeyNotIn excludes leading overlap then emits remaining keys', () => { + it('forEachKeyNotIn/subtract excludes leading overlap then emits remaining keys', () => { const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40])); const excludeTree = buildTree(tuples([1, 100], [2, 200])); - expectSubtractionCalls(includeTree, excludeTree, [ + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, [ { key: 3, value: 30 }, { key: 4, value: 40 }, ]); }); - it('forEachKeyNotIn maintains tree contents', () => { + it('forEachKeyNotIn/subtract maintains tree contents', () => { const includeEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; const excludeEntries: Array<[number, number]> = [[1, 100], [3, 300], [5, 500]]; const includeTree = buildTree(includeEntries); const excludeTree = buildTree(excludeEntries); const includeBefore = includeTree.toArray(); const excludeBefore = excludeTree.toArray(); - 
expectSubtractionCalls(includeTree, excludeTree, [ + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, [ { key: 2, value: 20 }, { key: 4, value: 40 }, ]); @@ -115,16 +115,16 @@ function testForEachKeyNotIn(maxNodeSize: number) { excludeTree.checkValid(); }); - it('forEachKeyNotIn with contiguous overlap yields sorted survivors', () => { + it('forEachKeyNotIn/subtract with contiguous overlap yields sorted survivors', () => { const includeTree = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); const excludeTree = buildTree(tuples([3, 30], [4, 40], [5, 50])); - runSubtractionImplementations(includeTree, excludeTree, (calls) => { + runForEachKeyNotInAndSubtract(includeTree, excludeTree, (calls) => { expect(calls.map(c => c.key)).toEqual([1, 2, 6]); expect(calls.map(c => c.value)).toEqual([1, 2, 6]); }); }); - it('forEachKeyNotIn large subtraction leaves prefix and suffix ranges', () => { + it('forEachKeyNotIn/subtract large subtraction leaves prefix and suffix ranges', () => { const size = 1000; const excludeStart = 200; const excludeSpan = 500; @@ -135,7 +135,7 @@ function testForEachKeyNotIn(maxNodeSize: number) { }); const includeTree = buildTree(includeEntries); const excludeTree = buildTree(excludeEntries); - runSubtractionImplementations(includeTree, excludeTree, (calls) => { + runForEachKeyNotInAndSubtract(includeTree, excludeTree, (calls) => { expect(calls.length).toBe(size - excludeSpan); expect(calls[0]).toEqual({ key: 0, value: 0 }); const lastCall = calls[calls.length - 1]; @@ -145,25 +145,25 @@ function testForEachKeyNotIn(maxNodeSize: number) { }); }); - it('forEachKeyNotIn tree with itself visits no keys', () => { + it('forEachKeyNotIn/subtract tree with itself visits no keys', () => { const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); const tree = buildTree(entries); - expectSubtractionCalls(tree, tree, []); + expectForEachKeyNotInAndSubtractCalls(tree, tree, []); }); - 
it('forEachKeyNotIn exclude superset yields empty result', () => { + it('forEachKeyNotIn/subtract exclude superset yields empty result', () => { const includeTree = buildTree(tuples([2, 200], [3, 300])); const excludeTree = buildTree(tuples([1, 100], [2, 200], [3, 300], [4, 400])); - expectSubtractionCalls(includeTree, excludeTree, []); + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, []); }); - it('forEachKeyNotIn arguments determine surviving keys', () => { + it('forEachKeyNotIn/subtract arguments determine surviving keys', () => { const tree1 = buildTree(tuples([1, 100], [2, 200], [4, 400])); const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); - expectSubtractionCalls(tree1, tree2, [ + expectForEachKeyNotInAndSubtractCalls(tree1, tree2, [ { key: 1, value: 100 }, ]); - expectSubtractionCalls(tree2, tree1, [ + expectForEachKeyNotInAndSubtractCalls(tree2, tree1, [ { key: 3, value: 30 }, ]); }); @@ -233,7 +233,7 @@ describe('BTree forEachKeyNotIn and subtract input/output validation', () => { }); }); -describe('BTree forEachKeyNotIn fuzz tests', () => { +describe('BTree forEachKeyNotIn/subtract fuzz tests', () => { const compare = (a: number, b: number) => a - b; const FUZZ_SETTINGS = { branchingFactors: [4, 5, 32], @@ -297,8 +297,8 @@ describe('BTree forEachKeyNotIn fuzz tests', () => { const expectedA = aArray.filter(([key]) => !bMap.has(key)); const expectedB = bArray.filter(([key]) => !aMap.has(key)); - expectSubtractionCalls(treeA, treeB, tuplesToRecords(expectedA)); - expectSubtractionCalls(treeB, treeA, tuplesToRecords(expectedB)); + expectForEachKeyNotInAndSubtractCalls(treeA, treeB, tuplesToRecords(expectedA)); + expectForEachKeyNotInAndSubtractCalls(treeB, treeA, tuplesToRecords(expectedB)); expect(treeA.toArray()).toEqual(aArray); expect(treeB.toArray()).toEqual(bArray); From ecef29f3bf8aecd0d5c1fa01dbcd042429f67c28 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 20 Nov 2025 14:47:59 -0800 Subject: [PATCH 135/143] 
cleanup and reduce unfilled node creation --- extended/decompose.js | 38 ++++++++++++++++------ extended/decompose.ts | 37 +++++++++++++++------ extended/shared.js | 6 +--- extended/shared.ts | 8 ++--- test/union.test.ts | 76 +++++++++++++++++++++++++++++-------------- 5 files changed, 110 insertions(+), 55 deletions(-) diff --git a/extended/decompose.js b/extended/decompose.js index 6fe5a7a..7b6ac9f 100644 --- a/extended/decompose.js +++ b/extended/decompose.js @@ -43,11 +43,17 @@ function decompose(left, right, combineFn, ignoreRight) { (0, shared_1.alternatingPush)(disjoint, height, leaf); }; var addSharedNodeToDisjointSet = function (node, height) { - (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); // flush pending entries + (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); pending.length = 0; - node.isShared = true; - (0, shared_1.alternatingPush)(disjoint, height, node); + // Don't share underfilled leaves, instead mark them as needing merging + if (node.isLeaf && node.keys.length < minSize) { + (0, shared_1.alternatingPush)(disjoint, -1, node.clone()); + } + else { + node.isShared = true; + (0, shared_1.alternatingPush)(disjoint, height, node); + } if (height > tallestHeight) { tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; tallestHeight = height; @@ -109,10 +115,23 @@ function decompose(left, right, combineFn, ignoreRight) { if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { if (!payload.disqualified) { - highestDisjoint = { node: parent, height: height }; if (stepDownIndex === Number.POSITIVE_INFINITY) { // We have finished our walk, and we won't be stepping down, so add the root - addHighestDisjoint(); + // Roots are allowed to be underfilled, so break the root up here if so to avoid + // creating underfilled interior nodes during reconstruction. 
+ // Note: the main btree implementation allows underfilled nodes in general, this algorithm + // guarantees that no additional underfilled nodes are created beyond what was already present. + if (parent.keys.length < minSize) { + for (var i = fromIndex; i < children.length; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + else { + addSharedNodeToDisjointSet(parent, height); + } + highestDisjoint = undefined; + } + else { + highestDisjoint = { node: parent, height: height }; } } else { @@ -270,11 +289,10 @@ function decompose(left, right, combineFn, ignoreRight) { } } // Ensure any trailing non-disjoint entries are added - var createdLeaves = (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); - // In fully interleaved cases, no leaves may be created until now - if (tallestHeight < 0 && createdLeaves > 0) { - tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; - tallestHeight = 0; + (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); + // In cases like full interleaving, no leaves may be created until now + if (tallestHeight < 0 && (0, shared_1.alternatingCount)(disjoint) > 0) { + tallestIndex = 0; } return { disjoint: disjoint, tallestIndex: tallestIndex }; } diff --git a/extended/decompose.ts b/extended/decompose.ts index b37543c..9a9594f 100644 --- a/extended/decompose.ts +++ b/extended/decompose.ts @@ -64,11 +64,18 @@ export function decompose( } const addSharedNodeToDisjointSet = (node: BNode, height: number) => { - makeLeavesFrom(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); // flush pending entries + makeLeavesFrom(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); pending.length = 0; - node.isShared = true; - alternatingPush(disjoint, height, node); + + // Don't share underfilled leaves, instead mark them as needing merging + if (node.isLeaf && node.keys.length < minSize) { + alternatingPush(disjoint, -1, node.clone()); + } else { + 
node.isShared = true; + alternatingPush(disjoint, height, node); + } + if (height > tallestHeight) { tallestIndex = alternatingCount(disjoint) - 1; tallestHeight = height; @@ -156,10 +163,21 @@ export function decompose( if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { if (!payload.disqualified) { - highestDisjoint = { node: parent, height }; if (stepDownIndex === Number.POSITIVE_INFINITY) { // We have finished our walk, and we won't be stepping down, so add the root - addHighestDisjoint(); + // Roots are allowed to be underfilled, so break the root up here if so to avoid + // creating underfilled interior nodes during reconstruction. + // Note: the main btree implementation allows underfilled nodes in general, this algorithm + // guarantees that no additional underfilled nodes are created beyond what was already present. + if (parent.keys.length < minSize) { + for (let i = fromIndex; i < children.length; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } else { + addSharedNodeToDisjointSet(parent, height); + } + highestDisjoint = undefined; + } else { + highestDisjoint = { node: parent, height }; } } else { addHighestDisjoint(); @@ -353,11 +371,10 @@ export function decompose( } // Ensure any trailing non-disjoint entries are added - const createdLeaves = makeLeavesFrom(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); - // In fully interleaved cases, no leaves may be created until now - if (tallestHeight < 0 && createdLeaves > 0) { - tallestIndex = alternatingCount(disjoint) - 1; - tallestHeight = 0; + makeLeavesFrom(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); + // In cases like full interleaving, no leaves may be created until now + if (tallestHeight < 0 && alternatingCount(disjoint) > 0) { + tallestIndex = 0; } return { disjoint, tallestIndex }; } diff --git a/extended/shared.js b/extended/shared.js index 
ea4e2c8..982b280 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -11,7 +11,6 @@ var b_tree_1 = require("../b+tree"); * @param maxNodeSize The maximum node size (branching factor) for the resulting leaves. * @param onLeafCreation Called when a new leaf is created. * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. - * @returns The number of leaves created. * @internal */ function makeLeavesFrom(alternatingList, maxNodeSize, onLeafCreation, loadFactor) { @@ -19,11 +18,9 @@ function makeLeavesFrom(alternatingList, maxNodeSize, onLeafCreation, loadFactor if (totalPairs === 0) return 0; var targetSize = Math.ceil(maxNodeSize * loadFactor); - // Ensure we don't make any underfilled nodes unless we have to. - var targetLeafCount = totalPairs <= maxNodeSize ? 1 : Math.ceil(totalPairs / targetSize); // This method creates as many evenly filled leaves as possible from // the pending entries. All will be > 50% full if we are creating more than one leaf. - var remainingLeaves = targetLeafCount; + var remainingLeaves = totalPairs <= maxNodeSize ? 1 : Math.ceil(totalPairs / targetSize); var remaining = totalPairs; var pairIndex = 0; while (remainingLeaves > 0) { @@ -40,7 +37,6 @@ function makeLeavesFrom(alternatingList, maxNodeSize, onLeafCreation, loadFactor var leaf = new b_tree_1.BNode(keys, vals); onLeafCreation(leaf); } - return targetLeafCount; } exports.makeLeavesFrom = makeLeavesFrom; ; diff --git a/extended/shared.ts b/extended/shared.ts index 4ca123e..19c5a34 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -26,7 +26,6 @@ export type AlternatingList = Array; * @param maxNodeSize The maximum node size (branching factor) for the resulting leaves. * @param onLeafCreation Called when a new leaf is created. * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. - * @returns The number of leaves created. 
* @internal */ export function makeLeavesFrom( @@ -34,18 +33,16 @@ export function makeLeavesFrom( maxNodeSize: number, onLeafCreation: (node: BNode) => void, loadFactor: number -): number { +) { const totalPairs = alternatingCount(alternatingList); if (totalPairs === 0) return 0; const targetSize = Math.ceil(maxNodeSize * loadFactor); - // Ensure we don't make any underfilled nodes unless we have to. - const targetLeafCount = totalPairs <= maxNodeSize ? 1 : Math.ceil(totalPairs / targetSize); // This method creates as many evenly filled leaves as possible from // the pending entries. All will be > 50% full if we are creating more than one leaf. - let remainingLeaves = targetLeafCount; + let remainingLeaves = totalPairs <= maxNodeSize ? 1 : Math.ceil(totalPairs / targetSize); let remaining = totalPairs; let pairIndex = 0; while (remainingLeaves > 0) { @@ -62,7 +59,6 @@ export function makeLeavesFrom( const leaf = new BNode(keys, vals); onLeafCreation(leaf); } - return targetLeafCount; }; // ------- Alternating list helpers ------- diff --git a/test/union.test.ts b/test/union.test.ts index fd865df..1eb5d99 100644 --- a/test/union.test.ts +++ b/test/union.test.ts @@ -91,8 +91,9 @@ function testUnion(maxNodeSize: number) { return { result, expected }; }; - test('Union disjoint roots reuses appended subtree', () => { - const size = maxNodeSize * 3; + test('Union disjoint roots reuses roots', () => { + // ensure the roots are not underfilled, as union will try to merge underfilled roots + const size = maxNodeSize * maxNodeSize; const tree1 = buildTree(range(0, size), 1, 0); const offset = size * 5; const tree2 = buildTree(range(offset, offset + size), 2, 0); @@ -266,6 +267,55 @@ function testUnion(maxNodeSize: number) { expect(result.size).toBe(treeOdd.size + treeEven.size); }); + test('Union merges disjoint leaf roots into a single leaf', () => { + const perTree = Math.max(1, Math.floor(maxNodeSize / 2) - 1); + const keysA = range(1, perTree).map(i => i); + const keysB 
= keysA.map(k => k * 1000); + const tree1 = buildTree(keysA); + const tree2 = buildTree(keysB); + + expectRootLeafState(tree1, true); + expectRootLeafState(tree2, true); + + const unioned = tree1.union(tree2, () => { + throw new Error('Should not be called for disjoint keys'); + }); + const resultRoot = unioned['_root'] as any; + const expectedKeys = keysA.concat(keysB).sort(compare); + expect(resultRoot.isLeaf).toBe(true); + expect(resultRoot.keys).toEqual(expectedKeys); + }); + + test('Union combines underfilled non-leaf roots into a filled root', () => { + const minChildren = Math.floor(maxNodeSize / 2); + const targetLeavesPerTree = minChildren - 1; + if (targetLeavesPerTree === 1) { + return; // cannot test this case with only one leaf per tree + } + const entriesPerLeaf = maxNodeSize; + const buildUnderfilledTree = (startKey: number) => { + const keys: number[] = []; + for (let leaf = 0; leaf < targetLeavesPerTree; leaf++) { + for (let i = 0; i < entriesPerLeaf; i++) + keys.push(startKey + leaf * entriesPerLeaf + i); + } + const tree = buildTree(keys); + const root = tree['_root'] as any; + expect(root.isLeaf).toBe(false); + expect(root.children.length).toBeLessThan(minChildren); + return { tree, nextKey: startKey + keys.length, childCount: root.children.length }; + }; + + const first = buildUnderfilledTree(0); + const second = buildUnderfilledTree(first.nextKey + maxNodeSize * 10); + + const unioned = first.tree.union(second.tree, () => { throw new Error('Should not be called for disjoint keys'); }); + const resultRoot = unioned['_root'] as any; + expect(resultRoot.isLeaf).toBe(false); + expect(resultRoot.children.length).toBeGreaterThanOrEqual(minChildren); + expect(resultRoot.children.length).toBe(first.childCount + second.childCount); + }); + test('Union with single boundary overlap prefers right value', () => { const size = maxNodeSize * 2; const tree1 = buildTree(range(0, size), 1, 0); @@ -619,28 +669,6 @@ function testUnion(maxNodeSize: number) { 
expect(result.toArray()).toEqual([[1, 10], [4, 400]]); }); - test('Union reuses appended subtree with minimum fanout', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); - - for (let i = 0; i < 400; i++) { - tree1.set(i, i); - } - for (let i = 400; i < 800; i++) { - tree2.set(i, i * 2); - } - - const unionFn: UnionFn = () => { - throw new Error('Should not be called for disjoint ranges'); - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, ({ result }) => { - const resultRoot = result['_root'] as any; - const tree2Root = tree2['_root'] as any; - expect(sharesNode(resultRoot, tree2Root)).toBe(true); - }); - }); - test('Union with large disjoint ranges', () => { const tree1 = new BTreeEx([], compare, maxNodeSize); const tree2 = new BTreeEx([], compare, maxNodeSize); From e07d01029f857cafa18e0b905f19030eb5e6b1fa Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 20 Nov 2025 15:39:13 -0800 Subject: [PATCH 136/143] wip --- test/bulkLoad.test.ts | 2 +- test/intersect.test.ts | 151 +++++++++-------- test/setOperationFuzz.test.ts | 298 +++++++++++++++------------------- test/shared.d.ts | 22 ++- test/shared.js | 86 ++++++++-- test/shared.ts | 107 +++++++++++- test/subtract.test.ts | 131 +++++++-------- test/union.test.ts | 141 ++++++++-------- 8 files changed, 542 insertions(+), 396 deletions(-) diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts index 988e309..1bc017f 100644 --- a/test/bulkLoad.test.ts +++ b/test/bulkLoad.test.ts @@ -198,7 +198,7 @@ describe('bulkLoad fuzz tests', () => { const size = baseSize + sizeJitter; test(`size ${size}, iteration ${iteration}`, () => { - const keys = makeArray(size, false, 0, 0, rng); + const keys = makeArray(size, false, 0, rng); const pairs = pairsFromKeys(keys).map(([key, value], index) => [key, value + index] as Pair); const { tree, root } = buildTreeFromPairs(maxNodeSize, pairs, loadFactor); expectTreeMatches(tree, pairs); diff --git 
a/test/intersect.test.ts b/test/intersect.test.ts index 5b0331f..6bf67d5 100644 --- a/test/intersect.test.ts +++ b/test/intersect.test.ts @@ -2,7 +2,15 @@ import BTreeEx from '../extended'; import intersect from '../extended/intersect'; import { comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; -import { makeArray } from './shared'; +import { + applyRemovalRunsToTree, + buildEntriesFromMap, + expectTreeMatchesEntries, + forEachFuzzCase, + makeArray, + SetOperationFuzzSettings, + TreeEntries +} from './shared'; var test: (name: string, f: () => void) => void = it; @@ -47,7 +55,7 @@ const tuples = (...pairs: Array<[number, number]>) => pairs; describe('BTree forEachKeyInBoth/intersect tests with fanout 32', testForEachKeyInBoth.bind(null, 32)); describe('BTree forEachKeyInBoth/intersect tests with fanout 10', testForEachKeyInBoth.bind(null, 10)); -describe('BTree forEachKeyInBoth/intersect tests with fanout 4', testForEachKeyInBoth.bind(null, 4)); +describe('BTree forEachKeyInBoth/intersect tests with fanout 4', testForEachKeyInBoth.bind(null, 4)); function testForEachKeyInBoth(maxNodeSize: number) { const compare = (a: number, b: number) => a - b; @@ -100,7 +108,7 @@ function testForEachKeyInBoth(maxNodeSize: number) { test('forEachKeyInBoth/intersect with contiguous overlap yields sorted keys', () => { const tree1 = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); const tree2 = buildTree(tuples([3, 30], [4, 40], [5, 50], [6, 60], [7, 70])); - runForEachKeyInBothAndIntersect(tree1, tree2, ( calls ) => { + runForEachKeyInBothAndIntersect(tree1, tree2, (calls) => { expect(calls.map(c => c.key)).toEqual([3, 4, 5, 6]); expect(calls.map(c => c.leftValue)).toEqual([3, 4, 5, 6]); expect(calls.map(c => c.rightValue)).toEqual([30, 40, 50, 60]); @@ -202,97 +210,84 @@ describe('BTree forEachKeyInBoth early exiting', () => { }); describe('BTree forEachKeyInBoth and intersect input/output validation', () => { - 
test('forEachKeyInBoth throws error when comparators differ', () => { + test('forEachKeyInBoth throws error when comparators differ', () => { const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); - expect(() => tree1.forEachKeyInBoth(tree2, () => {})).toThrow(comparatorErrorMsg); + expect(() => tree1.forEachKeyInBoth(tree2, () => { })).toThrow(comparatorErrorMsg); expect(() => intersect, number, number>(tree1, tree2, () => 0)).toThrow(comparatorErrorMsg); }); }); describe('BTree forEachKeyInBoth/intersect fuzz tests', () => { const compare = (a: number, b: number) => a - b; - const FUZZ_SETTINGS = { + const FUZZ_SETTINGS: SetOperationFuzzSettings = { branchingFactors: [4, 5, 32], ooms: [2, 3], fractionsPerOOM: [0.1, 0.25, 0.5], - collisionChances: [0.05, 0.1, 0.3], - timeoutMs: 30_000 - } as const; - - FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { - if (fraction < 0 || fraction > 1) - throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); - }); - FUZZ_SETTINGS.collisionChances.forEach(chance => { - if (chance < 0 || chance > 1) - throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); - }); + removalChances: [0, 0.01, 0.1] + }; - jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + const FUZZ_TIMEOUT_MS = 30_000; + jest.setTimeout(FUZZ_TIMEOUT_MS); const rng = new MersenneTwister(0xC0FFEE); - for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { - describe(`branching factor ${maxNodeSize}`, () => { - for (const collisionChance of FUZZ_SETTINGS.collisionChances) { - for (const oom of FUZZ_SETTINGS.ooms) { - const size = 5 * Math.pow(10, oom); - for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { - const fractionB = 1 - fractionA; - const collisionLabel = collisionChance.toFixed(2); - - test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { - const treeA = new BTreeEx([], 
compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1, collisionChance, rng); - - for (const value of keys) { - const assignToA = rng.random() < fractionA; - const assignToB = rng.random() < fractionB; - - if (!assignToA && !assignToB) { - if (rng.random() < 0.5) - treeA.set(value, value); - else - treeB.set(value, value); - continue; - } - - if (assignToA) - treeA.set(value, value); - if (assignToB) - treeB.set(value, value); - } - - const aArray = treeA.toArray(); - const bArray = treeB.toArray(); - const bMap = new Map(bArray); - const expected: Array<[number, number, number]> = []; - for (const [key, leftValue] of aArray) { - const rightValue = bMap.get(key); - if (rightValue !== undefined) - expected.push([key, leftValue, rightValue]); - } - - const expectedRecords = tuplesToRecords(expected); - expectForEachKeyInBothAndIntersectCalls(treeA, treeB, expectedRecords); - const swappedExpected = expectedRecords.map(({ key, leftValue, rightValue }) => ({ - key, - leftValue: rightValue, - rightValue: leftValue, - })); - expectForEachKeyInBothAndIntersectCalls(treeB, treeA, swappedExpected); - - expect(treeA.toArray()).toEqual(aArray); - expect(treeB.toArray()).toEqual(bArray); - treeA.checkValid(); - treeB.checkValid(); - }); - } + forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { + test(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + + const entriesMapA = new Map(); + const entriesMapB = new Map(); + const keys = makeArray(size, true, 1, rng); + + for (const value of keys) { + let assignToA = rng.random() < fractionA; + let assignToB = rng.random() < fractionB; + + if (!assignToA && !assignToB) { + if (rng.random() < 0.5) + assignToA = 
true; + else + assignToB = true; + } + + if (assignToA) { + treeA.set(value, value); + entriesMapA.set(value, value); + } + if (assignToB) { + treeB.set(value, value); + entriesMapB.set(value, value); } } + + let treeAEntries: TreeEntries = buildEntriesFromMap(entriesMapA, compare); + let treeBEntries: TreeEntries = buildEntriesFromMap(entriesMapB, compare); + treeAEntries = applyRemovalRunsToTree(treeA, treeAEntries, removalChance, maxNodeSize, rng); + treeBEntries = applyRemovalRunsToTree(treeB, treeBEntries, removalChance, maxNodeSize, rng); + + const bMap = new Map(treeBEntries); + const expectedTuples: Array<[number, number, number]> = []; + for (const [key, leftValue] of treeAEntries) { + const rightValue = bMap.get(key); + if (rightValue !== undefined) + expectedTuples.push([key, leftValue, rightValue]); + } + + const expectedRecords = tuplesToRecords(expectedTuples); + expectForEachKeyInBothAndIntersectCalls(treeA, treeB, expectedRecords); + const swappedExpected = expectedRecords.map(({ key, leftValue, rightValue }) => ({ + key, + leftValue: rightValue, + rightValue: leftValue, + })); + expectForEachKeyInBothAndIntersectCalls(treeB, treeA, swappedExpected); + + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); + treeA.checkValid(); + treeB.checkValid(); }); - } + }); }); diff --git a/test/setOperationFuzz.test.ts b/test/setOperationFuzz.test.ts index 671c605..d4227db 100644 --- a/test/setOperationFuzz.test.ts +++ b/test/setOperationFuzz.test.ts @@ -1,182 +1,152 @@ import BTreeEx from '../extended'; import MersenneTwister from 'mersenne-twister'; -import { makeArray } from './shared'; +import { + applyRemovalRunsToTree, + buildEntriesFromMap, + expectTreeMatchesEntries, + forEachFuzzCase, + makeArray, + TreeEntries, + SetOperationFuzzSettings +} from './shared'; const compare = (a: number, b: number) => a - b; describe('Set operation fuzz tests', () => { - const FUZZ_SETTINGS = { + const FUZZ_SETTINGS: 
SetOperationFuzzSettings = { branchingFactors: [4, 5, 32], ooms: [2, 3], fractionsPerOOM: [0.1, 0.25, 0.5], - removalChances: [0.01, 0.1], - timeoutMs: 30_000 - } as const; - - FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { - if (fraction < 0 || fraction > 1) - throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); - }); - FUZZ_SETTINGS.removalChances.forEach(chance => { - if (chance < 0 || chance > 1) - throw new Error('FUZZ_SETTINGS.removalChances must contain values between 0 and 1'); - }); + removalChances: [0.01, 0.1] + }; - jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + const FUZZ_TIMEOUT_MS = 30_000; + jest.setTimeout(FUZZ_TIMEOUT_MS); const rng = new MersenneTwister(0xC0FFEE); - const applyRemovalRuns = (tree: BTreeEx, removalChance: number, branchingFactor: number) => { - if (removalChance <= 0 || tree.size === 0) - return; - const entries = tree.toArray(); - let index = 0; - while (index < entries.length) { - const key = entries[index][0]; - if (rng.random() < removalChance) { - tree.delete(key); - index++; - while (index < entries.length) { - const candidateKey = entries[index][0]; - if (rng.random() < (1 / branchingFactor)) - break; - tree.delete(candidateKey); - index++; + forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { + it(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + const treeC = new BTreeEx([], compare, maxNodeSize); + const entriesMapA = new Map(); + const entriesMapB = new Map(); + const entriesMapC = new Map(); + + const keys = makeArray(size, true, 1, rng); + + for (const value of keys) { + const assignToA = rng.random() < fractionA; + const assignToB = rng.random() < fractionB; + const assignToC = rng.random() < 0.5; + + if (assignToA) { + 
treeA.set(value, value); + entriesMapA.set(value, value); } - } else { - index++; - } - } - }; - - for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { - describe(`branching factor ${maxNodeSize}`, () => { - for (const oom of FUZZ_SETTINGS.ooms) { - const size = 5 * Math.pow(10, oom); - for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { - const fractionB = 1 - fractionA; - for (const removalChance of FUZZ_SETTINGS.removalChances) { - const removalLabel = removalChance.toFixed(3); - for (const removalChance of FUZZ_SETTINGS.removalChances) { - const removalLabel = removalChance.toFixed(3); - - it(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { - const treeA = new BTreeEx([], compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); - const treeC = new BTreeEx([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1, 0, rng); - - for (const value of keys) { - const assignToA = rng.random() < fractionA; - const assignToB = rng.random() < fractionB; - const assignToC = rng.random() < 0.5; - - if (assignToA) - treeA.set(value, value); - if (assignToB) - treeB.set(value, value); - if (assignToC) - treeC.set(value, value); - } - - applyRemovalRuns(treeA, removalChance, maxNodeSize); - applyRemovalRuns(treeB, removalChance, maxNodeSize); - - const treeAInitial = treeA.toArray(); - const treeBInitial = treeB.toArray(); - const treeCInitial = treeC.toArray(); - - const keepEither = (_k: number, left: number, _right: number) => left; - const dropValue = () => undefined; - const combineSum = (_k: number, left: number, right: number) => left + right; - - const unionDrop = treeA.union(treeB, dropValue); - const unionKeep = treeA.union(treeB, keepEither); - const intersection = treeA.intersect(treeB, keepEither); - const diffAB = treeA.subtract(treeB); - const diffBA = treeB.subtract(treeA); - - // 1. Partition of A: A = (A\B) โˆช (AโˆฉB) and parts are disjoint. 
- const partition = diffAB.union(intersection, keepEither); - expect(partition.toArray()).toEqual(treeAInitial); - expect(diffAB.intersect(intersection, keepEither).size).toBe(0); - - // 2. Recover B from union and A\B: (AโˆชB)\(A\B) = B. - expect(unionKeep.subtract(diffAB).toArray()).toEqual(treeBInitial); - - // 3. Symmetric difference two ways. - const symFromDiffs = diffAB.union(diffBA, keepEither); - const symFromUnion = unionKeep.subtract(intersection); - expect(symFromDiffs.toArray()).toEqual(symFromUnion.toArray()); - - // 4. Intersection via difference: AโˆฉB = A \ (A\B). - expect(intersection.toArray()).toEqual(treeA.subtract(diffAB).toArray()); - - // 5. Difference via intersection: A\B = A \ (AโˆฉB). - expect(diffAB.toArray()).toEqual(treeA.subtract(intersection).toArray()); - - // 6. Idempotence. - expect(treeA.union(treeA, keepEither).toArray()).toEqual(treeAInitial); - expect(treeA.intersect(treeA, keepEither).toArray()).toEqual(treeAInitial); - expect(diffAB.subtract(treeB).toArray()).toEqual(diffAB.toArray()); - - // 7. Commutativity. - expect(intersection.toArray()).toEqual(treeB.intersect(treeA, keepEither).toArray()); - const commUT = treeA.union(treeB, combineSum); - const commTU = treeB.union(treeA, combineSum); - expect(commUT.toArray()).toEqual(commTU.toArray()); - - // 8. Associativity. - const assocLeft = treeA.intersect(treeB, keepEither).intersect(treeC, keepEither); - const assocRight = treeA.intersect(treeB.intersect(treeC, keepEither), keepEither); - expect(assocLeft.toArray()).toEqual(assocRight.toArray()); - const assocSumLeft = treeA.union(treeB, combineSum).union(treeC, combineSum); - const assocSumRight = treeA.union(treeB.union(treeC, combineSum), combineSum); - expect(assocSumLeft.toArray()).toEqual(assocSumRight.toArray()); - - // 9. Absorption. 
- expect(treeA.intersect(treeA.union(treeB, keepEither), keepEither).toArray()).toEqual(treeAInitial); - expect(treeA.union(treeA.intersect(treeB, keepEither), keepEither).toArray()).toEqual(treeAInitial); - - // 10. Distributivity. - const distIntersect = treeA.intersect(treeB.union(treeC, keepEither), keepEither); - const distRight = treeA.intersect(treeB, keepEither).union(treeA.intersect(treeC, keepEither), keepEither); - expect(distIntersect.toArray()).toEqual(distRight.toArray()); - const distSubtract = treeA.subtract(treeB.union(treeC, keepEither)); - const distSubtractRight = treeA.subtract(treeB).subtract(treeC); - expect(distSubtract.toArray()).toEqual(distSubtractRight.toArray()); - const distIntersectDiff = treeA.intersect(treeB, keepEither).subtract(treeC); - const distDiffIntersect = treeA.subtract(treeC).intersect(treeB, keepEither); - expect(distIntersectDiff.toArray()).toEqual(distDiffIntersect.toArray()); - - // 11. Superset sanity. - expect(treeA.subtract(treeA.union(treeB, keepEither)).size).toBe(0); - expect(diffAB.intersect(treeB, keepEither).size).toBe(0); - - // 12. Cardinality relations. 
- expect(unionKeep.size).toBe(treeA.size + treeB.size - intersection.size); - expect(diffAB.size).toBe(treeA.size - intersection.size); - expect(treeA.size).toBe(diffAB.size + intersection.size); - - partition.checkValid(); - unionDrop.checkValid(); - unionKeep.checkValid(); - intersection.checkValid(); - diffAB.checkValid(); - diffBA.checkValid(); - treeA.checkValid(); - treeB.checkValid(); - treeC.checkValid(); - - expect(treeA.toArray()).toEqual(treeAInitial); - expect(treeB.toArray()).toEqual(treeBInitial); - expect(treeC.toArray()).toEqual(treeCInitial); - }); - } - } + if (assignToB) { + treeB.set(value, value); + entriesMapB.set(value, value); + } + if (assignToC) { + treeC.set(value, value); + entriesMapC.set(value, value); } } + + let treeAEntries: TreeEntries = buildEntriesFromMap(entriesMapA, compare); + let treeBEntries: TreeEntries = buildEntriesFromMap(entriesMapB, compare); + const treeCEntries: TreeEntries = buildEntriesFromMap(entriesMapC, compare); + + treeAEntries = applyRemovalRunsToTree(treeA, treeAEntries, removalChance, maxNodeSize, rng); + treeBEntries = applyRemovalRunsToTree(treeB, treeBEntries, removalChance, maxNodeSize, rng); + + const keepEither = (_k: number, left: number, _right: number) => left; + const dropValue = () => undefined; + const combineSum = (_k: number, left: number, right: number) => left + right; + + const unionDrop = treeA.union(treeB, dropValue); + const unionKeep = treeA.union(treeB, keepEither); + const intersection = treeA.intersect(treeB, keepEither); + const diffAB = treeA.subtract(treeB); + const diffBA = treeB.subtract(treeA); + + // 1. Partition of A: A = (A\B) โˆช (AโˆฉB) and parts are disjoint. + const partition = diffAB.union(intersection, keepEither); + expect(partition.toArray()).toEqual(treeAEntries); + expect(diffAB.intersect(intersection, keepEither).size).toBe(0); + + // 2. Recover B from union and A\B: (AโˆชB)\(A\B) = B. + expect(unionKeep.subtract(diffAB).toArray()).toEqual(treeBEntries); + + // 3. 
Symmetric difference two ways. + const symFromDiffs = diffAB.union(diffBA, keepEither); + const symFromUnion = unionKeep.subtract(intersection); + expect(symFromDiffs.toArray()).toEqual(symFromUnion.toArray()); + + // 4. Intersection via difference: AโˆฉB = A \ (A\B). + expect(intersection.toArray()).toEqual(treeA.subtract(diffAB).toArray()); + + // 5. Difference via intersection: A\B = A \ (AโˆฉB). + expect(diffAB.toArray()).toEqual(treeA.subtract(intersection).toArray()); + + // 6. Idempotence. + expect(treeA.union(treeA, keepEither).toArray()).toEqual(treeAEntries); + expect(treeA.intersect(treeA, keepEither).toArray()).toEqual(treeAEntries); + expect(diffAB.subtract(treeB).toArray()).toEqual(diffAB.toArray()); + + // 7. Commutativity. + expect(intersection.toArray()).toEqual(treeB.intersect(treeA, keepEither).toArray()); + const commUT = treeA.union(treeB, combineSum); + const commTU = treeB.union(treeA, combineSum); + expect(commUT.toArray()).toEqual(commTU.toArray()); + + // 8. Associativity. + const assocLeft = treeA.intersect(treeB, keepEither).intersect(treeC, keepEither); + const assocRight = treeA.intersect(treeB.intersect(treeC, keepEither), keepEither); + expect(assocLeft.toArray()).toEqual(assocRight.toArray()); + const assocSumLeft = treeA.union(treeB, combineSum).union(treeC, combineSum); + const assocSumRight = treeA.union(treeB.union(treeC, combineSum), combineSum); + expect(assocSumLeft.toArray()).toEqual(assocSumRight.toArray()); + + // 9. Absorption. + expect(treeA.intersect(treeA.union(treeB, keepEither), keepEither).toArray()).toEqual(treeAEntries); + expect(treeA.union(treeA.intersect(treeB, keepEither), keepEither).toArray()).toEqual(treeAEntries); + + // 10. Distributivity. 
+ const distIntersect = treeA.intersect(treeB.union(treeC, keepEither), keepEither); + const distRight = treeA.intersect(treeB, keepEither).union(treeA.intersect(treeC, keepEither), keepEither); + expect(distIntersect.toArray()).toEqual(distRight.toArray()); + const distSubtract = treeA.subtract(treeB.union(treeC, keepEither)); + const distSubtractRight = treeA.subtract(treeB).subtract(treeC); + expect(distSubtract.toArray()).toEqual(distSubtractRight.toArray()); + const distIntersectDiff = treeA.intersect(treeB, keepEither).subtract(treeC); + const distDiffIntersect = treeA.subtract(treeC).intersect(treeB, keepEither); + expect(distIntersectDiff.toArray()).toEqual(distDiffIntersect.toArray()); + + // 11. Superset sanity. + expect(treeA.subtract(treeA.union(treeB, keepEither)).size).toBe(0); + expect(diffAB.intersect(treeB, keepEither).size).toBe(0); + + // 12. Cardinality relations. + expect(unionKeep.size).toBe(treeA.size + treeB.size - intersection.size); + expect(diffAB.size).toBe(treeA.size - intersection.size); + expect(treeA.size).toBe(diffAB.size + intersection.size); + + partition.checkValid(); + unionDrop.checkValid(); + unionKeep.checkValid(); + intersection.checkValid(); + diffAB.checkValid(); + diffBA.checkValid(); + treeA.checkValid(); + treeB.checkValid(); + treeC.checkValid(); + + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); + expectTreeMatchesEntries(treeC, treeCEntries); }); - } + }); }); diff --git a/test/shared.d.ts b/test/shared.d.ts index cea5443..13bbf3c 100644 --- a/test/shared.d.ts +++ b/test/shared.d.ts @@ -7,10 +7,30 @@ export declare type TreeNodeStats = { newUnderfilled: number; averageLoadFactor: number; }; +export declare type TreeEntries = Array<[number, number]>; +export declare type SetOperationFuzzSettings = { + branchingFactors: number[]; + ooms: number[]; + fractionsPerOOM: number[]; + removalChances: number[]; +}; +export declare type FuzzCase = { + maxNodeSize: number; + oom: 
number; + size: number; + fractionA: number; + fractionB: number; + removalChance: number; + removalLabel: string; +}; export declare function countTreeNodeStats(tree: BTree): TreeNodeStats; export declare function logTreeNodeStats(label: string, stats: TreeNodeStats): void; export declare function randInt(max: number): number; export declare function expectTreeEqualTo(tree: BTree, list: SortedArray): void; export declare function addToBoth(a: IMap, b: IMap, k: K, v: V): void; -export declare function makeArray(size: number, randomOrder: boolean, spacing?: number, collisionChance?: number, rng?: MersenneTwister): number[]; +export declare function makeArray(size: number, randomOrder: boolean, spacing?: number, rng?: MersenneTwister): number[]; export declare const randomInt: (rng: MersenneTwister, maxExclusive: number) => number; +export declare function buildEntriesFromMap(entriesMap: Map, compareFn?: (a: number, b: number) => number): TreeEntries; +export declare function applyRemovalRunsToTree(tree: BTree, entries: TreeEntries, removalChance: number, branchingFactor: number, rng: MersenneTwister): TreeEntries; +export declare function expectTreeMatchesEntries(tree: BTree, entries: TreeEntries): void; +export declare function forEachFuzzCase(settings: SetOperationFuzzSettings, callback: (testCase: FuzzCase) => void): void; diff --git a/test/shared.js b/test/shared.js index 251b534..1eab227 100644 --- a/test/shared.js +++ b/test/shared.js @@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? 
mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.randomInt = exports.makeArray = exports.addToBoth = exports.expectTreeEqualTo = exports.randInt = exports.logTreeNodeStats = exports.countTreeNodeStats = void 0; +exports.forEachFuzzCase = exports.expectTreeMatchesEntries = exports.applyRemovalRunsToTree = exports.buildEntriesFromMap = exports.randomInt = exports.makeArray = exports.addToBoth = exports.expectTreeEqualTo = exports.randInt = exports.logTreeNodeStats = exports.countTreeNodeStats = void 0; var mersenne_twister_1 = __importDefault(require("mersenne-twister")); var rand = new mersenne_twister_1.default(1234); function countTreeNodeStats(tree) { @@ -66,9 +66,8 @@ function addToBoth(a, b, k, v) { expect(a.set(k, v)).toEqual(b.set(k, v)); } exports.addToBoth = addToBoth; -function makeArray(size, randomOrder, spacing, collisionChance, rng) { +function makeArray(size, randomOrder, spacing, rng) { if (spacing === void 0) { spacing = 10; } - if (collisionChance === void 0) { collisionChance = 0; } var randomizer = rng !== null && rng !== void 0 ? 
rng : rand; var useGlobalRand = rng === undefined; var randomFloat = function () { @@ -86,13 +85,8 @@ function makeArray(size, randomOrder, spacing, collisionChance, rng) { var keys = []; var current = 0; for (var i = 0; i < size; i++) { - if (i > 0 && collisionChance > 0 && randomFloat() < collisionChance) { - keys[i] = keys[i - 1]; - } - else { - current += 1 + randomIntWithMax(spacing); - keys[i] = current; - } + current += 1 + randomIntWithMax(spacing); + keys[i] = current; } if (randomOrder) { for (var i = 0; i < size; i++) @@ -110,3 +104,75 @@ function swap(keys, i, j) { keys[i] = keys[j]; keys[j] = tmp; } +function buildEntriesFromMap(entriesMap, compareFn) { + if (compareFn === void 0) { compareFn = function (a, b) { return a - b; }; } + var entries = Array.from(entriesMap.entries()); + entries.sort(function (a, b) { return compareFn(a[0], b[0]); }); + return entries; +} +exports.buildEntriesFromMap = buildEntriesFromMap; +function applyRemovalRunsToTree(tree, entries, removalChance, branchingFactor, rng) { + if (removalChance <= 0 || entries.length === 0) + return entries; + var remaining = []; + var index = 0; + while (index < entries.length) { + var _a = entries[index], key = _a[0], value = _a[1]; + if (rng.random() < removalChance) { + tree.delete(key); + index++; + while (index < entries.length) { + var candidateKey = entries[index][0]; + if (rng.random() < (1 / branchingFactor)) + break; + tree.delete(candidateKey); + index++; + } + } + else { + remaining.push([key, value]); + index++; + } + } + return remaining; +} +exports.applyRemovalRunsToTree = applyRemovalRunsToTree; +function expectTreeMatchesEntries(tree, entries) { + var index = 0; + tree.forEachPair(function (key, value) { + var expected = entries[index++]; + expect([key, value]).toEqual(expected); + }); + expect(index).toBe(entries.length); +} +exports.expectTreeMatchesEntries = expectTreeMatchesEntries; +function validateFuzzSettings(settings) { + settings.fractionsPerOOM.forEach(function 
(fraction) { + if (fraction < 0 || fraction > 1) + throw new Error('fractionsPerOOM values must be between 0 and 1'); + }); + settings.removalChances.forEach(function (chance) { + if (chance < 0 || chance > 1) + throw new Error('removalChances values must be between 0 and 1'); + }); +} +function forEachFuzzCase(settings, callback) { + validateFuzzSettings(settings); + for (var _i = 0, _a = settings.branchingFactors; _i < _a.length; _i++) { + var maxNodeSize = _a[_i]; + for (var _b = 0, _c = settings.removalChances; _b < _c.length; _b++) { + var removalChance = _c[_b]; + var removalLabel = removalChance.toFixed(3); + for (var _d = 0, _e = settings.ooms; _d < _e.length; _d++) { + var oom = _e[_d]; + var size = 5 * Math.pow(10, oom); + for (var _f = 0, _g = settings.fractionsPerOOM; _f < _g.length; _f++) { + var fractionA = _g[_f]; + var fractionB = 1 - fractionA; + callback({ maxNodeSize: maxNodeSize, oom: oom, size: size, fractionA: fractionA, fractionB: fractionB, removalChance: removalChance, removalLabel: removalLabel }); + } + } + } + } +} +exports.forEachFuzzCase = forEachFuzzCase; diff --git a/test/shared.ts b/test/shared.ts index a4f996a..7bb1e81 100644 --- a/test/shared.ts +++ b/test/shared.ts @@ -12,6 +12,25 @@ export type TreeNodeStats = { averageLoadFactor: number; }; +export type TreeEntries = Array<[number, number]>; + +export type SetOperationFuzzSettings = { + branchingFactors: number[]; + ooms: number[]; + fractionsPerOOM: number[]; + removalChances: number[]; +}; + +export type FuzzCase = { + maxNodeSize: number; + oom: number; + size: number; + fractionA: number; + fractionB: number; + removalChance: number; + removalLabel: string; +}; + export function countTreeNodeStats(tree: BTree): TreeNodeStats { const root = (tree as unknown as BTreeWithInternals)._root; if (tree.size === 0 || !root) @@ -85,7 +104,6 @@ export function makeArray( size: number, randomOrder: boolean, spacing = 10, - collisionChance = 0, rng?: MersenneTwister ): number[] { const 
randomizer = rng ?? rand; @@ -108,12 +126,8 @@ export function makeArray( const keys: number[] = []; let current = 0; for (let i = 0; i < size; i++) { - if (i > 0 && collisionChance > 0 && randomFloat() < collisionChance) { - keys[i] = keys[i - 1]; - } else { - current += 1 + randomIntWithMax(spacing); - keys[i] = current; - } + current += 1 + randomIntWithMax(spacing); + keys[i] = current; } if (randomOrder) { for (let i = 0; i < size; i++) @@ -130,3 +144,82 @@ function swap(keys: any[], i: number, j: number) { keys[i] = keys[j]; keys[j] = tmp; } + +export function buildEntriesFromMap( + entriesMap: Map, + compareFn: (a: number, b: number) => number = (a, b) => a - b +): TreeEntries { + const entries = Array.from(entriesMap.entries()) as TreeEntries; + entries.sort((a, b) => compareFn(a[0], b[0])); + return entries; +} + +export function applyRemovalRunsToTree( + tree: BTree, + entries: TreeEntries, + removalChance: number, + branchingFactor: number, + rng: MersenneTwister +): TreeEntries { + if (removalChance <= 0 || entries.length === 0) + return entries; + const remaining: TreeEntries = []; + let index = 0; + while (index < entries.length) { + const [key, value] = entries[index]; + if (rng.random() < removalChance) { + tree.delete(key); + index++; + while (index < entries.length) { + const [candidateKey] = entries[index]; + if (rng.random() < (1 / branchingFactor)) + break; + tree.delete(candidateKey); + index++; + } + } else { + remaining.push([key, value]); + index++; + } + } + return remaining; +} + +export function expectTreeMatchesEntries(tree: BTree, entries: TreeEntries): void { + let index = 0; + tree.forEachPair((key, value) => { + const expected = entries[index++]!; + expect([key, value]).toEqual(expected); + }); + expect(index).toBe(entries.length); +} + +function validateFuzzSettings(settings: SetOperationFuzzSettings): void { + settings.fractionsPerOOM.forEach(fraction => { + if (fraction < 0 || fraction > 1) + throw new Error('fractionsPerOOM 
values must be between 0 and 1'); + }); + settings.removalChances.forEach(chance => { + if (chance < 0 || chance > 1) + throw new Error('removalChances values must be between 0 and 1'); + }); +} + +export function forEachFuzzCase( + settings: SetOperationFuzzSettings, + callback: (testCase: FuzzCase) => void +): void { + validateFuzzSettings(settings); + for (const maxNodeSize of settings.branchingFactors) { + for (const removalChance of settings.removalChances) { + const removalLabel = removalChance.toFixed(3); + for (const oom of settings.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of settings.fractionsPerOOM) { + const fractionB = 1 - fractionA; + callback({ maxNodeSize, oom, size, fractionA, fractionB, removalChance, removalLabel }); + } + } + } + } +} diff --git a/test/subtract.test.ts b/test/subtract.test.ts index 25d5f1a..e809773 100644 --- a/test/subtract.test.ts +++ b/test/subtract.test.ts @@ -3,7 +3,15 @@ import forEachKeyNotIn from '../extended/forEachKeyNotIn'; import subtract from '../extended/subtract'; import { comparatorErrorMsg, branchingFactorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; -import { makeArray } from './shared'; +import { + applyRemovalRunsToTree, + buildEntriesFromMap, + expectTreeMatchesEntries, + forEachFuzzCase, + makeArray, + SetOperationFuzzSettings, + TreeEntries +} from './shared'; type NotInCall = { key: number, value: number }; @@ -42,7 +50,7 @@ const tuples = (...pairs: Array<[number, number]>) => pairs; describe('BTree forEachKeyNotIn/subtract tests with fanout 32', testForEachKeyNotIn.bind(null, 32)); describe('BTree forEachKeyNotIn/subtract tests with fanout 10', testForEachKeyNotIn.bind(null, 10)); -describe('BTree forEachKeyNotIn/subtract tests with fanout 4', testForEachKeyNotIn.bind(null, 4)); +describe('BTree forEachKeyNotIn/subtract tests with fanout 4', testForEachKeyNotIn.bind(null, 4)); function testForEachKeyNotIn(maxNodeSize: number) { const 
compare = (a: number, b: number) => a - b; @@ -217,7 +225,7 @@ describe('BTree forEachKeyNotIn and subtract input/output validation', () => { it('forEachKeyNotIn throws error when comparators differ', () => { const includeTree = new BTreeEx([[1, 10]], (a, b) => b - a); const excludeTree = new BTreeEx([[2, 20]], (a, b) => a + b); - expect(() => forEachKeyNotIn(includeTree, excludeTree, () => {})).toThrow(comparatorErrorMsg); + expect(() => forEachKeyNotIn(includeTree, excludeTree, () => { })).toThrow(comparatorErrorMsg); }); it('subtract throws error when comparators differ', () => { @@ -235,79 +243,66 @@ describe('BTree forEachKeyNotIn and subtract input/output validation', () => { describe('BTree forEachKeyNotIn/subtract fuzz tests', () => { const compare = (a: number, b: number) => a - b; - const FUZZ_SETTINGS = { + const FUZZ_SETTINGS: SetOperationFuzzSettings = { branchingFactors: [4, 5, 32], ooms: [2, 3], fractionsPerOOM: [0.1, 0.25, 0.5], - collisionChances: [0.05, 0.1, 0.3], - timeoutMs: 30_000 - } as const; + removalChances: [0, 0.01, 0.1] + }; - FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { - if (fraction < 0 || fraction > 1) - throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); - }); - FUZZ_SETTINGS.collisionChances.forEach(chance => { - if (chance < 0 || chance > 1) - throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); - }); - - jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + const FUZZ_TIMEOUT_MS = 30_000; + jest.setTimeout(FUZZ_TIMEOUT_MS); const rng = new MersenneTwister(0xBAD_C0DE); - for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { - describe(`branching factor ${maxNodeSize}`, () => { - for (const collisionChance of FUZZ_SETTINGS.collisionChances) { - for (const oom of FUZZ_SETTINGS.ooms) { - const size = 5 * Math.pow(10, oom); - for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { - const fractionB = 1 - fractionA; - const collisionLabel = 
collisionChance.toFixed(2); - - it(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { - const treeA = new BTreeEx([], compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1, collisionChance, rng); - - for (const value of keys) { - const assignToA = rng.random() < fractionA; - const assignToB = rng.random() < fractionB; - - if (!assignToA && !assignToB) { - if (rng.random() < 0.5) - treeA.set(value, value); - else - treeB.set(value, value); - continue; - } - - if (assignToA) - treeA.set(value, value); - if (assignToB) - treeB.set(value, value); - } - - const aArray = treeA.toArray(); - const bArray = treeB.toArray(); - const bMap = new Map(bArray); - const aMap = new Map(aArray); - - const expectedA = aArray.filter(([key]) => !bMap.has(key)); - const expectedB = bArray.filter(([key]) => !aMap.has(key)); - - expectForEachKeyNotInAndSubtractCalls(treeA, treeB, tuplesToRecords(expectedA)); - expectForEachKeyNotInAndSubtractCalls(treeB, treeA, tuplesToRecords(expectedB)); - - expect(treeA.toArray()).toEqual(aArray); - expect(treeB.toArray()).toEqual(bArray); - treeA.checkValid(); - treeB.checkValid(); - }); - } + forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { + it(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + const entriesMapA = new Map(); + const entriesMapB = new Map(); + + const keys = makeArray(size, true, 1, rng); + + for (const value of keys) { + let assignToA = rng.random() < fractionA; + let assignToB = rng.random() < fractionB; + + if (!assignToA && !assignToB) { + if (rng.random() < 0.5) + assignToA = true; + else + assignToB = true; + } + + if (assignToA) { + 
treeA.set(value, value); + entriesMapA.set(value, value); + } + if (assignToB) { + treeB.set(value, value); + entriesMapB.set(value, value); } } + + let treeAEntries: TreeEntries = buildEntriesFromMap(entriesMapA, compare); + let treeBEntries: TreeEntries = buildEntriesFromMap(entriesMapB, compare); + treeAEntries = applyRemovalRunsToTree(treeA, treeAEntries, removalChance, maxNodeSize, rng); + treeBEntries = applyRemovalRunsToTree(treeB, treeBEntries, removalChance, maxNodeSize, rng); + + const bMap = new Map(treeBEntries); + const aMap = new Map(treeAEntries); + + const expectedA = treeAEntries.filter(([key]) => !bMap.has(key)); + const expectedB = treeBEntries.filter(([key]) => !aMap.has(key)); + + expectForEachKeyNotInAndSubtractCalls(treeA, treeB, tuplesToRecords(expectedA)); + expectForEachKeyNotInAndSubtractCalls(treeB, treeA, tuplesToRecords(expectedB)); + + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); + treeA.checkValid(); + treeB.checkValid(); }); - } + }); }); diff --git a/test/union.test.ts b/test/union.test.ts index 1eb5d99..b010a1e 100644 --- a/test/union.test.ts +++ b/test/union.test.ts @@ -3,7 +3,15 @@ import BTreeEx from '../extended'; import union from '../extended/union'; import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; -import { makeArray, randomInt } from './shared'; +import { + applyRemovalRunsToTree, + buildEntriesFromMap, + expectTreeMatchesEntries, + forEachFuzzCase, + makeArray, + randomInt, + SetOperationFuzzSettings +} from './shared'; var test: (name: string, f: () => void) => void = it; @@ -774,83 +782,82 @@ describe('BTree union input/output validation', () => { describe('BTree union fuzz tests', () => { const compare = (a: number, b: number) => a - b; const unionFn = (_k: number, left: number, _right: number) => left; - const FUZZ_SETTINGS = { + const FUZZ_SETTINGS: SetOperationFuzzSettings = { 
branchingFactors: [4, 5, 32], ooms: [0, 1, 2], // [0, 1, 2, 3], fractionsPerOOM: [0.1, 0.25, 0.5], // [0.0001, 0.01, 0.1, 0.25, 0.5], - collisionChances: [0.1, 0.5], // [0, 0.01, 0.1, 0.5] - } as const; + removalChances: [0, 0.01, 0.1] + }; const RANDOM_EDITS_PER_TEST = 20; const TIMEOUT_MS = 30_000; - FUZZ_SETTINGS.fractionsPerOOM.forEach(fraction => { - if (fraction < 0 || fraction > 1) - throw new Error('FUZZ_SETTINGS.fractionsPerOOM must contain values between 0 and 1'); - }); - FUZZ_SETTINGS.collisionChances.forEach(chance => { - if (chance < 0 || chance > 1) - throw new Error('FUZZ_SETTINGS.collisionChances must contain values between 0 and 1'); - }); - jest.setTimeout(TIMEOUT_MS); const rng = new MersenneTwister(0xBEEFCAFE); - for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { - describe(`branching factor ${maxNodeSize}`, () => { - for (const collisionChance of FUZZ_SETTINGS.collisionChances) { - for (const oom of FUZZ_SETTINGS.ooms) { - const size = 5 * Math.pow(10, oom); - for (const fractionA of FUZZ_SETTINGS.fractionsPerOOM) { - const fractionB = 1 - fractionA; - const collisionLabel = collisionChance.toFixed(2); - - test(`size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, collision ${collisionLabel}`, () => { - const treeA = new BTreeEx([], compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1, collisionChance, rng); - const sorted = Array.from(new Set(keys)).sort(compare); - - for (const value of keys) { - if (rng.random() < fractionA) { - treeA.set(value, value); - } else { - treeB.set(value, value); - } - } - - const aArray = treeA.toArray(); - const bArray = treeB.toArray(); - - const unioned = treeA.union(treeB, unionFn); - unioned.checkValid(); - - expect(unioned.toArray()).toEqual(sorted.map(k => [k, k])); - - // Union should not have mutated inputs - expect(treeA.toArray()).toEqual(aArray); - expect(treeB.toArray()).toEqual(bArray); - - for 
(let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { - const key = 1 + randomInt(rng, size); - const action = rng.random(); - if (action < 0.33) { - unioned.set(key, key); - } else if (action < 0.66) { - unioned.set(key, -key); - } else { - unioned.delete(key); - } - } - - // Check for shared mutability issues - expect(treeA.toArray()).toEqual(aArray); - expect(treeB.toArray()).toEqual(bArray); - }); - } + forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { + test(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + + const keys = makeArray(size, true, 1, rng); + const entriesMapA = new Map(); + const entriesMapB = new Map(); + + for (const value of keys) { + let assignToA = rng.random() < fractionA; + let assignToB = rng.random() < fractionB; + if (!assignToA && !assignToB) { + if (rng.random() < 0.5) + assignToA = true; + else + assignToB = true; + } + + if (assignToA) { + treeA.set(value, value); + entriesMapA.set(value, value); + } + if (assignToB) { + treeB.set(value, value); + entriesMapB.set(value, value); + } + } + + let treeAEntries = buildEntriesFromMap(entriesMapA, compare); + let treeBEntries = buildEntriesFromMap(entriesMapB, compare); + + treeAEntries = applyRemovalRunsToTree(treeA, treeAEntries, removalChance, maxNodeSize, rng); + treeBEntries = applyRemovalRunsToTree(treeB, treeBEntries, removalChance, maxNodeSize, rng); + + const unioned = treeA.union(treeB, unionFn); + unioned.checkValid(); + + const combinedKeys = new Set(); + treeAEntries.forEach(([key]) => combinedKeys.add(key)); + treeBEntries.forEach(([key]) => combinedKeys.add(key)); + const expected = Array.from(combinedKeys).sort(compare).map(key => [key, key]); + expect(unioned.toArray()).toEqual(expected); + + // Union should not 
have mutated inputs + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); + + for (let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { + const key = 1 + randomInt(rng, size); + const action = rng.random(); + if (action < 0.33) { + unioned.set(key, key); + } else if (action < 0.66) { + unioned.set(key, -key); + } else { + unioned.delete(key); } } + + // Check for shared mutability issues + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); }); - } + }); }); From f5f88b74eed6c074cf8e0c3185363b12c3c491f6 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 20 Nov 2025 17:35:40 -0800 Subject: [PATCH 137/143] refactor --- test/intersect.test.ts | 44 ++++++-------------------- test/setOperationFuzz.test.ts | 44 ++++++-------------------- test/shared.d.ts | 13 ++++++++ test/shared.js | 42 ++++++++++++++++++++++++- test/shared.ts | 59 +++++++++++++++++++++++++++++++++++ test/subtract.test.ts | 44 ++++++-------------------- test/union.test.ts | 40 +++++------------------- 7 files changed, 148 insertions(+), 138 deletions(-) diff --git a/test/intersect.test.ts b/test/intersect.test.ts index 6bf67d5..fc09abd 100644 --- a/test/intersect.test.ts +++ b/test/intersect.test.ts @@ -3,13 +3,10 @@ import intersect from '../extended/intersect'; import { comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; import { - applyRemovalRunsToTree, - buildEntriesFromMap, expectTreeMatchesEntries, forEachFuzzCase, - makeArray, - SetOperationFuzzSettings, - TreeEntries + populateFuzzTrees, + SetOperationFuzzSettings } from './shared'; var test: (name: string, f: () => void) => void = it; @@ -236,36 +233,13 @@ describe('BTree forEachKeyInBoth/intersect fuzz tests', () => { test(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { const treeA = new BTreeEx([], compare, 
maxNodeSize); const treeB = new BTreeEx([], compare, maxNodeSize); - - const entriesMapA = new Map(); - const entriesMapB = new Map(); - const keys = makeArray(size, true, 1, rng); - - for (const value of keys) { - let assignToA = rng.random() < fractionA; - let assignToB = rng.random() < fractionB; - - if (!assignToA && !assignToB) { - if (rng.random() < 0.5) - assignToA = true; - else - assignToB = true; - } - - if (assignToA) { - treeA.set(value, value); - entriesMapA.set(value, value); - } - if (assignToB) { - treeB.set(value, value); - entriesMapB.set(value, value); - } - } - - let treeAEntries: TreeEntries = buildEntriesFromMap(entriesMapA, compare); - let treeBEntries: TreeEntries = buildEntriesFromMap(entriesMapB, compare); - treeAEntries = applyRemovalRunsToTree(treeA, treeAEntries, removalChance, maxNodeSize, rng); - treeBEntries = applyRemovalRunsToTree(treeB, treeBEntries, removalChance, maxNodeSize, rng); + const [treeAEntries, treeBEntries] = populateFuzzTrees( + [ + { tree: treeA, fraction: fractionA, removalChance }, + { tree: treeB, fraction: fractionB, removalChance } + ], + { rng, size, compare, maxNodeSize, minAssignmentsPerKey: 1 } + ); const bMap = new Map(treeBEntries); const expectedTuples: Array<[number, number, number]> = []; diff --git a/test/setOperationFuzz.test.ts b/test/setOperationFuzz.test.ts index d4227db..d11bad7 100644 --- a/test/setOperationFuzz.test.ts +++ b/test/setOperationFuzz.test.ts @@ -1,12 +1,9 @@ import BTreeEx from '../extended'; import MersenneTwister from 'mersenne-twister'; import { - applyRemovalRunsToTree, - buildEntriesFromMap, expectTreeMatchesEntries, forEachFuzzCase, - makeArray, - TreeEntries, + populateFuzzTrees, SetOperationFuzzSettings } from './shared'; @@ -30,37 +27,14 @@ describe('Set operation fuzz tests', () => { const treeA = new BTreeEx([], compare, maxNodeSize); const treeB = new BTreeEx([], compare, maxNodeSize); const treeC = new BTreeEx([], compare, maxNodeSize); - const entriesMapA = new Map(); 
- const entriesMapB = new Map(); - const entriesMapC = new Map(); - - const keys = makeArray(size, true, 1, rng); - - for (const value of keys) { - const assignToA = rng.random() < fractionA; - const assignToB = rng.random() < fractionB; - const assignToC = rng.random() < 0.5; - - if (assignToA) { - treeA.set(value, value); - entriesMapA.set(value, value); - } - if (assignToB) { - treeB.set(value, value); - entriesMapB.set(value, value); - } - if (assignToC) { - treeC.set(value, value); - entriesMapC.set(value, value); - } - } - - let treeAEntries: TreeEntries = buildEntriesFromMap(entriesMapA, compare); - let treeBEntries: TreeEntries = buildEntriesFromMap(entriesMapB, compare); - const treeCEntries: TreeEntries = buildEntriesFromMap(entriesMapC, compare); - - treeAEntries = applyRemovalRunsToTree(treeA, treeAEntries, removalChance, maxNodeSize, rng); - treeBEntries = applyRemovalRunsToTree(treeB, treeBEntries, removalChance, maxNodeSize, rng); + const [treeAEntries, treeBEntries, treeCEntries] = populateFuzzTrees( + [ + { tree: treeA, fraction: fractionA, removalChance }, + { tree: treeB, fraction: fractionB, removalChance }, + { tree: treeC, fraction: 0.5 } + ], + { rng, size, compare, maxNodeSize } + ); const keepEither = (_k: number, left: number, _right: number) => left; const dropValue = () => undefined; diff --git a/test/shared.d.ts b/test/shared.d.ts index 13bbf3c..3d7356d 100644 --- a/test/shared.d.ts +++ b/test/shared.d.ts @@ -31,6 +31,19 @@ export declare function addToBoth(a: IMap, b: IMap, k: K, v: V export declare function makeArray(size: number, randomOrder: boolean, spacing?: number, rng?: MersenneTwister): number[]; export declare const randomInt: (rng: MersenneTwister, maxExclusive: number) => number; export declare function buildEntriesFromMap(entriesMap: Map, compareFn?: (a: number, b: number) => number): TreeEntries; +export declare type FuzzTreeSpec = { + tree: BTree; + fraction: number; + removalChance?: number; +}; +export declare type 
PopulateFuzzTreesOptions = { + size: number; + rng: MersenneTwister; + compare: (a: number, b: number) => number; + maxNodeSize: number; + minAssignmentsPerKey?: number; +}; +export declare function populateFuzzTrees(specs: FuzzTreeSpec[], { size, rng, compare, maxNodeSize, minAssignmentsPerKey }: PopulateFuzzTreesOptions): TreeEntries[]; export declare function applyRemovalRunsToTree(tree: BTree, entries: TreeEntries, removalChance: number, branchingFactor: number, rng: MersenneTwister): TreeEntries; export declare function expectTreeMatchesEntries(tree: BTree, entries: TreeEntries): void; export declare function forEachFuzzCase(settings: SetOperationFuzzSettings, callback: (testCase: FuzzCase) => void): void; diff --git a/test/shared.js b/test/shared.js index 1eab227..a12d92d 100644 --- a/test/shared.js +++ b/test/shared.js @@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.forEachFuzzCase = exports.expectTreeMatchesEntries = exports.applyRemovalRunsToTree = exports.buildEntriesFromMap = exports.randomInt = exports.makeArray = exports.addToBoth = exports.expectTreeEqualTo = exports.randInt = exports.logTreeNodeStats = exports.countTreeNodeStats = void 0; +exports.forEachFuzzCase = exports.expectTreeMatchesEntries = exports.applyRemovalRunsToTree = exports.populateFuzzTrees = exports.buildEntriesFromMap = exports.randomInt = exports.makeArray = exports.addToBoth = exports.expectTreeEqualTo = exports.randInt = exports.logTreeNodeStats = exports.countTreeNodeStats = void 0; var mersenne_twister_1 = __importDefault(require("mersenne-twister")); var rand = new mersenne_twister_1.default(1234); function countTreeNodeStats(tree) { @@ -111,6 +111,46 @@ function buildEntriesFromMap(entriesMap, compareFn) { return entries; } exports.buildEntriesFromMap = buildEntriesFromMap; +function 
populateFuzzTrees(specs, _a) { + var size = _a.size, rng = _a.rng, compare = _a.compare, maxNodeSize = _a.maxNodeSize, _b = _a.minAssignmentsPerKey, minAssignmentsPerKey = _b === void 0 ? 0 : _b; + if (specs.length === 0) + return []; + var keys = makeArray(size, true, 1, rng); + var entriesMaps = specs.map(function () { return new Map(); }); + var assignments = new Array(specs.length); + var requiredAssignments = Math.min(minAssignmentsPerKey, specs.length); + for (var _i = 0, keys_1 = keys; _i < keys_1.length; _i++) { + var value = keys_1[_i]; + var assignedCount = 0; + for (var i = 0; i < specs.length; i++) { + assignments[i] = rng.random() < specs[i].fraction; + if (assignments[i]) + assignedCount++; + } + while (assignedCount < requiredAssignments && specs.length > 0) { + var index = (0, exports.randomInt)(rng, specs.length); + if (!assignments[index]) { + assignments[index] = true; + assignedCount++; + } + } + for (var i = 0; i < specs.length; i++) { + if (assignments[i]) { + specs[i].tree.set(value, value); + entriesMaps[i].set(value, value); + } + } + } + return specs.map(function (spec, index) { + var _a; + var entries = buildEntriesFromMap(entriesMaps[index], compare); + var removalChance = (_a = spec.removalChance) !== null && _a !== void 0 ? 
_a : 0; + if (removalChance > 0) + entries = applyRemovalRunsToTree(spec.tree, entries, removalChance, maxNodeSize, rng); + return entries; + }); +} +exports.populateFuzzTrees = populateFuzzTrees; function applyRemovalRunsToTree(tree, entries, removalChance, branchingFactor, rng) { if (removalChance <= 0 || entries.length === 0) return entries; diff --git a/test/shared.ts b/test/shared.ts index 7bb1e81..1b24cbe 100644 --- a/test/shared.ts +++ b/test/shared.ts @@ -154,6 +154,65 @@ export function buildEntriesFromMap( return entries; } +export type FuzzTreeSpec = { + tree: BTree; + fraction: number; + removalChance?: number; +}; + +export type PopulateFuzzTreesOptions = { + size: number; + rng: MersenneTwister; + compare: (a: number, b: number) => number; + maxNodeSize: number; + minAssignmentsPerKey?: number; +}; + +export function populateFuzzTrees( + specs: FuzzTreeSpec[], + { size, rng, compare, maxNodeSize, minAssignmentsPerKey = 0 }: PopulateFuzzTreesOptions +): TreeEntries[] { + if (specs.length === 0) + return []; + + const keys = makeArray(size, true, 1, rng); + const entriesMaps = specs.map(() => new Map()); + const assignments = new Array(specs.length); + const requiredAssignments = Math.min(minAssignmentsPerKey, specs.length); + + for (const value of keys) { + let assignedCount = 0; + for (let i = 0; i < specs.length; i++) { + assignments[i] = rng.random() < specs[i].fraction; + if (assignments[i]) + assignedCount++; + } + + while (assignedCount < requiredAssignments && specs.length > 0) { + const index = randomInt(rng, specs.length); + if (!assignments[index]) { + assignments[index] = true; + assignedCount++; + } + } + + for (let i = 0; i < specs.length; i++) { + if (assignments[i]) { + specs[i].tree.set(value, value); + entriesMaps[i].set(value, value); + } + } + } + + return specs.map((spec, index) => { + let entries = buildEntriesFromMap(entriesMaps[index], compare); + const removalChance = spec.removalChance ?? 
0; + if (removalChance > 0) + entries = applyRemovalRunsToTree(spec.tree, entries, removalChance, maxNodeSize, rng); + return entries; + }); +} + export function applyRemovalRunsToTree( tree: BTree, entries: TreeEntries, diff --git a/test/subtract.test.ts b/test/subtract.test.ts index e809773..b6e370d 100644 --- a/test/subtract.test.ts +++ b/test/subtract.test.ts @@ -4,13 +4,10 @@ import subtract from '../extended/subtract'; import { comparatorErrorMsg, branchingFactorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; import { - applyRemovalRunsToTree, - buildEntriesFromMap, expectTreeMatchesEntries, forEachFuzzCase, - makeArray, - SetOperationFuzzSettings, - TreeEntries + populateFuzzTrees, + SetOperationFuzzSettings } from './shared'; type NotInCall = { key: number, value: number }; @@ -259,36 +256,13 @@ describe('BTree forEachKeyNotIn/subtract fuzz tests', () => { it(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { const treeA = new BTreeEx([], compare, maxNodeSize); const treeB = new BTreeEx([], compare, maxNodeSize); - const entriesMapA = new Map(); - const entriesMapB = new Map(); - - const keys = makeArray(size, true, 1, rng); - - for (const value of keys) { - let assignToA = rng.random() < fractionA; - let assignToB = rng.random() < fractionB; - - if (!assignToA && !assignToB) { - if (rng.random() < 0.5) - assignToA = true; - else - assignToB = true; - } - - if (assignToA) { - treeA.set(value, value); - entriesMapA.set(value, value); - } - if (assignToB) { - treeB.set(value, value); - entriesMapB.set(value, value); - } - } - - let treeAEntries: TreeEntries = buildEntriesFromMap(entriesMapA, compare); - let treeBEntries: TreeEntries = buildEntriesFromMap(entriesMapB, compare); - treeAEntries = applyRemovalRunsToTree(treeA, treeAEntries, removalChance, maxNodeSize, rng); - treeBEntries = applyRemovalRunsToTree(treeB, treeBEntries, 
removalChance, maxNodeSize, rng); + const [treeAEntries, treeBEntries] = populateFuzzTrees( + [ + { tree: treeA, fraction: fractionA, removalChance }, + { tree: treeB, fraction: fractionB, removalChance } + ], + { rng, size, compare, maxNodeSize, minAssignmentsPerKey: 1 } + ); const bMap = new Map(treeBEntries); const aMap = new Map(treeAEntries); diff --git a/test/union.test.ts b/test/union.test.ts index b010a1e..48cb173 100644 --- a/test/union.test.ts +++ b/test/union.test.ts @@ -4,11 +4,10 @@ import union from '../extended/union'; import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/shared'; import MersenneTwister from 'mersenne-twister'; import { - applyRemovalRunsToTree, - buildEntriesFromMap, expectTreeMatchesEntries, forEachFuzzCase, makeArray, + populateFuzzTrees, randomInt, SetOperationFuzzSettings } from './shared'; @@ -799,36 +798,13 @@ describe('BTree union fuzz tests', () => { test(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { const treeA = new BTreeEx([], compare, maxNodeSize); const treeB = new BTreeEx([], compare, maxNodeSize); - - const keys = makeArray(size, true, 1, rng); - const entriesMapA = new Map(); - const entriesMapB = new Map(); - - for (const value of keys) { - let assignToA = rng.random() < fractionA; - let assignToB = rng.random() < fractionB; - if (!assignToA && !assignToB) { - if (rng.random() < 0.5) - assignToA = true; - else - assignToB = true; - } - - if (assignToA) { - treeA.set(value, value); - entriesMapA.set(value, value); - } - if (assignToB) { - treeB.set(value, value); - entriesMapB.set(value, value); - } - } - - let treeAEntries = buildEntriesFromMap(entriesMapA, compare); - let treeBEntries = buildEntriesFromMap(entriesMapB, compare); - - treeAEntries = applyRemovalRunsToTree(treeA, treeAEntries, removalChance, maxNodeSize, rng); - treeBEntries = applyRemovalRunsToTree(treeB, treeBEntries, removalChance, 
maxNodeSize, rng); + const [treeAEntries, treeBEntries] = populateFuzzTrees( + [ + { tree: treeA, fraction: fractionA, removalChance }, + { tree: treeB, fraction: fractionB, removalChance } + ], + { rng, size, compare, maxNodeSize, minAssignmentsPerKey: 1 } + ); const unioned = treeA.union(treeB, unionFn); unioned.checkValid(); From fe628c1b1b3084f833763827fa29d639da09098e Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 20 Nov 2025 18:30:38 -0800 Subject: [PATCH 138/143] refactor subtract tests --- test/subtract.test.ts | 201 +++++++++++++++++++++--------------------- 1 file changed, 102 insertions(+), 99 deletions(-) diff --git a/test/subtract.test.ts b/test/subtract.test.ts index b6e370d..2c4df07 100644 --- a/test/subtract.test.ts +++ b/test/subtract.test.ts @@ -33,10 +33,11 @@ const runForEachKeyNotInAndSubtract = ( const expectForEachKeyNotInAndSubtractCalls = ( include: BTreeEx, exclude: BTreeEx, - expected: NotInCall[] + expected: Array<[number, number]> ) => { + const expectedRecords = tuplesToRecords(expected); runForEachKeyNotInAndSubtract(include, exclude, (calls) => { - expect(calls).toEqual(expected); + expect(calls).toEqual(expectedRecords); }); }; @@ -44,76 +45,79 @@ const tuplesToRecords = (entries: Array<[number, number]>): NotInCall[] => entries.map(([key, value]) => ({ key, value })); const tuples = (...pairs: Array<[number, number]>) => pairs; - -describe('BTree forEachKeyNotIn/subtract tests with fanout 32', testForEachKeyNotIn.bind(null, 32)); -describe('BTree forEachKeyNotIn/subtract tests with fanout 10', testForEachKeyNotIn.bind(null, 10)); -describe('BTree forEachKeyNotIn/subtract tests with fanout 4', testForEachKeyNotIn.bind(null, 4)); - -function testForEachKeyNotIn(maxNodeSize: number) { - const compare = (a: number, b: number) => a - b; - - const buildTree = (entries: Array<[number, number]>) => - new BTreeEx(entries, compare, maxNodeSize); - - it('forEachKeyNotIn/subtract two empty trees', () => { - const includeTree = 
buildTree([]); - const excludeTree = buildTree([]); - expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, []); - }); - - it('forEachKeyNotIn/subtract include empty tree with non-empty tree', () => { - const includeTree = buildTree([]); - const excludeTree = buildTree(tuples([1, 10], [2, 20], [3, 30])); - expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, []); - }); - - it('forEachKeyNotIn/subtract exclude tree empty yields all include keys', () => { - const includeEntries: Array<[number, number]> = [[1, 10], [3, 30], [5, 50]]; - const includeTree = buildTree(includeEntries); - const excludeTree = buildTree([]); - const expected = tuplesToRecords(includeEntries); - expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, expected); - }); - - it('forEachKeyNotIn/subtract with no overlapping keys returns include tree contents', () => { - const includeEntries: Array<[number, number]> = [[1, 10], [3, 30], [5, 50]]; - const excludeEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400]]; - const includeTree = buildTree(includeEntries); - const excludeTree = buildTree(excludeEntries); - const expected = tuplesToRecords(includeEntries); - expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, expected); - }); - - it('forEachKeyNotIn/subtract with overlapping keys excludes matches', () => { - const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40], [5, 50])); - const excludeTree = buildTree(tuples([0, 100], [2, 200], [4, 400], [6, 600])); - expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, [ - { key: 1, value: 10 }, - { key: 3, value: 30 }, - { key: 5, value: 50 }, - ]); - }); - - it('forEachKeyNotIn/subtract excludes leading overlap then emits remaining keys', () => { - const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40])); - const excludeTree = buildTree(tuples([1, 100], [2, 200])); - expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, [ - { key: 3, value: 30 }, 
- { key: 4, value: 40 }, - ]); +const compareNumbers = (a: number, b: number) => a - b; +const buildTree = (entries: Array<[number, number]>, maxNodeSize: number) => + new BTreeEx(entries, compareNumbers, maxNodeSize); + +describe.each([32, 10, 4])('BTree forEachKeyNotIn/subtract tests with fanout %i', (maxNodeSize) => { + const buildTreeForFanout = (entries: Array<[number, number]>) => buildTree(entries, maxNodeSize); + + const BASIC_CASES: Array<{ + name: string; + include: Array<[number, number]>; + exclude: Array<[number, number]>; + expected: Array<[number, number]>; + }> = [ + { + name: 'forEachKeyNotIn/subtract two empty trees', + include: tuples(), + exclude: tuples(), + expected: [], + }, + { + name: 'forEachKeyNotIn/subtract include empty tree with non-empty tree', + include: tuples(), + exclude: tuples([1, 10], [2, 20], [3, 30]), + expected: [], + }, + { + name: 'forEachKeyNotIn/subtract exclude tree empty yields all include keys', + include: tuples([1, 10], [3, 30], [5, 50]), + exclude: tuples(), + expected: tuples([1, 10], [3, 30], [5, 50]), + }, + { + name: 'forEachKeyNotIn/subtract with no overlapping keys returns include tree contents', + include: tuples([1, 10], [3, 30], [5, 50]), + exclude: tuples([0, 100], [2, 200], [4, 400]), + expected: tuples([1, 10], [3, 30], [5, 50]), + }, + { + name: 'forEachKeyNotIn/subtract with overlapping keys excludes matches', + include: tuples([1, 10], [2, 20], [3, 30], [4, 40], [5, 50]), + exclude: tuples([0, 100], [2, 200], [4, 400], [6, 600]), + expected: tuples([1, 10], [3, 30], [5, 50]), + }, + { + name: 'forEachKeyNotIn/subtract excludes leading overlap then emits remaining keys', + include: tuples([1, 10], [2, 20], [3, 30], [4, 40]), + exclude: tuples([1, 100], [2, 200]), + expected: tuples([3, 30], [4, 40]), + }, + { + name: 'forEachKeyNotIn/subtract exclude superset yields empty result', + include: tuples([2, 200], [3, 300]), + exclude: tuples([1, 100], [2, 200], [3, 300], [4, 400]), + expected: [], + }, + 
]; + + BASIC_CASES.forEach(({ name, include, exclude, expected }) => { + it(name, () => { + const includeTree = buildTreeForFanout(include); + const excludeTree = buildTreeForFanout(exclude); + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, expected); + }); }); it('forEachKeyNotIn/subtract maintains tree contents', () => { const includeEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; const excludeEntries: Array<[number, number]> = [[1, 100], [3, 300], [5, 500]]; - const includeTree = buildTree(includeEntries); - const excludeTree = buildTree(excludeEntries); + const includeTree = buildTreeForFanout(includeEntries); + const excludeTree = buildTreeForFanout(excludeEntries); const includeBefore = includeTree.toArray(); const excludeBefore = excludeTree.toArray(); - expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, [ - { key: 2, value: 20 }, - { key: 4, value: 40 }, - ]); + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, tuples([2, 20], [4, 40])); expect(includeTree.toArray()).toEqual(includeBefore); expect(excludeTree.toArray()).toEqual(excludeBefore); includeTree.checkValid(); @@ -121,8 +125,8 @@ function testForEachKeyNotIn(maxNodeSize: number) { }); it('forEachKeyNotIn/subtract with contiguous overlap yields sorted survivors', () => { - const includeTree = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); - const excludeTree = buildTree(tuples([3, 30], [4, 40], [5, 50])); + const includeTree = buildTreeForFanout(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); + const excludeTree = buildTreeForFanout(tuples([3, 30], [4, 40], [5, 50])); runForEachKeyNotInAndSubtract(includeTree, excludeTree, (calls) => { expect(calls.map(c => c.key)).toEqual([1, 2, 6]); expect(calls.map(c => c.value)).toEqual([1, 2, 6]); @@ -138,8 +142,8 @@ function testForEachKeyNotIn(maxNodeSize: number) { const key = i + excludeStart; return [key, key * 3] as [number, number]; }); - const 
includeTree = buildTree(includeEntries); - const excludeTree = buildTree(excludeEntries); + const includeTree = buildTreeForFanout(includeEntries); + const excludeTree = buildTreeForFanout(excludeEntries); runForEachKeyNotInAndSubtract(includeTree, excludeTree, (calls) => { expect(calls.length).toBe(size - excludeSpan); expect(calls[0]).toEqual({ key: 0, value: 0 }); @@ -152,37 +156,37 @@ function testForEachKeyNotIn(maxNodeSize: number) { it('forEachKeyNotIn/subtract tree with itself visits no keys', () => { const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); - const tree = buildTree(entries); + const tree = buildTreeForFanout(entries); expectForEachKeyNotInAndSubtractCalls(tree, tree, []); }); - it('forEachKeyNotIn/subtract exclude superset yields empty result', () => { - const includeTree = buildTree(tuples([2, 200], [3, 300])); - const excludeTree = buildTree(tuples([1, 100], [2, 200], [3, 300], [4, 400])); - expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, []); + it('subtract returns a cloned tree when nothing is removed', () => { + const includeTree = buildTreeForFanout(tuples([1, 10], [2, 20])); + const excludeTree = buildTreeForFanout(tuples([3, 30])); + const result = subtract, number, number>(includeTree, excludeTree); + expect(result).not.toBe(includeTree); + expect(result.toArray()).toEqual(includeTree.toArray()); + expect(excludeTree.toArray()).toEqual(tuples([3, 30])); + includeTree.checkValid(); + result.checkValid(); + excludeTree.checkValid(); }); it('forEachKeyNotIn/subtract arguments determine surviving keys', () => { - const tree1 = buildTree(tuples([1, 100], [2, 200], [4, 400])); - const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); - expectForEachKeyNotInAndSubtractCalls(tree1, tree2, [ - { key: 1, value: 100 }, - ]); - expectForEachKeyNotInAndSubtractCalls(tree2, tree1, [ - { key: 3, value: 30 }, - ]); + const tree1 = buildTreeForFanout(tuples([1, 100], [2, 200], [4, 400])); + const 
tree2 = buildTreeForFanout(tuples([2, 20], [3, 30], [4, 40])); + expectForEachKeyNotInAndSubtractCalls(tree1, tree2, tuples([1, 100])); + expectForEachKeyNotInAndSubtractCalls(tree2, tree1, tuples([3, 30])); }); -} +}); describe('BTree forEachKeyNotIn early exiting', () => { - const compare = (a: number, b: number) => a - b; - - const buildTree = (entries: Array<[number, number]>) => - new BTreeEx(entries, compare, 4); + const buildTreeForEarlyExit = (entries: Array<[number, number]>) => + buildTree(entries, 4); it('forEachKeyNotIn returns undefined when callback returns void', () => { - const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30])); - const excludeTree = buildTree(tuples([2, 200])); + const includeTree = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30])); + const excludeTree = buildTreeForEarlyExit(tuples([2, 200])); const visited: number[] = []; const result = forEachKeyNotIn(includeTree, excludeTree, key => { visited.push(key); @@ -192,8 +196,8 @@ describe('BTree forEachKeyNotIn early exiting', () => { }); it('forEachKeyNotIn ignores undefined break values and completes traversal', () => { - const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40])); - const excludeTree = buildTree(tuples([2, 200])); + const includeTree = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const excludeTree = buildTreeForEarlyExit(tuples([2, 200])); const visited: number[] = []; const result = forEachKeyNotIn(includeTree, excludeTree, key => { visited.push(key); @@ -204,8 +208,8 @@ describe('BTree forEachKeyNotIn early exiting', () => { }); it('forEachKeyNotIn breaks early when callback returns a value', () => { - const includeTree = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40])); - const excludeTree = buildTree(tuples([2, 200])); + const includeTree = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const excludeTree = buildTreeForEarlyExit(tuples([2, 200])); const visited: number[] = []; const 
breakResult = forEachKeyNotIn(includeTree, excludeTree, (key, value) => { visited.push(key); @@ -239,7 +243,6 @@ describe('BTree forEachKeyNotIn and subtract input/output validation', () => { }); describe('BTree forEachKeyNotIn/subtract fuzz tests', () => { - const compare = (a: number, b: number) => a - b; const FUZZ_SETTINGS: SetOperationFuzzSettings = { branchingFactors: [4, 5, 32], ooms: [2, 3], @@ -254,14 +257,14 @@ describe('BTree forEachKeyNotIn/subtract fuzz tests', () => { forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { it(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { - const treeA = new BTreeEx([], compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); + const treeA = new BTreeEx([], compareNumbers, maxNodeSize); + const treeB = new BTreeEx([], compareNumbers, maxNodeSize); const [treeAEntries, treeBEntries] = populateFuzzTrees( [ { tree: treeA, fraction: fractionA, removalChance }, { tree: treeB, fraction: fractionB, removalChance } ], - { rng, size, compare, maxNodeSize, minAssignmentsPerKey: 1 } + { rng, size, compare: compareNumbers, maxNodeSize, minAssignmentsPerKey: 1 } ); const bMap = new Map(treeBEntries); @@ -270,8 +273,8 @@ describe('BTree forEachKeyNotIn/subtract fuzz tests', () => { const expectedA = treeAEntries.filter(([key]) => !bMap.has(key)); const expectedB = treeBEntries.filter(([key]) => !aMap.has(key)); - expectForEachKeyNotInAndSubtractCalls(treeA, treeB, tuplesToRecords(expectedA)); - expectForEachKeyNotInAndSubtractCalls(treeB, treeA, tuplesToRecords(expectedB)); + expectForEachKeyNotInAndSubtractCalls(treeA, treeB, expectedA); + expectForEachKeyNotInAndSubtractCalls(treeB, treeA, expectedB); expectTreeMatchesEntries(treeA, treeAEntries); expectTreeMatchesEntries(treeB, treeBEntries); From 46b91903c3e110ad0589712c40e77f3767587479 Mon Sep 17 00:00:00 
2001 From: Taylor Williams Date: Thu, 20 Nov 2025 18:38:14 -0800 Subject: [PATCH 139/143] more test improvements --- test/intersect.test.ts | 183 ++++++++++++++++++++--------------------- test/shared.d.ts | 1 + test/shared.js | 4 +- test/shared.ts | 1 + test/subtract.test.ts | 4 +- 5 files changed, 98 insertions(+), 95 deletions(-) diff --git a/test/intersect.test.ts b/test/intersect.test.ts index fc09abd..8014a74 100644 --- a/test/intersect.test.ts +++ b/test/intersect.test.ts @@ -6,11 +6,10 @@ import { expectTreeMatchesEntries, forEachFuzzCase, populateFuzzTrees, - SetOperationFuzzSettings + SetOperationFuzzSettings, + compareNumbers } from './shared'; -var test: (name: string, f: () => void) => void = it; - type SharedCall = { key: number, leftValue: number, rightValue: number }; const runForEachKeyInBothAndIntersect = ( @@ -38,10 +37,11 @@ const runForEachKeyInBothAndIntersect = ( const expectForEachKeyInBothAndIntersectCalls = ( left: BTreeEx, right: BTreeEx, - expected: SharedCall[] + expected: Array<[number, number, number]> ) => { + const expectedRecords = tuplesToRecords(expected); runForEachKeyInBothAndIntersect(left, right, (calls) => { - expect(calls).toEqual(expected); + expect(calls).toEqual(expectedRecords); }); }; @@ -49,62 +49,75 @@ const tuplesToRecords = (entries: Array<[number, number, number]>): SharedCall[] entries.map(([key, leftValue, rightValue]) => ({ key, leftValue, rightValue })); const tuples = (...pairs: Array<[number, number]>) => pairs; - -describe('BTree forEachKeyInBoth/intersect tests with fanout 32', testForEachKeyInBoth.bind(null, 32)); -describe('BTree forEachKeyInBoth/intersect tests with fanout 10', testForEachKeyInBoth.bind(null, 10)); -describe('BTree forEachKeyInBoth/intersect tests with fanout 4', testForEachKeyInBoth.bind(null, 4)); - -function testForEachKeyInBoth(maxNodeSize: number) { - const compare = (a: number, b: number) => a - b; - - const buildTree = (entries: Array<[number, number]>) => - new BTreeEx(entries, 
compare, maxNodeSize); - - test('forEachKeyInBoth/intersect two empty trees', () => { - const tree1 = buildTree([]); - const tree2 = buildTree([]); - expectForEachKeyInBothAndIntersectCalls(tree1, tree2, []); - }); - - test('forEachKeyInBoth/intersect empty tree with non-empty tree', () => { - const tree1 = buildTree([]); - const tree2 = buildTree(tuples([1, 10], [2, 20], [3, 30])); - expectForEachKeyInBothAndIntersectCalls(tree1, tree2, []); - expectForEachKeyInBothAndIntersectCalls(tree2, tree1, []); - }); - - test('forEachKeyInBoth/intersect with no overlapping keys', () => { - const tree1 = buildTree(tuples([1, 10], [3, 30], [5, 50])); - const tree2 = buildTree(tuples([2, 20], [4, 40], [6, 60])); - expectForEachKeyInBothAndIntersectCalls(tree1, tree2, []); - }); - - test('forEachKeyInBoth/intersect with single overlapping key', () => { - const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); - const tree2 = buildTree(tuples([0, 100], [2, 200], [4, 400])); - expectForEachKeyInBothAndIntersectCalls(tree1, tree2, [{ key: 2, leftValue: 20, rightValue: 200 }]); +const triples = (...triplets: Array<[number, number, number]>) => triplets; +const buildTree = (entries: Array<[number, number]>, maxNodeSize: number) => + new BTreeEx(entries, compareNumbers, maxNodeSize); + +describe.each([32, 10, 4])('BTree forEachKeyInBoth/intersect tests with fanout %i', (maxNodeSize) => { + const buildTreeForFanout = (entries: Array<[number, number]>) => buildTree(entries, maxNodeSize); + + const BASIC_CASES: Array<{ + name: string; + left: Array<[number, number]>; + right: Array<[number, number]>; + expected: Array<[number, number, number]>; + alsoCheckSwap?: boolean; + }> = [ + { + name: 'forEachKeyInBoth/intersect two empty trees', + left: tuples(), + right: tuples(), + expected: triples(), + }, + { + name: 'forEachKeyInBoth/intersect empty tree with non-empty tree', + left: tuples(), + right: tuples([1, 10], [2, 20], [3, 30]), + expected: triples(), + alsoCheckSwap: true, + }, 
+ { + name: 'forEachKeyInBoth/intersect with no overlapping keys', + left: tuples([1, 10], [3, 30], [5, 50]), + right: tuples([2, 20], [4, 40], [6, 60]), + expected: triples(), + }, + { + name: 'forEachKeyInBoth/intersect with single overlapping key', + left: tuples([1, 10], [2, 20], [3, 30]), + right: tuples([0, 100], [2, 200], [4, 400]), + expected: triples([2, 20, 200]), + }, + ]; + + BASIC_CASES.forEach(({ name, left, right, expected, alsoCheckSwap }) => { + it(name, () => { + const leftTree = buildTreeForFanout(left); + const rightTree = buildTreeForFanout(right); + expectForEachKeyInBothAndIntersectCalls(leftTree, rightTree, expected); + if (alsoCheckSwap) { + expectForEachKeyInBothAndIntersectCalls(rightTree, leftTree, expected); + } + }); }); - test('forEachKeyInBoth/intersect with multiple overlapping keys maintains tree contents', () => { + it('forEachKeyInBoth/intersect with multiple overlapping keys maintains tree contents', () => { const leftEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; const rightEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400], [6, 600]]; - const tree1 = buildTree(leftEntries); - const tree2 = buildTree(rightEntries); + const tree1 = buildTreeForFanout(leftEntries); + const tree2 = buildTreeForFanout(rightEntries); const leftBefore = tree1.toArray(); const rightBefore = tree2.toArray(); - expectForEachKeyInBothAndIntersectCalls(tree1, tree2, [ - { key: 2, leftValue: 20, rightValue: 200 }, - { key: 4, leftValue: 40, rightValue: 400 }, - ]); + expectForEachKeyInBothAndIntersectCalls(tree1, tree2, triples([2, 20, 200], [4, 40, 400])); expect(tree1.toArray()).toEqual(leftBefore); expect(tree2.toArray()).toEqual(rightBefore); tree1.checkValid(); tree2.checkValid(); }); - test('forEachKeyInBoth/intersect with contiguous overlap yields sorted keys', () => { - const tree1 = buildTree(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); - const tree2 = buildTree(tuples([3, 30], [4, 40], 
[5, 50], [6, 60], [7, 70])); + it('forEachKeyInBoth/intersect with contiguous overlap yields sorted keys', () => { + const tree1 = buildTreeForFanout(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); + const tree2 = buildTreeForFanout(tuples([3, 30], [4, 40], [5, 50], [6, 60], [7, 70])); runForEachKeyInBothAndIntersect(tree1, tree2, (calls) => { expect(calls.map(c => c.key)).toEqual([3, 4, 5, 6]); expect(calls.map(c => c.leftValue)).toEqual([3, 4, 5, 6]); @@ -112,7 +125,7 @@ function testForEachKeyInBoth(maxNodeSize: number) { }); }); - test('forEachKeyInBoth/intersect large overlapping range counts each shared key once', () => { + it('forEachKeyInBoth/intersect large overlapping range counts each shared key once', () => { const size = 1000; const overlapStart = 500; const leftEntries = Array.from({ length: size }, (_, i) => [i, i * 3] as [number, number]); @@ -120,8 +133,8 @@ function testForEachKeyInBoth(maxNodeSize: number) { const key = i + overlapStart; return [key, key * 7] as [number, number]; }); - const tree1 = buildTree(leftEntries); - const tree2 = buildTree(rightEntries); + const tree1 = buildTreeForFanout(leftEntries); + const tree2 = buildTreeForFanout(rightEntries); runForEachKeyInBothAndIntersect(tree1, tree2, (calls) => { expect(calls.length).toBe(size - overlapStart); expect(calls[0]).toEqual({ @@ -136,9 +149,9 @@ function testForEachKeyInBoth(maxNodeSize: number) { }); }); - test('forEachKeyInBoth/intersect tree with itself visits each key once', () => { + it('forEachKeyInBoth/intersect tree with itself visits each key once', () => { const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); - const tree = buildTree(entries); + const tree = buildTreeForFanout(entries); runForEachKeyInBothAndIntersect(tree, tree, (calls) => { expect(calls.length).toBe(entries.length); for (let i = 0; i < entries.length; i++) { @@ -148,29 +161,21 @@ function testForEachKeyInBoth(maxNodeSize: number) { }); }); - 
test('forEachKeyInBoth/intersect arguments determine left/right values', () => { - const tree1 = buildTree(tuples([1, 100], [2, 200], [4, 400])); - const tree2 = buildTree(tuples([2, 20], [3, 30], [4, 40])); - expectForEachKeyInBothAndIntersectCalls(tree1, tree2, [ - { key: 2, leftValue: 200, rightValue: 20 }, - { key: 4, leftValue: 400, rightValue: 40 }, - ]); - expectForEachKeyInBothAndIntersectCalls(tree2, tree1, [ - { key: 2, leftValue: 20, rightValue: 200 }, - { key: 4, leftValue: 40, rightValue: 400 }, - ]); + it('forEachKeyInBoth/intersect arguments determine left/right values', () => { + const tree1 = buildTreeForFanout(tuples([1, 100], [2, 200], [4, 400])); + const tree2 = buildTreeForFanout(tuples([2, 20], [3, 30], [4, 40])); + expectForEachKeyInBothAndIntersectCalls(tree1, tree2, triples([2, 200, 20], [4, 400, 40])); + expectForEachKeyInBothAndIntersectCalls(tree2, tree1, triples([2, 20, 200], [4, 40, 400])); }); -} +}); describe('BTree forEachKeyInBoth early exiting', () => { - const compare = (a: number, b: number) => a - b; - - const buildTree = (entries: Array<[number, number]>) => - new BTreeEx(entries, compare, 4); + const buildTreeForEarlyExit = (entries: Array<[number, number]>) => + buildTree(entries, 4); - test('forEachKeyInBoth returns undefined when callback returns void', () => { - const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); - const tree2 = buildTree(tuples([0, 100], [2, 200], [3, 300], [4, 400])); + it('forEachKeyInBoth returns undefined when callback returns void', () => { + const tree1 = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30])); + const tree2 = buildTreeForEarlyExit(tuples([0, 100], [2, 200], [3, 300], [4, 400])); const visited: number[] = []; const result = tree1.forEachKeyInBoth(tree2, key => { visited.push(key); @@ -179,9 +184,9 @@ describe('BTree forEachKeyInBoth early exiting', () => { expect(visited).toEqual([2, 3]); }); - test('forEachKeyInBoth ignores undefined break values and completes traversal', 
() => { - const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30])); - const tree2 = buildTree(tuples([2, 200], [3, 300], [5, 500])); + it('forEachKeyInBoth ignores undefined break values and completes traversal', () => { + const tree1 = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30])); + const tree2 = buildTreeForEarlyExit(tuples([2, 200], [3, 300], [5, 500])); const visited: number[] = []; const result = tree1.forEachKeyInBoth(tree2, key => { visited.push(key); @@ -191,9 +196,9 @@ describe('BTree forEachKeyInBoth early exiting', () => { expect(visited).toEqual([2, 3]); }); - test('forEachKeyInBoth breaks early when callback returns a value', () => { - const tree1 = buildTree(tuples([1, 10], [2, 20], [3, 30], [4, 40])); - const tree2 = buildTree(tuples([2, 200], [3, 300], [4, 400], [5, 500])); + it('forEachKeyInBoth breaks early when callback returns a value', () => { + const tree1 = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const tree2 = buildTreeForEarlyExit(tuples([2, 200], [3, 300], [4, 400], [5, 500])); const visited: number[] = []; const breakResult = tree1.forEachKeyInBoth(tree2, (key, leftValue, rightValue) => { visited.push(key); @@ -207,7 +212,7 @@ describe('BTree forEachKeyInBoth early exiting', () => { }); describe('BTree forEachKeyInBoth and intersect input/output validation', () => { - test('forEachKeyInBoth throws error when comparators differ', () => { + it('forEachKeyInBoth throws error when comparators differ', () => { const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); expect(() => tree1.forEachKeyInBoth(tree2, () => { })).toThrow(comparatorErrorMsg); @@ -216,7 +221,6 @@ describe('BTree forEachKeyInBoth and intersect input/output validation', () => { }); describe('BTree forEachKeyInBoth/intersect fuzz tests', () => { - const compare = (a: number, b: number) => a - b; const FUZZ_SETTINGS: SetOperationFuzzSettings = { branchingFactors: [4, 5, 32], ooms: 
[2, 3], @@ -230,15 +234,15 @@ describe('BTree forEachKeyInBoth/intersect fuzz tests', () => { const rng = new MersenneTwister(0xC0FFEE); forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { - test(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { - const treeA = new BTreeEx([], compare, maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); + it(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compareNumbers, maxNodeSize); + const treeB = new BTreeEx([], compareNumbers, maxNodeSize); const [treeAEntries, treeBEntries] = populateFuzzTrees( [ { tree: treeA, fraction: fractionA, removalChance }, { tree: treeB, fraction: fractionB, removalChance } ], - { rng, size, compare, maxNodeSize, minAssignmentsPerKey: 1 } + { rng, size, compare: compareNumbers, maxNodeSize, minAssignmentsPerKey: 1 } ); const bMap = new Map(treeBEntries); @@ -249,13 +253,8 @@ describe('BTree forEachKeyInBoth/intersect fuzz tests', () => { expectedTuples.push([key, leftValue, rightValue]); } - const expectedRecords = tuplesToRecords(expectedTuples); - expectForEachKeyInBothAndIntersectCalls(treeA, treeB, expectedRecords); - const swappedExpected = expectedRecords.map(({ key, leftValue, rightValue }) => ({ - key, - leftValue: rightValue, - rightValue: leftValue, - })); + expectForEachKeyInBothAndIntersectCalls(treeA, treeB, expectedTuples); + const swappedExpected = expectedTuples.map(([key, leftValue, rightValue]) => [key, rightValue, leftValue] as [number, number, number]); expectForEachKeyInBothAndIntersectCalls(treeB, treeA, swappedExpected); expectTreeMatchesEntries(treeA, treeAEntries); diff --git a/test/shared.d.ts b/test/shared.d.ts index 3d7356d..a38bc07 100644 --- a/test/shared.d.ts +++ b/test/shared.d.ts 
@@ -1,6 +1,7 @@ import BTree, { IMap } from '../b+tree'; import SortedArray from '../sorted-array'; import MersenneTwister from 'mersenne-twister'; +export declare const compareNumbers: (a: number, b: number) => number; export declare type TreeNodeStats = { total: number; shared: number; diff --git a/test/shared.js b/test/shared.js index a12d92d..06de3f5 100644 --- a/test/shared.js +++ b/test/shared.js @@ -3,9 +3,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.forEachFuzzCase = exports.expectTreeMatchesEntries = exports.applyRemovalRunsToTree = exports.populateFuzzTrees = exports.buildEntriesFromMap = exports.randomInt = exports.makeArray = exports.addToBoth = exports.expectTreeEqualTo = exports.randInt = exports.logTreeNodeStats = exports.countTreeNodeStats = void 0; +exports.forEachFuzzCase = exports.expectTreeMatchesEntries = exports.applyRemovalRunsToTree = exports.populateFuzzTrees = exports.buildEntriesFromMap = exports.randomInt = exports.makeArray = exports.addToBoth = exports.expectTreeEqualTo = exports.randInt = exports.logTreeNodeStats = exports.countTreeNodeStats = exports.compareNumbers = void 0; var mersenne_twister_1 = __importDefault(require("mersenne-twister")); var rand = new mersenne_twister_1.default(1234); +var compareNumbers = function (a, b) { return a - b; }; +exports.compareNumbers = compareNumbers; function countTreeNodeStats(tree) { var root = tree._root; if (tree.size === 0 || !root) diff --git a/test/shared.ts b/test/shared.ts index 1b24cbe..c4770f6 100644 --- a/test/shared.ts +++ b/test/shared.ts @@ -4,6 +4,7 @@ import MersenneTwister from 'mersenne-twister'; import type { BTreeWithInternals } from '../extended/shared'; const rand = new MersenneTwister(1234); +export const compareNumbers = (a: number, b: number) => a - b; export type TreeNodeStats = { total: number; diff 
--git a/test/subtract.test.ts b/test/subtract.test.ts index 2c4df07..e985570 100644 --- a/test/subtract.test.ts +++ b/test/subtract.test.ts @@ -7,7 +7,8 @@ import { expectTreeMatchesEntries, forEachFuzzCase, populateFuzzTrees, - SetOperationFuzzSettings + SetOperationFuzzSettings, + compareNumbers } from './shared'; type NotInCall = { key: number, value: number }; @@ -45,7 +46,6 @@ const tuplesToRecords = (entries: Array<[number, number]>): NotInCall[] => entries.map(([key, value]) => ({ key, value })); const tuples = (...pairs: Array<[number, number]>) => pairs; -const compareNumbers = (a: number, b: number) => a - b; const buildTree = (entries: Array<[number, number]>, maxNodeSize: number) => new BTreeEx(entries, compareNumbers, maxNodeSize); From 3799527504570d588f33f6fe16d6401def9cc0a3 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 20 Nov 2025 18:40:15 -0800 Subject: [PATCH 140/143] cleanup union tests --- test/union.test.ts | 388 ++++++++++++++++++--------------------------- 1 file changed, 156 insertions(+), 232 deletions(-) diff --git a/test/union.test.ts b/test/union.test.ts index 48cb173..d27a775 100644 --- a/test/union.test.ts +++ b/test/union.test.ts @@ -56,9 +56,17 @@ function testUnion(maxNodeSize: number) { }; type UnionExpectationOptions = { + after?: (ctx: { result: BTreeEx, expected: BTreeEx }) => void; expectedUnionFn?: UnionFn; }; + const sumUnion: UnionFn = (_key, leftValue, rightValue) => leftValue + rightValue; + const preferLeft: UnionFn = (_key, leftValue) => leftValue; + const preferRight: UnionFn = (_key, _leftValue, rightValue) => rightValue; + const failUnion = (message: string): UnionFn => () => { + throw new Error(message); + }; + const naiveUnion = ( left: BTreeEx, right: BTreeEx, @@ -85,10 +93,9 @@ function testUnion(maxNodeSize: number) { left: BTreeEx, right: BTreeEx, unionFn: UnionFn, - after?: (ctx: { result: BTreeEx, expected: BTreeEx }) => void, options: UnionExpectationOptions = {} ) => { - const expectedUnionFn = 
options.expectedUnionFn ?? unionFn; + const { expectedUnionFn = unionFn, after } = options; const expected = naiveUnion(left, right, expectedUnionFn); const result = left.union(right, unionFn); expect(result.toArray()).toEqual(expected.toArray()); @@ -108,19 +115,13 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(tree1, false); expectRootLeafState(tree2, false); - let unionCalls = 0; - const unionFn: UnionFn = () => { - unionCalls++; - return 0; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, ({ result }) => { - const resultRoot = result['_root'] as any; - expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); - expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); + expectUnionMatchesBaseline(tree1, tree2, failUnion('Union callback should not run for disjoint roots'), { + after: ({ result }) => { + const resultRoot = result['_root'] as any; + expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); + expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); + } }); - - expect(unionCalls).toBe(0); }); test('Union leaf roots with intersecting keys uses union callback', () => { @@ -131,14 +132,16 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(tree2, true); const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; - const unionFn: UnionFn = (key, leftValue, rightValue) => { - calls.push({ key, leftValue, rightValue }); - return leftValue + rightValue; - }; - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue - }); + expectUnionMatchesBaseline( + tree1, + tree2, + (key, leftValue, rightValue) => { + calls.push({ key, leftValue, rightValue }); + return leftValue + rightValue; + }, + { expectedUnionFn: sumUnion } + ); expect(calls).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); }); @@ -149,16 +152,11 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(tree1, 
true); expectRootLeafState(tree2, true); - let unionCalls = 0; - const unionFn: UnionFn = () => { - unionCalls++; - return 0; - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue - }); - expect(unionCalls).toBe(0); + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + failUnion('Union callback should not run for disjoint leaf roots') + ); expect(result.toArray()).toEqual([ [1, 1], [2, 1002], @@ -178,14 +176,16 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(tree2, false); let unionCalls = 0; - const unionFn: UnionFn = (_key, leftValue, rightValue) => { - unionCalls++; - return leftValue + rightValue; - }; - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue - }); + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + (_key, leftValue, rightValue) => { + unionCalls++; + return sumUnion(_key, leftValue, rightValue); + }, + { expectedUnionFn: sumUnion } + ); expect(unionCalls).toBe(1); expect(result.get(size - 1)).toBe((size - 1) + (size - 1) * 3); expect(result.size).toBe(tree1.size + tree2.size - 1); @@ -202,14 +202,11 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(tree1, false); expectRootLeafState(tree2, false); - let unionCalls = 0; - const unionFn: UnionFn = (_key, leftValue, rightValue) => { - unionCalls++; - return leftValue + rightValue; - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); - expect(unionCalls).toBe(0); + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + failUnion('Union callback should not run when all leaves are disjoint') + ); expect(result.size).toBe(tree1.size + tree2.size); }); @@ -222,14 +219,16 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(tree2, true); const seenKeys: number[] = []; - const unionFn: 
UnionFn = (key, _leftValue, rightValue) => { - seenKeys.push(key); - return rightValue; - }; - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, _leftValue, rightValue) => rightValue - }); + expectUnionMatchesBaseline( + tree1, + tree2, + (key, _leftValue, rightValue) => { + seenKeys.push(key); + return rightValue; + }, + { expectedUnionFn: preferRight } + ); expect(seenKeys.sort((a, b) => a - b)).toEqual([1, Math.floor(size / 2), size - 1]); }); @@ -242,14 +241,16 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(tree2, false); let unionCalls = 0; - const unionFn: UnionFn = (_key, _leftValue, rightValue) => { - unionCalls++; - return rightValue; - }; - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, _leftValue, rightValue) => rightValue - }); + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + (_key, _leftValue, rightValue) => { + unionCalls++; + return rightValue; + }, + { expectedUnionFn: preferRight } + ); expect(unionCalls).toBe(1); expect(result.get(size - 1)).toBe((size - 1) * 10); expect(result.size).toBe(tree1.size + tree2.size - 1); @@ -263,14 +264,11 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(treeOdd, false); expectRootLeafState(treeEven, false); - let unionCalls = 0; - const unionFn: UnionFn = () => { - unionCalls++; - return 0; - }; - - const { result } = expectUnionMatchesBaseline(treeOdd, treeEven, unionFn); - expect(unionCalls).toBe(0); + const { result } = expectUnionMatchesBaseline( + treeOdd, + treeEven, + failUnion('Union callback should not be invoked for disjoint parity sets') + ); expect(result.size).toBe(treeOdd.size + treeEven.size); }); @@ -284,9 +282,7 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(tree1, true); expectRootLeafState(tree2, true); - const unioned = tree1.union(tree2, () => { - throw new Error('Should not be called for disjoint keys'); - }); + const unioned = 
tree1.union(tree2, failUnion('Should not be called for disjoint keys')); const resultRoot = unioned['_root'] as any; const expectedKeys = keysA.concat(keysB).sort(compare); expect(resultRoot.isLeaf).toBe(true); @@ -316,33 +312,13 @@ function testUnion(maxNodeSize: number) { const first = buildUnderfilledTree(0); const second = buildUnderfilledTree(first.nextKey + maxNodeSize * 10); - const unioned = first.tree.union(second.tree, () => { throw new Error('Should not be called for disjoint keys'); }); + const unioned = first.tree.union(second.tree, failUnion('Should not be called for disjoint keys')); const resultRoot = unioned['_root'] as any; expect(resultRoot.isLeaf).toBe(false); expect(resultRoot.children.length).toBeGreaterThanOrEqual(minChildren); expect(resultRoot.children.length).toBe(first.childCount + second.childCount); }); - test('Union with single boundary overlap prefers right value', () => { - const size = maxNodeSize * 2; - const tree1 = buildTree(range(0, size), 1, 0); - const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); - - expectRootLeafState(tree1, false); - expectRootLeafState(tree2, false); - - let unionCalls = 0; - const unionFn: UnionFn = (_key, _leftValue, rightValue) => { - unionCalls++; - return rightValue; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, _leftValue, rightValue) => rightValue - }); - expect(unionCalls).toBe(1); - }); - test('Union overlapping prefix equal to branching factor', () => { const shared = maxNodeSize; const tree1Keys = [ @@ -361,37 +337,35 @@ function testUnion(maxNodeSize: number) { expectRootLeafState(tree2, false); const unionedKeys: number[] = []; - const unionFn: UnionFn = (key, leftValue, rightValue) => { - unionedKeys.push(key); - return leftValue + rightValue; - }; - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, leftValue, rightValue) => leftValue + rightValue - }); + expectUnionMatchesBaseline( + tree1, 
+ tree2, + (key, leftValue, rightValue) => { + unionedKeys.push(key); + return leftValue + rightValue; + }, + { expectedUnionFn: sumUnion } + ); expect(unionedKeys.sort((a, b) => a - b)).toEqual(range(0, shared)); }); test('Union two empty trees', () => { const tree1 = new BTreeEx([], compare, maxNodeSize); const tree2 = new BTreeEx([], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); + const { result } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); expect(result.size).toBe(0); }); test('Union empty tree with non-empty tree', () => { const tree1 = new BTreeEx([], compare, maxNodeSize); const tree2 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - const { result: leftUnion } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + const { result: leftUnion } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); expect(leftUnion.toArray()).toEqual(tree2.toArray()); - const { result: rightUnion } = expectUnionMatchesBaseline(tree2, tree1, unionFn); + const { result: rightUnion } = expectUnionMatchesBaseline(tree2, tree1, sumUnion); expect(rightUnion.toArray()).toEqual(tree2.toArray()); expect(tree1.toArray()).toEqual([]); expect(tree2.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); @@ -402,13 +376,12 @@ function testUnion(maxNodeSize: number) { test('Union with no overlapping keys', () => { const tree1 = new BTreeEx([[1, 10], [3, 30], [5, 50]], compare, maxNodeSize); const tree2 = new BTreeEx([[2, 20], [4, 40], [6, 60]], compare, maxNodeSize); - const unionFn: UnionFn = () => { - throw new Error('Should not be called for non-overlapping keys'); - }; - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: unionFn - }); + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + 
failUnion('Should not be called for non-overlapping keys') + ); expect(result.size).toBe(6); expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60]]); @@ -417,31 +390,24 @@ function testUnion(maxNodeSize: number) { test('Union with completely overlapping keys - sum values', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTreeEx([[1, 5], [2, 15], [3, 25]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); + const { result } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); expect(result.size).toBe(tree1.size); }); test('Union with completely overlapping keys - prefer left', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, _v2) => v1; - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, _v2) => v1 - }); + const { result } = expectUnionMatchesBaseline(tree1, tree2, preferLeft); expect(result.toArray()).toEqual(tree1.toArray()); }); test('Union with completely overlapping keys - prefer right', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, _v1, v2) => v2; - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + const { result } = expectUnionMatchesBaseline(tree1, tree2, (_k, _v1, v2) => v2); expect(result.toArray()).toEqual(tree2.toArray()); }); @@ -450,26 +416,26 @@ function testUnion(maxNodeSize: number) { const tree2 = new BTreeEx([[3, 300], [4, 400], [5, 500], [6, 600]], compare, maxNodeSize); const unionedKeys: number[] = []; - const unionFn: 
UnionFn = (key, v1, v2) => { - unionedKeys.push(key); - return v1 + v2; - }; - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); + expectUnionMatchesBaseline( + tree1, + tree2, + (key, v1, v2) => { + unionedKeys.push(key); + return v1 + v2; + }, + { expectedUnionFn: sumUnion } + ); expect(unionedKeys.sort((a, b) => a - b)).toEqual([3, 4]); }); test('Union with overlapping keys can delete entries', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400], [5, 500]], compare, maxNodeSize); - const unionFn: UnionFn = (k, v1, v2) => { + const { result } = expectUnionMatchesBaseline(tree1, tree2, (k, v1, v2) => { if (k === 3) return undefined; return v1 + v2; - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + }); expect(result.has(3)).toBe(false); }); @@ -477,16 +443,18 @@ function testUnion(maxNodeSize: number) { const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); const tree2 = new BTreeEx([[2, 20], [3, 30]], compare, maxNodeSize); - const unionCallLog: Array<{k: number, v1: number, v2: number}> = []; - const unionFn: UnionFn = (k, v1, v2) => { - unionCallLog.push({k, v1, v2}); - return v1; - }; + const unionCallLog: Array<{ k: number, v1: number, v2: number }> = []; - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 - }); - expect(unionCallLog).toEqual([{k: 2, v1: 20, v2: 20}]); + expectUnionMatchesBaseline( + tree1, + tree2, + (k, v1, v2) => { + unionCallLog.push({ k, v1, v2 }); + return v1; + }, + { expectedUnionFn: preferLeft } + ); + expect(unionCallLog).toEqual([{ k: 2, v1: 20, v2: 20 }]); }); test('Union does not mutate input trees', () => { @@ -494,12 +462,11 @@ function testUnion(maxNodeSize: number) { const entries2: [number, number][] = [[2, 200], [3, 300], [4, 400]]; const tree1 = new BTreeEx(entries1, 
compare, maxNodeSize); const tree2 = new BTreeEx(entries2, compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; const snapshot1 = tree1.toArray(); const snapshot2 = tree2.toArray(); - expectUnionMatchesBaseline(tree1, tree2, unionFn); + expectUnionMatchesBaseline(tree1, tree2, sumUnion); expect(tree1.toArray()).toEqual(snapshot1); expect(tree2.toArray()).toEqual(snapshot2); @@ -508,24 +475,22 @@ function testUnion(maxNodeSize: number) { }); test('Union large trees with some overlaps', () => { - const entries1: [number, number][] = []; - for (let i = 0; i < 1000; i++) entries1.push([i, i]); - - const entries2: [number, number][] = []; - for (let i = 500; i < 1500; i++) entries2.push([i, i * 10]); + const entries1: [number, number][] = range(0, 1000).map(i => [i, i]); + const entries2: [number, number][] = range(500, 1500).map(i => [i, i * 10]); const tree1 = new BTreeEx(entries1, compare, maxNodeSize); const tree2 = new BTreeEx(entries2, compare, maxNodeSize); let unionCount = 0; - const unionFn: UnionFn = (k, v1, v2) => { - unionCount++; - return v1 + v2; - }; - - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); + expectUnionMatchesBaseline( + tree1, + tree2, + (k, v1, v2) => { + unionCount++; + return v1 + v2; + }, + { expectedUnionFn: sumUnion } + ); expect(unionCount).toBe(500); }); @@ -542,14 +507,16 @@ function testUnion(maxNodeSize: number) { } const unionedKeys: number[] = []; - const unionFn: UnionFn = (key, v1, v2) => { - unionedKeys.push(key); - return v1 + v2; - }; - expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: (_k, v1, v2) => v1 + v2 - }); + expectUnionMatchesBaseline( + tree1, + tree2, + (key, v1, v2) => { + unionedKeys.push(key); + return v1 + v2; + }, + { expectedUnionFn: sumUnion } + ); const expectedUnionedKeys = range(50, 150).filter(k => k % 2 === 0); expect(unionedKeys.sort((a, b) => a - b)).toEqual(expectedUnionedKeys); @@ 
-558,9 +525,8 @@ function testUnion(maxNodeSize: number) { test('Union result can be modified without affecting inputs', () => { const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); const tree2 = new BTreeEx([[3, 30], [4, 40]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + const { result } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); result.set(1, 100); result.set(5, 50); @@ -580,13 +546,12 @@ function testUnion(maxNodeSize: number) { const size = maxNodeSize * 2 + 5; const tree = buildTree(range(0, size), 3, 1); let unionCalls = 0; - const unionFn: UnionFn = (key, leftValue, rightValue) => { - unionCalls++; - return leftValue + rightValue; - }; const original = tree.toArray(); - const result = tree.union(tree, unionFn); + const result = tree.union(tree, (key, leftValue, rightValue) => { + unionCalls++; + return sumUnion(key, leftValue, rightValue); + }); expect(unionCalls).toBe(0); expect(result).not.toBe(tree); expect(result.toArray()).toEqual(original); @@ -597,13 +562,11 @@ function testUnion(maxNodeSize: number) { const size = maxNodeSize * 2 + 1; const tree = buildTree(range(0, size), 1, 0); let unionCalls = 0; - const unionFn: UnionFn = (_key, _leftValue, _rightValue) => { + const original = tree.toArray(); + const result = union(tree, tree, (_key: number, _leftValue: number, _rightValue: number) => { unionCalls++; return undefined; - }; - - const original = tree.toArray(); - const result = union(tree, tree, unionFn); + }); expect(unionCalls).toBe(0); expect(result).not.toBe(tree); expect(result.toArray()).toEqual(original); @@ -620,13 +583,12 @@ function testUnion(maxNodeSize: number) { const tree1 = new BTreeEx(entries1, compare, maxNodeSize); const tree2 = new BTreeEx(entries2, compare, maxNodeSize); - const unionFn: UnionFn = () => { - throw new Error('Should not be called - no overlaps'); - }; - const { result } = 
expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: unionFn - }); + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + failUnion('Should not be called - no overlaps') + ); expect(result.size).toBe(300); expect(result.get(1)).toBe(1); @@ -640,39 +602,16 @@ function testUnion(maxNodeSize: number) { test('Union with single element trees', () => { const tree1 = new BTreeEx([[5, 50]], compare, maxNodeSize); const tree2 = new BTreeEx([[5, 500]], compare, maxNodeSize); - const unionFn: UnionFn = (_k, v1, v2) => Math.max(v1, v2); - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + const { result } = expectUnionMatchesBaseline(tree1, tree2, (_k, v1, v2) => Math.max(v1, v2)); expect(result.toArray()).toEqual([[5, 500]]); }); - test('Union interleaved keys', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - for (let i = 1; i <= 100; i += 2) - tree1.set(i, i); - - const tree2 = new BTreeEx([], compare, maxNodeSize); - for (let i = 2; i <= 100; i += 2) - tree2.set(i, i); - - const unionFn: UnionFn = () => { - throw new Error('Should not be called - no overlapping keys'); - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: unionFn - }); - expect(result.size).toBe(100); - for (let i = 1; i <= 100; i++) - expect(result.get(i)).toBe(i); - }); - test('Union excluding all overlapping keys', () => { const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400]], compare, maxNodeSize); - const unionFn: UnionFn = () => undefined; - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn); + const { result } = expectUnionMatchesBaseline(tree1, tree2, () => undefined); expect(result.toArray()).toEqual([[1, 10], [4, 400]]); }); @@ -685,17 +624,11 @@ function testUnion(maxNodeSize: number) { for (let i = 10001; i <= 20000; i++) tree2.set(i, i); - let unionCalls = 0; - 
const unionFn: UnionFn = (_k, v1, v2) => { - unionCalls++; - return v1 + v2; - }; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, unionFn, undefined, { - expectedUnionFn: unionFn - }); - - expect(unionCalls).toBe(0); + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + failUnion('Union callback should not run for disjoint ranges') + ); expect(result.size).toBe(tree1.size + tree2.size); expect(result.get(0)).toBe(0); expect(result.get(20000)).toBe(20000); @@ -714,10 +647,7 @@ function testUnion(maxNodeSize: number) { for (let k of keys2) tree2.set(k, k * 10); - const preferLeft: UnionFn = (_key, leftValue) => leftValue; - expectUnionMatchesBaseline(tree1, tree2, preferLeft, undefined, { - expectedUnionFn: preferLeft - }); + expectUnionMatchesBaseline(tree1, tree2, preferLeft); }); test('Union trees with ~10% overlap', () => { @@ -736,11 +666,7 @@ function testUnion(maxNodeSize: number) { tree2.set(key, key * 10); } - const preferLeft: UnionFn = (_key, leftValue) => leftValue; - - const { result } = expectUnionMatchesBaseline(tree1, tree2, preferLeft, undefined, { - expectedUnionFn: preferLeft - }); + const { result } = expectUnionMatchesBaseline(tree1, tree2, preferLeft); expect(result.size).toBe(size + size - overlap); for (let i = 0; i < offset; i++) @@ -754,21 +680,19 @@ function testUnion(maxNodeSize: number) { } describe('BTree union input/output validation', () => { - test('Union throws error when comparators differ', () => { + test('Union throws error when comparators differ', () => { const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - expect(() => tree1.union(tree2, unionFn)).toThrow(comparatorErrorMsg); + expect(() => tree1.union(tree2, (_k, v1, v2) => v1 + v2)).toThrow(comparatorErrorMsg); }); test('Union throws error when max node sizes differ', () => { const compare = (a: number, b: number) => b - a; const tree1 
= new BTreeEx([[1, 10]], compare, 32); const tree2 = new BTreeEx([[2, 20]], compare, 33); - const unionFn: UnionFn = (_k, v1, v2) => v1 + v2; - expect(() => tree1.union(tree2, unionFn)).toThrow(branchingFactorErrorMsg); + expect(() => tree1.union(tree2, (_k, v1, v2) => v1 + v2)).toThrow(branchingFactorErrorMsg); }); test('Union returns a tree of the same class', () => { From 69b86e5b8982262d47c54380eddd6ec53a92d8a4 Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Thu, 20 Nov 2025 19:02:40 -0800 Subject: [PATCH 141/143] cleanup tests --- test/union.test.ts | 168 +++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 88 deletions(-) diff --git a/test/union.test.ts b/test/union.test.ts index d27a775..ca1924e 100644 --- a/test/union.test.ts +++ b/test/union.test.ts @@ -9,19 +9,13 @@ import { makeArray, populateFuzzTrees, randomInt, - SetOperationFuzzSettings + SetOperationFuzzSettings, + compareNumbers } from './shared'; -var test: (name: string, f: () => void) => void = it; - -describe('BTree union tests with fanout 32', testUnion.bind(null, 32)); -describe('BTree union tests with fanout 10', testUnion.bind(null, 10)); -describe('BTree union tests with fanout 4', testUnion.bind(null, 4)); - type UnionFn = (key: number, leftValue: number, rightValue: number) => number | undefined; -function testUnion(maxNodeSize: number) { - const compare = (a: number, b: number) => a - b; +describe.each([32, 10, 4])('BTree union tests with fanout %i', (maxNodeSize) => { const sharesNode = (root: any, targetNode: any): boolean => { if (root === targetNode) return true; @@ -36,7 +30,7 @@ function testUnion(maxNodeSize: number) { }; const buildTree = (keys: number[], valueScale = 1, valueOffset = 0) => { - const tree = new BTreeEx([], compare, maxNodeSize); + const tree = new BTreeEx([], compareNumbers, maxNodeSize); for (const key of keys) { tree.set(key, key * valueScale + valueOffset); } @@ -105,7 +99,7 @@ function testUnion(maxNodeSize: number) { return { 
result, expected }; }; - test('Union disjoint roots reuses roots', () => { + it('Union disjoint roots reuses roots', () => { // ensure the roots are not underfilled, as union will try to merge underfilled roots const size = maxNodeSize * maxNodeSize; const tree1 = buildTree(range(0, size), 1, 0); @@ -124,7 +118,7 @@ function testUnion(maxNodeSize: number) { }); }); - test('Union leaf roots with intersecting keys uses union callback', () => { + it('Union leaf roots with intersecting keys uses union callback', () => { const tree1 = buildTree([1, 2, 4], 10, 0); const tree2 = buildTree([2, 3, 5], 100, 0); @@ -145,7 +139,7 @@ function testUnion(maxNodeSize: number) { expect(calls).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); }); - test('Union leaf roots with disjoint keys', () => { + it('Union leaf roots with disjoint keys', () => { const tree1 = buildTree([1, 3, 5], 1, 0); const tree2 = buildTree([2, 4, 6], 1, 1000); @@ -167,7 +161,7 @@ function testUnion(maxNodeSize: number) { ]); }); - test('Union trees disjoint except for shared maximum key', () => { + it('Union trees disjoint except for shared maximum key', () => { const size = maxNodeSize * 2; const tree1 = buildTree(range(0, size), 1, 0); const tree2 = buildTree(range(size - 1, size - 1 + size), 3, 0); @@ -191,7 +185,7 @@ function testUnion(maxNodeSize: number) { expect(result.size).toBe(tree1.size + tree2.size - 1); }); - test('Union trees where all leaves are disjoint and one tree straddles the other', () => { + it('Union trees where all leaves are disjoint and one tree straddles the other', () => { const straddleLength = 3 * 2 * maxNodeSize; // creates multiple leaves on both trees const tree1 = buildTree( range(0, straddleLength / 3).concat(range((straddleLength / 3) * 2, straddleLength)), @@ -210,7 +204,7 @@ function testUnion(maxNodeSize: number) { expect(result.size).toBe(tree1.size + tree2.size); }); - test('Union where two-leaf tree intersects leaf-root tree across both leaves', () => { + 
it('Union where two-leaf tree intersects leaf-root tree across both leaves', () => { const size = maxNodeSize + Math.max(3, Math.floor(maxNodeSize / 2)); const tree1 = buildTree(range(0, size), 2, 0); const tree2 = buildTree([1, Math.floor(size / 2), size - 1], 5, 0); @@ -232,7 +226,7 @@ function testUnion(maxNodeSize: number) { expect(seenKeys.sort((a, b) => a - b)).toEqual([1, Math.floor(size / 2), size - 1]); }); - test('Union where max key equals min key of other tree', () => { + it('Union where max key equals min key of other tree', () => { const size = maxNodeSize * 2; const tree1 = buildTree(range(0, size), 1, 0); const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); @@ -256,7 +250,7 @@ function testUnion(maxNodeSize: number) { expect(result.size).toBe(tree1.size + tree2.size - 1); }); - test('Union odd and even keyed trees', () => { + it('Union odd and even keyed trees', () => { const limit = maxNodeSize * 3; const treeOdd = buildTree(range(1, limit * 2, 2), 1, 0); const treeEven = buildTree(range(0, limit * 2, 2), 1, 100); @@ -272,7 +266,7 @@ function testUnion(maxNodeSize: number) { expect(result.size).toBe(treeOdd.size + treeEven.size); }); - test('Union merges disjoint leaf roots into a single leaf', () => { + it('Union merges disjoint leaf roots into a single leaf', () => { const perTree = Math.max(1, Math.floor(maxNodeSize / 2) - 1); const keysA = range(1, perTree).map(i => i); const keysB = keysA.map(k => k * 1000); @@ -284,12 +278,12 @@ function testUnion(maxNodeSize: number) { const unioned = tree1.union(tree2, failUnion('Should not be called for disjoint keys')); const resultRoot = unioned['_root'] as any; - const expectedKeys = keysA.concat(keysB).sort(compare); + const expectedKeys = keysA.concat(keysB).sort(compareNumbers); expect(resultRoot.isLeaf).toBe(true); expect(resultRoot.keys).toEqual(expectedKeys); }); - test('Union combines underfilled non-leaf roots into a filled root', () => { + it('Union combines underfilled non-leaf 
roots into a filled root', () => { const minChildren = Math.floor(maxNodeSize / 2); const targetLeavesPerTree = minChildren - 1; if (targetLeavesPerTree === 1) { @@ -319,7 +313,7 @@ function testUnion(maxNodeSize: number) { expect(resultRoot.children.length).toBe(first.childCount + second.childCount); }); - test('Union overlapping prefix equal to branching factor', () => { + it('Union overlapping prefix equal to branching factor', () => { const shared = maxNodeSize; const tree1Keys = [ ...range(0, shared), @@ -350,17 +344,17 @@ function testUnion(maxNodeSize: number) { expect(unionedKeys.sort((a, b) => a - b)).toEqual(range(0, shared)); }); - test('Union two empty trees', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); + it('Union two empty trees', () => { + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([], compareNumbers, maxNodeSize); const { result } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); expect(result.size).toBe(0); }); - test('Union empty tree with non-empty tree', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); + it('Union empty tree with non-empty tree', () => { + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); const { result: leftUnion } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); expect(leftUnion.toArray()).toEqual(tree2.toArray()); @@ -373,9 +367,9 @@ function testUnion(maxNodeSize: number) { tree2.checkValid(); }); - test('Union with no overlapping keys', () => { - const tree1 = new BTreeEx([[1, 10], [3, 30], [5, 50]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 20], [4, 40], [6, 60]], compare, maxNodeSize); + it('Union with no overlapping keys', () => { + const tree1 = new BTreeEx([[1, 10], [3, 30], [5, 50]], 
compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[2, 20], [4, 40], [6, 60]], compareNumbers, maxNodeSize); const { result } = expectUnionMatchesBaseline( tree1, @@ -387,33 +381,33 @@ function testUnion(maxNodeSize: number) { expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60]]); }); - test('Union with completely overlapping keys - sum values', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTreeEx([[1, 5], [2, 15], [3, 25]], compare, maxNodeSize); + it('Union with completely overlapping keys - sum values', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[1, 5], [2, 15], [3, 25]], compareNumbers, maxNodeSize); const { result } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); expect(result.size).toBe(tree1.size); }); - test('Union with completely overlapping keys - prefer left', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); + it('Union with completely overlapping keys - prefer left', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compareNumbers, maxNodeSize); const { result } = expectUnionMatchesBaseline(tree1, tree2, preferLeft); expect(result.toArray()).toEqual(tree1.toArray()); }); - test('Union with completely overlapping keys - prefer right', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compare, maxNodeSize); + it('Union with completely overlapping keys - prefer right', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compareNumbers, 
maxNodeSize); const { result } = expectUnionMatchesBaseline(tree1, tree2, (_k, _v1, v2) => v2); expect(result.toArray()).toEqual(tree2.toArray()); }); - test('Union with partially overlapping keys', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); - const tree2 = new BTreeEx([[3, 300], [4, 400], [5, 500], [6, 600]], compare, maxNodeSize); + it('Union with partially overlapping keys', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[3, 300], [4, 400], [5, 500], [6, 600]], compareNumbers, maxNodeSize); const unionedKeys: number[] = []; @@ -429,9 +423,9 @@ function testUnion(maxNodeSize: number) { expect(unionedKeys.sort((a, b) => a - b)).toEqual([3, 4]); }); - test('Union with overlapping keys can delete entries', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400], [5, 500]], compare, maxNodeSize); + it('Union with overlapping keys can delete entries', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400], [5, 500]], compareNumbers, maxNodeSize); const { result } = expectUnionMatchesBaseline(tree1, tree2, (k, v1, v2) => { if (k === 3) return undefined; return v1 + v2; @@ -439,9 +433,9 @@ function testUnion(maxNodeSize: number) { expect(result.has(3)).toBe(false); }); - test('Union is called even when values are equal', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 20], [3, 30]], compare, maxNodeSize); + it('Union is called even when values are equal', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[2, 20], [3, 30]], compareNumbers, maxNodeSize); const unionCallLog: Array<{ k: number, v1: number, 
v2: number }> = []; @@ -457,11 +451,11 @@ function testUnion(maxNodeSize: number) { expect(unionCallLog).toEqual([{ k: 2, v1: 20, v2: 20 }]); }); - test('Union does not mutate input trees', () => { + it('Union does not mutate input trees', () => { const entries1: [number, number][] = [[1, 10], [2, 20], [3, 30]]; const entries2: [number, number][] = [[2, 200], [3, 300], [4, 400]]; - const tree1 = new BTreeEx(entries1, compare, maxNodeSize); - const tree2 = new BTreeEx(entries2, compare, maxNodeSize); + const tree1 = new BTreeEx(entries1, compareNumbers, maxNodeSize); + const tree2 = new BTreeEx(entries2, compareNumbers, maxNodeSize); const snapshot1 = tree1.toArray(); const snapshot2 = tree2.toArray(); @@ -474,12 +468,12 @@ function testUnion(maxNodeSize: number) { tree2.checkValid(); }); - test('Union large trees with some overlaps', () => { + it('Union large trees with some overlaps', () => { const entries1: [number, number][] = range(0, 1000).map(i => [i, i]); const entries2: [number, number][] = range(500, 1500).map(i => [i, i * 10]); - const tree1 = new BTreeEx(entries1, compare, maxNodeSize); - const tree2 = new BTreeEx(entries2, compare, maxNodeSize); + const tree1 = new BTreeEx(entries1, compareNumbers, maxNodeSize); + const tree2 = new BTreeEx(entries2, compareNumbers, maxNodeSize); let unionCount = 0; expectUnionMatchesBaseline( @@ -494,9 +488,9 @@ function testUnion(maxNodeSize: number) { expect(unionCount).toBe(500); }); - test('Union with overlaps at boundaries', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); + it('Union with overlaps at boundaries', () => { + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([], compareNumbers, maxNodeSize); for (let i = 0; i < 100; i++) { tree1.set(i * 2, i * 2); @@ -522,9 +516,9 @@ function testUnion(maxNodeSize: number) { expect(unionedKeys.sort((a, b) => a - b)).toEqual(expectedUnionedKeys); }); - 
test('Union result can be modified without affecting inputs', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20]], compare, maxNodeSize); - const tree2 = new BTreeEx([[3, 30], [4, 40]], compare, maxNodeSize); + it('Union result can be modified without affecting inputs', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[3, 30], [4, 40]], compareNumbers, maxNodeSize); const { result } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); @@ -542,7 +536,7 @@ function testUnion(maxNodeSize: number) { result.checkValid(); }); - test('Union tree with itself returns a clone without invoking combineFn', () => { + it('Union tree with itself returns a clone without invoking combineFn', () => { const size = maxNodeSize * 2 + 5; const tree = buildTree(range(0, size), 3, 1); let unionCalls = 0; @@ -558,7 +552,7 @@ function testUnion(maxNodeSize: number) { expect(tree.toArray()).toEqual(original); }); - test('Standalone union short-circuits when given the same tree twice', () => { + it('Standalone union short-circuits when given the same tree twice', () => { const size = maxNodeSize * 2 + 1; const tree = buildTree(range(0, size), 1, 0); let unionCalls = 0; @@ -573,7 +567,7 @@ function testUnion(maxNodeSize: number) { expect(tree.toArray()).toEqual(original); }); - test('Union with disjoint ranges', () => { + it('Union with disjoint ranges', () => { const entries1: [number, number][] = []; for (let i = 1; i <= 100; i++) entries1.push([i, i]); for (let i = 201; i <= 300; i++) entries1.push([i, i]); @@ -581,8 +575,8 @@ function testUnion(maxNodeSize: number) { const entries2: [number, number][] = []; for (let i = 101; i <= 200; i++) entries2.push([i, i]); - const tree1 = new BTreeEx(entries1, compare, maxNodeSize); - const tree2 = new BTreeEx(entries2, compare, maxNodeSize); + const tree1 = new BTreeEx(entries1, compareNumbers, maxNodeSize); + const tree2 = new BTreeEx(entries2, compareNumbers, maxNodeSize); const 
{ result } = expectUnionMatchesBaseline( tree1, @@ -599,25 +593,25 @@ function testUnion(maxNodeSize: number) { expect(result.get(300)).toBe(300); }); - test('Union with single element trees', () => { - const tree1 = new BTreeEx([[5, 50]], compare, maxNodeSize); - const tree2 = new BTreeEx([[5, 500]], compare, maxNodeSize); + it('Union with single element trees', () => { + const tree1 = new BTreeEx([[5, 50]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[5, 500]], compareNumbers, maxNodeSize); const { result } = expectUnionMatchesBaseline(tree1, tree2, (_k, v1, v2) => Math.max(v1, v2)); expect(result.toArray()).toEqual([[5, 500]]); }); - test('Union excluding all overlapping keys', () => { - const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compare, maxNodeSize); - const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400]], compare, maxNodeSize); + it('Union excluding all overlapping keys', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400]], compareNumbers, maxNodeSize); const { result } = expectUnionMatchesBaseline(tree1, tree2, () => undefined); expect(result.toArray()).toEqual([[1, 10], [4, 400]]); }); - test('Union with large disjoint ranges', () => { - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); + it('Union with large disjoint ranges', () => { + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([], compareNumbers, maxNodeSize); for (let i = 0; i <= 10000; i++) tree1.set(i, i); @@ -634,7 +628,7 @@ function testUnion(maxNodeSize: number) { expect(result.get(20000)).toBe(20000); }); - test('Union trees with random overlap', () => { + it('Union trees with random overlap', () => { const size = 10000; const keys1 = makeArray(size, true); const keys2 = makeArray(size, true); @@ -650,13 +644,13 @@ function testUnion(maxNodeSize: number) { 
expectUnionMatchesBaseline(tree1, tree2, preferLeft); }); - test('Union trees with ~10% overlap', () => { + it('Union trees with ~10% overlap', () => { const size = 200; const offset = Math.floor(size * 0.9); const overlap = size - offset; - const tree1 = new BTreeEx([], compare, maxNodeSize); - const tree2 = new BTreeEx([], compare, maxNodeSize); + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([], compareNumbers, maxNodeSize); for (let i = 0; i < size; i++) tree1.set(i, i); @@ -677,7 +671,7 @@ function testUnion(maxNodeSize: number) { for (let i = size; i < upperBound; i++) expect(result.get(i)).toBe(i * 10); }); -} +}); describe('BTree union input/output validation', () => { test('Union throws error when comparators differ', () => { @@ -688,9 +682,8 @@ describe('BTree union input/output validation', () => { }); test('Union throws error when max node sizes differ', () => { - const compare = (a: number, b: number) => b - a; - const tree1 = new BTreeEx([[1, 10]], compare, 32); - const tree2 = new BTreeEx([[2, 20]], compare, 33); + const tree1 = new BTreeEx([[1, 10]], compareNumbers, 32); + const tree2 = new BTreeEx([[2, 20]], compareNumbers, 33); expect(() => tree1.union(tree2, (_k, v1, v2) => v1 + v2)).toThrow(branchingFactorErrorMsg); }); @@ -703,7 +696,6 @@ describe('BTree union input/output validation', () => { }); describe('BTree union fuzz tests', () => { - const compare = (a: number, b: number) => a - b; const unionFn = (_k: number, left: number, _right: number) => left; const FUZZ_SETTINGS: SetOperationFuzzSettings = { branchingFactors: [4, 5, 32], @@ -720,14 +712,14 @@ describe('BTree union fuzz tests', () => { forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { test(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { - const treeA = new BTreeEx([], compare, 
maxNodeSize); - const treeB = new BTreeEx([], compare, maxNodeSize); + const treeA = new BTreeEx([], compareNumbers, maxNodeSize); + const treeB = new BTreeEx([], compareNumbers, maxNodeSize); const [treeAEntries, treeBEntries] = populateFuzzTrees( [ { tree: treeA, fraction: fractionA, removalChance }, { tree: treeB, fraction: fractionB, removalChance } ], - { rng, size, compare, maxNodeSize, minAssignmentsPerKey: 1 } + { rng, size, compare: compareNumbers, maxNodeSize, minAssignmentsPerKey: 1 } ); const unioned = treeA.union(treeB, unionFn); @@ -736,7 +728,7 @@ describe('BTree union fuzz tests', () => { const combinedKeys = new Set(); treeAEntries.forEach(([key]) => combinedKeys.add(key)); treeBEntries.forEach(([key]) => combinedKeys.add(key)); - const expected = Array.from(combinedKeys).sort(compare).map(key => [key, key]); + const expected = Array.from(combinedKeys).sort(compareNumbers).map(key => [key, key]); expect(unioned.toArray()).toEqual(expected); // Union should not have mutated inputs From d601b3fac8b7121d1f9d9bf9ad9c8b3edf0818ba Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Fri, 21 Nov 2025 08:17:32 -0800 Subject: [PATCH 142/143] update sizes script naming --- scripts/size-report.js | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/size-report.js b/scripts/size-report.js index 4cab044..49f5269 100644 --- a/scripts/size-report.js +++ b/scripts/size-report.js @@ -73,8 +73,8 @@ const header = console.log(header); console.log('-'.repeat(header.length)); -const nonCoreTotals = { raw: 0, min: 0, gz: 0 }; -const nonCoreHasValue = { raw: false, min: false, gz: false }; +const btreeExTransitive = { raw: 0, min: 0, gz: 0 }; +const btreeExTransitiveHasValue = { raw: false, min: false, gz: false }; entryPoints.forEach((entry, index) => { const raw = fileSize(entry.raw); @@ -88,26 +88,26 @@ entryPoints.forEach((entry, index) => { console.log(line); if (index > 0) { if (typeof raw === 'number') { - 
nonCoreTotals.raw += raw; - nonCoreHasValue.raw = true; + btreeExTransitive.raw += raw; + btreeExTransitiveHasValue.raw = true; } if (typeof min === 'number') { - nonCoreTotals.min += min; - nonCoreHasValue.min = true; + btreeExTransitive.min += min; + btreeExTransitiveHasValue.min = true; } if (typeof gz === 'number') { - nonCoreTotals.gz += gz; - nonCoreHasValue.gz = true; + btreeExTransitive.gz += gz; + btreeExTransitiveHasValue.gz = true; } } }); if (entryPoints.length > 1) { const line = - pad('Non-core total', nameColumnWidth) + - pad(nonCoreHasValue.raw ? formatBytes(nonCoreTotals.raw) : 'n/a', 13) + - pad(nonCoreHasValue.min ? formatBytes(nonCoreTotals.min) : 'n/a', 13) + - (nonCoreHasValue.gz ? formatBytes(nonCoreTotals.gz) : 'n/a'); + pad('BTreeEx transitive', nameColumnWidth) + + pad(btreeExTransitiveHasValue.raw ? formatBytes(btreeExTransitive.raw) : 'n/a', 13) + + pad(btreeExTransitiveHasValue.min ? formatBytes(btreeExTransitive.min) : 'n/a', 13) + + (btreeExTransitiveHasValue.gz ? formatBytes(btreeExTransitive.gz) : 'n/a'); console.log('-'.repeat(header.length)); console.log(line); } From 7958c155c3313d57c3eefefb40ff464fd608841b Mon Sep 17 00:00:00 2001 From: Taylor Williams Date: Fri, 21 Nov 2025 08:19:15 -0800 Subject: [PATCH 143/143] cleanup --- b+tree.js | 2 -- b+tree.ts | 2 -- 2 files changed, 4 deletions(-) diff --git a/b+tree.js b/b+tree.js index 25111bb..637db72 100644 --- a/b+tree.js +++ b/b+tree.js @@ -1505,8 +1505,6 @@ function sumChildSizes(children) { exports.sumChildSizes = sumChildSizes; /** * Determines whether two nodes are overlapping in key range. - * Takes the leftmost known key of each node to avoid a log(n) min calculation. - * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. 
* @internal */ function areOverlapping(aMin, aMax, bMin, bMax, cmp) { diff --git a/b+tree.ts b/b+tree.ts index 2694ba9..c0d80c7 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -1654,8 +1654,6 @@ export function sumChildSizes(children: BNode[]): number { /** * Determines whether two nodes are overlapping in key range. - * Takes the leftmost known key of each node to avoid a log(n) min calculation. - * This will still catch overlapping nodes because of the alternate hopping walk of the cursors. * @internal */ export function areOverlapping(