diff --git a/.vscode/launch.json b/.vscode/launch.json index c39e819..9b0df17 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -9,7 +9,7 @@ "request": "launch", "name": "Jest Tests", "program": "${workspaceRoot}/node_modules/jest/bin/jest.js", - "args": [ "--runInBand", "--ci", "--bail" ], + "args": [ "--runInBand", "--ci" ], //"preLaunchTask": "build", "internalConsoleOptions": "openOnSessionStart", "outFiles": [ @@ -24,7 +24,7 @@ "name": "Debug Jest Tests", "runtimeArgs": [ "--nolazy", "--inspect-brk", "${workspaceRoot}/node_modules/jest/bin/jest.js", - "--runInBand", "--coverage", "false", "--ci", "--bail" ], + "--runInBand", "--coverage", "false", "--ci" ], "console": "integratedTerminal", //"internalConsoleOptions": "openOnSessionStart", "port": 9229, @@ -41,4 +41,4 @@ "port": 9229, } ] -} \ No newline at end of file +} diff --git a/b+tree.d.ts b/b+tree.d.ts index a8ff453..57fddd7 100644 --- a/b+tree.d.ts +++ b/b+tree.d.ts @@ -105,7 +105,6 @@ export declare function simpleComparator(a: (number | string)[], b: (number | st */ export default class BTree implements ISortedMapF, ISortedMap { private _root; - _size: number; _maxNodeSize: number; /** * provides a total order over keys (and a strict partial order over the type K) diff --git a/b+tree.js b/b+tree.js index df9add2..637db72 100644 --- a/b+tree.js +++ b/b+tree.js @@ -15,7 +15,7 @@ var __extends = (this && this.__extends) || (function () { }; })(); Object.defineProperty(exports, "__esModule", { value: true }); -exports.EmptyBTree = exports.check = exports.BNodeInternal = exports.BNode = exports.asSet = exports.simpleComparator = exports.defaultComparator = void 0; +exports.EmptyBTree = exports.check = exports.areOverlapping = exports.sumChildSizes = exports.BNodeInternal = exports.BNode = exports.asSet = exports.simpleComparator = exports.defaultComparator = void 0; /** * Compares DefaultComparables to form a strict partial ordering. 
* @@ -155,7 +155,6 @@ var BTree = /** @class */ (function () { */ function BTree(entries, compare, maxNodeSize) { this._root = EmptyLeaf; - this._size = 0; this._maxNodeSize = maxNodeSize >= 4 ? Math.min(maxNodeSize, 256) : 32; this._compare = compare || defaultComparator; if (entries) @@ -165,26 +164,25 @@ var BTree = /** @class */ (function () { ///////////////////////////////////////////////////////////////////////////// // ES6 Map methods ///////////////////////////////////////////////////// /** Gets the number of key-value pairs in the tree. */ - get: function () { return this._size; }, + get: function () { return this._root.size(); }, enumerable: false, configurable: true }); Object.defineProperty(BTree.prototype, "length", { /** Gets the number of key-value pairs in the tree. */ - get: function () { return this._size; }, + get: function () { return this.size; }, enumerable: false, configurable: true }); Object.defineProperty(BTree.prototype, "isEmpty", { /** Returns true iff the tree contains no key-value pairs. */ - get: function () { return this._size === 0; }, + get: function () { return this._root.size() === 0; }, enumerable: false, configurable: true }); /** Releases the tree so that its size is 0. */ BTree.prototype.clear = function () { this._root = EmptyLeaf; - this._size = 0; }; /** Runs a function for each key-value pair, in order from smallest to * largest key. For compatibility with ES6 Map, the argument order to @@ -248,7 +246,8 @@ var BTree = /** @class */ (function () { if (result === true || result === false) return result; // Root node has split, so create a new root node. 
- this._root = new BNodeInternal([this._root, result]); + var children = [this._root, result]; + this._root = new BNodeInternal(children, sumChildSizes(children)); return true; }; /** @@ -537,7 +536,6 @@ var BTree = /** @class */ (function () { this._root.isShared = true; var result = new BTree(undefined, this._compare, this._maxNodeSize); result._root = this._root; - result._size = this._size; return result; }; /** Performs a greedy clone, immediately duplicating any nodes that are @@ -548,7 +546,6 @@ var BTree = /** @class */ (function () { BTree.prototype.greedyClone = function (force) { var result = new BTree(undefined, this._compare, this._maxNodeSize); result._root = this._root.greedyClone(force); - result._size = this._size; return result; }; /** Gets an array filled with the contents of the tree, sorted by key */ @@ -830,7 +827,7 @@ var BTree = /** @class */ (function () { * skips the most expensive test - whether all keys are sorted - but it * does check that maxKey() of the children of internal nodes are sorted. */ BTree.prototype.checkValid = function () { - var size = this._root.checkValid(0, this, 0); + var size = this._root.checkValid(0, this, 0)[0]; check(size === this.size, "size mismatch: counted ", size, "but stored", this.size); }; return BTree; @@ -870,6 +867,9 @@ var BNode = /** @class */ (function () { enumerable: false, configurable: true }); + BNode.prototype.size = function () { + return this.keys.length; + }; /////////////////////////////////////////////////////////////////////////// // Shared methods ///////////////////////////////////////////////////////// BNode.prototype.maxKey = function () { @@ -1004,7 +1004,11 @@ var BNode = /** @class */ (function () { // it can't be merged with adjacent nodes. However, the parent will // verify that the average node size is at least half of the maximum. 
check(depth == 0 || kL > 0, "empty leaf at depth", depth, "and baseIndex", baseIndex); - return kL; + for (var i = 1; i < kL; i++) { + var c = tree._compare(this.keys[i - 1], this.keys[i]); + check(c < 0, "keys out of order at depth", depth, "and baseIndex", baseIndex + i - 1, ": ", this.keys[i - 1], " !< ", this.keys[i]); + } + return [kL, this.keys[0], this.keys[kL - 1]]; }; ///////////////////////////////////////////////////////////////////////////// // Leaf Node: set & node splitting ////////////////////////////////////////// @@ -1013,7 +1017,6 @@ var BNode = /** @class */ (function () { if (i < 0) { // key does not exist yet i = ~i; - tree._size++; if (this.keys.length < tree._maxNodeSize) { return this.insertInLeaf(i, key, value, tree); } @@ -1129,7 +1132,6 @@ var BNode = /** @class */ (function () { this.keys.splice(i, 1); if (this.values !== undefVals) this.values.splice(i, 1); - tree._size--; i--; iHigh--; } @@ -1167,7 +1169,7 @@ var BNodeInternal = /** @class */ (function (_super) { * This does not mark `children` as shared, so it is the responsibility of the caller * to ensure children are either marked shared, or aren't included in another tree. 
*/ - function BNodeInternal(children, keys) { + function BNodeInternal(children, size, keys) { var _this = this; if (!keys) { keys = []; @@ -1176,18 +1178,22 @@ var BNodeInternal = /** @class */ (function (_super) { } _this = _super.call(this, keys) || this; _this.children = children; + _this._size = size; return _this; } BNodeInternal.prototype.clone = function () { var children = this.children.slice(0); for (var i = 0; i < children.length; i++) children[i].isShared = true; - return new BNodeInternal(children, this.keys.slice(0)); + return new BNodeInternal(children, this._size, this.keys.slice(0)); + }; + BNodeInternal.prototype.size = function () { + return this._size; }; BNodeInternal.prototype.greedyClone = function (force) { if (this.isShared && !force) return this; - var nu = new BNodeInternal(this.children.slice(0), this.keys.slice(0)); + var nu = new BNodeInternal(this.children.slice(0), this._size, this.keys.slice(0)); for (var i = 0; i < nu.children.length; i++) nu.children[i] = nu.children[i].greedyClone(force); return nu; @@ -1230,22 +1236,35 @@ var BNodeInternal = /** @class */ (function (_super) { check(kL === cL, "keys/children length mismatch: depth", depth, "lengths", kL, cL, "baseIndex", baseIndex); check(kL > 1 || depth > 0, "internal node has length", kL, "at depth", depth, "baseIndex", baseIndex); var size = 0, c = this.children, k = this.keys, childSize = 0; + var prevMinKey = undefined; + var prevMaxKey = undefined; for (var i = 0; i < cL; i++) { - size += c[i].checkValid(depth + 1, tree, baseIndex + size); - childSize += c[i].keys.length; + var child = c[i]; + var _a = child.checkValid(depth + 1, tree, baseIndex + size), subtreeSize = _a[0], minKey = _a[1], maxKey = _a[2]; + check(subtreeSize === child.size(), "cached size mismatch at depth", depth, "index", i, "baseIndex", baseIndex); + check(subtreeSize === 1 || tree._compare(minKey, maxKey) < 0, "child node keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex); + if 
(prevMinKey !== undefined && prevMaxKey !== undefined) { + check(!areOverlapping(prevMinKey, prevMaxKey, minKey, maxKey, tree._compare), "children keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex, ": ", prevMaxKey, " !< ", minKey); + check(tree._compare(prevMaxKey, minKey) < 0, "children keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex, ": ", prevMaxKey, " !< ", minKey); + } + prevMinKey = minKey; + prevMaxKey = maxKey; + size += subtreeSize; + childSize += child.keys.length; check(size >= childSize, "wtf", baseIndex); // no way this will ever fail - check(i === 0 || c[i - 1].constructor === c[i].constructor, "type mismatch, baseIndex:", baseIndex); - if (c[i].maxKey() != k[i]) - check(false, "keys[", i, "] =", k[i], "is wrong, should be ", c[i].maxKey(), "at depth", depth, "baseIndex", baseIndex); + check(i === 0 || c[i - 1].constructor === child.constructor, "type mismatch, baseIndex:", baseIndex); + if (child.maxKey() != k[i]) + check(false, "keys[", i, "] =", k[i], "is wrong, should be ", child.maxKey(), "at depth", depth, "baseIndex", baseIndex); if (!(i === 0 || tree._compare(k[i - 1], k[i]) < 0)) check(false, "sort violation at depth", depth, "index", i, "keys", k[i - 1], k[i]); } + check(this._size === size, "internal node cached size mismatch at depth", depth, "baseIndex", baseIndex, "cached", this._size, "actual", size); // 2020/08: BTree doesn't always avoid grossly undersized nodes, // but AFAIK such nodes are pretty harmless, so accept them. var toofew = childSize === 0; // childSize < (tree.maxNodeSize >> 1)*cL; if (toofew || childSize > tree.maxNodeSize * cL) check(false, toofew ? 
"too few" : "too many", "children (", childSize, size, ") at depth", depth, "maxNodeSize:", tree.maxNodeSize, "children.length:", cL, "baseIndex:", baseIndex); - return size; + return [size, this.minKey(), this.maxKey()]; }; ///////////////////////////////////////////////////////////////////////////// // Internal Node: set & node splitting ////////////////////////////////////// @@ -1273,7 +1292,9 @@ var BNodeInternal = /** @class */ (function (_super) { this.keys[i] = c[i].maxKey(); } } + var oldSize = child.size(); var result = child.set(key, value, overwrite, tree); + this._size += child.size() - oldSize; if (result === false) return false; this.keys[i] = child.maxKey(); @@ -1302,6 +1323,7 @@ var BNodeInternal = /** @class */ (function (_super) { BNodeInternal.prototype.insert = function (i, child) { this.children.splice(i, 0, child); this.keys.splice(i, 0, child.maxKey()); + this._size += child.size(); }; /** * Split this node. @@ -1310,21 +1332,50 @@ var BNodeInternal = /** @class */ (function (_super) { BNodeInternal.prototype.splitOffRightSide = function () { // assert !this.isShared; var half = this.children.length >> 1; - return new BNodeInternal(this.children.splice(half), this.keys.splice(half)); + var newChildren = this.children.splice(half); + var newKeys = this.keys.splice(half); + var sizePrev = this._size; + this._size = sumChildSizes(this.children); + var newNode = new BNodeInternal(newChildren, sizePrev - this._size, newKeys); + return newNode; + }; + /** + * Split this node. + * Modifies this to remove the first half of the items, returning a separate node containing them. 
+ */ + BNodeInternal.prototype.splitOffLeftSide = function () { + // assert !this.isShared; + var half = this.children.length >> 1; + var newChildren = this.children.splice(0, half); + var newKeys = this.keys.splice(0, half); + var sizePrev = this._size; + this._size = sumChildSizes(this.children); + var newNode = new BNodeInternal(newChildren, sizePrev - this._size, newKeys); + return newNode; }; BNodeInternal.prototype.takeFromRight = function (rhs) { // Reminder: parent node must update its copy of key for this node // assert: neither node is shared // assert rhs.keys.length > (maxNodeSize/2 && this.keys.length (maxNodeSize/2 && this.keys.length= 0 && aMinBMax <= 0) { + // case 2 or 4 + return true; + } + var aMaxBMin = cmp(aMax, bMin); + var aMaxBMax = cmp(aMax, bMax); + if (aMaxBMin >= 0 && aMaxBMax <= 0) { + // case 1 + return true; + } + // case 3 or no overlap + return aMinBMin <= 0 && aMaxBMax >= 0; +} +exports.areOverlapping = areOverlapping; var Delete = { delete: true }, DeleteRange = function () { return Delete; }; var Break = { break: true }; var EmptyLeaf = (function () { diff --git a/b+tree.ts b/b+tree.ts index 084055c..c0d80c7 100644 --- a/b+tree.ts +++ b/b+tree.ts @@ -184,7 +184,6 @@ export function simpleComparator(a: any, b: any): number { export default class BTree implements ISortedMapF, ISortedMap { private _root: BNode = EmptyLeaf as BNode; - _size: number = 0; _maxNodeSize: number; /** @@ -212,16 +211,15 @@ export default class BTree implements ISortedMapF, ISortedMap // ES6 Map methods ///////////////////////////////////////////////////// /** Gets the number of key-value pairs in the tree. */ - get size() { return this._size; } + get size(): number { return this._root.size(); } /** Gets the number of key-value pairs in the tree. */ - get length() { return this._size; } + get length(): number { return this.size; } /** Returns true iff the tree contains no key-value pairs. 
*/ - get isEmpty() { return this._size === 0; } + get isEmpty(): boolean { return this._root.size() === 0; } /** Releases the tree so that its size is 0. */ clear() { this._root = EmptyLeaf as BNode; - this._size = 0; } forEach(callback: (v:V, k:K, tree:BTree) => void, thisArg?: any): number; @@ -290,7 +288,8 @@ export default class BTree implements ISortedMapF, ISortedMap if (result === true || result === false) return result; // Root node has split, so create a new root node. - this._root = new BNodeInternal([this._root, result]); + const children = [this._root, result]; + this._root = new BNodeInternal(children, sumChildSizes(children)); return true; } @@ -615,7 +614,6 @@ export default class BTree implements ISortedMapF, ISortedMap this._root.isShared = true; var result = new BTree(undefined, this._compare, this._maxNodeSize); result._root = this._root; - result._size = this._size; return result as this; } @@ -627,7 +625,6 @@ export default class BTree implements ISortedMapF, ISortedMap greedyClone(force?: boolean): this { var result = new BTree(undefined, this._compare, this._maxNodeSize); result._root = this._root.greedyClone(force); - result._size = this._size; return result as this; } @@ -926,7 +923,7 @@ export default class BTree implements ISortedMapF, ISortedMap * skips the most expensive test - whether all keys are sorted - but it * does check that maxKey() of the children of internal nodes are sorted. 
*/ checkValid() { - var size = this._root.checkValid(0, this, 0); + var [size] = this._root.checkValid(0, this, 0); check(size === this.size, "size mismatch: counted ", size, "but stored", this.size); } } @@ -973,6 +970,10 @@ export class BNode { this.isShared = undefined; } + size(): number { + return this.keys.length; + } + /////////////////////////////////////////////////////////////////////////// // Shared methods ///////////////////////////////////////////////////////// @@ -1111,7 +1112,7 @@ export class BNode { return undefined; } - checkValid(depth: number, tree: BTree, baseIndex: number): number { + checkValid(depth: number, tree: BTree, baseIndex: number): [size: number, min: K, max: K] { var kL = this.keys.length, vL = this.values.length; check(this.values === undefVals ? kL <= vL : kL === vL, "keys/values length mismatch: depth", depth, "with lengths", kL, vL, "and baseIndex", baseIndex); @@ -1121,7 +1122,12 @@ export class BNode { // it can't be merged with adjacent nodes. However, the parent will // verify that the average node size is at least half of the maximum. 
check(depth == 0 || kL > 0, "empty leaf at depth", depth, "and baseIndex", baseIndex); - return kL; + for (var i = 1; i < kL; i++) { + var c = tree._compare(this.keys[i-1], this.keys[i]); + check(c < 0, "keys out of order at depth", depth, "and baseIndex", baseIndex + i - 1, + ": ", this.keys[i-1], " !< ", this.keys[i]); + } + return [kL, this.keys[0], this.keys[kL - 1]]; } ///////////////////////////////////////////////////////////////////////////// @@ -1132,8 +1138,6 @@ export class BNode { if (i < 0) { // key does not exist yet i = ~i; - tree._size++; - if (this.keys.length < tree._maxNodeSize) { return this.insertInLeaf(i, key, value, tree); } else { @@ -1251,7 +1255,6 @@ export class BNode { this.keys.splice(i, 1); if (this.values !== undefVals) this.values.splice(i, 1); - tree._size--; i--; iHigh--; } else if (result.hasOwnProperty('value')) { @@ -1286,12 +1289,13 @@ export class BNodeInternal extends BNode { // children, but I find it easier to keep the array lengths equal: each // keys[i] caches the value of children[i].maxKey(). children: BNode[]; + _size: number; /** * This does not mark `children` as shared, so it is the responsibility of the caller * to ensure children are either marked shared, or aren't included in another tree. 
*/ - constructor(children: BNode[], keys?: K[]) { + constructor(children: BNode[], size: number, keys?: K[]) { if (!keys) { keys = []; for (var i = 0; i < children.length; i++) @@ -1299,19 +1303,24 @@ export class BNodeInternal extends BNode { } super(keys); this.children = children; + this._size = size; } clone(): BNode { var children = this.children.slice(0); for (var i = 0; i < children.length; i++) children[i].isShared = true; - return new BNodeInternal(children, this.keys.slice(0)); + return new BNodeInternal(children, this._size, this.keys.slice(0)); + } + + size(): number { + return this._size; } greedyClone(force?: boolean): BNode { if (this.isShared && !force) return this; - var nu = new BNodeInternal(this.children.slice(0), this.keys.slice(0)); + var nu = new BNodeInternal(this.children.slice(0), this._size, this.keys.slice(0)); for (var i = 0; i < nu.children.length; i++) nu.children[i] = nu.children[i].greedyClone(force); return nu; @@ -1356,27 +1365,42 @@ export class BNodeInternal extends BNode { return result; } - checkValid(depth: number, tree: BTree, baseIndex: number): number { + checkValid(depth: number, tree: BTree, baseIndex: number): [size: number, min: K, max: K] { let kL = this.keys.length, cL = this.children.length; check(kL === cL, "keys/children length mismatch: depth", depth, "lengths", kL, cL, "baseIndex", baseIndex); check(kL > 1 || depth > 0, "internal node has length", kL, "at depth", depth, "baseIndex", baseIndex); let size = 0, c = this.children, k = this.keys, childSize = 0; + let prevMinKey: K | undefined = undefined; + let prevMaxKey: K | undefined = undefined; for (var i = 0; i < cL; i++) { - size += c[i].checkValid(depth + 1, tree, baseIndex + size); - childSize += c[i].keys.length; + var child = c[i]; + var [subtreeSize, minKey, maxKey] = child.checkValid(depth + 1, tree, baseIndex + size); + check(subtreeSize === child.size(), "cached size mismatch at depth", depth, "index", i, "baseIndex", baseIndex); + check(subtreeSize 
=== 1 || tree._compare(minKey, maxKey) < 0, "child node keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex); + if (prevMinKey !== undefined && prevMaxKey !== undefined) { + check(!areOverlapping(prevMinKey, prevMaxKey, minKey, maxKey, tree._compare), "children keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex, + ": ", prevMaxKey, " !< ", minKey); + check(tree._compare(prevMaxKey, minKey) < 0, "children keys not sorted at depth", depth, "index", i, "baseIndex", baseIndex, + ": ", prevMaxKey, " !< ", minKey); + } + prevMinKey = minKey; + prevMaxKey = maxKey; + size += subtreeSize; + childSize += child.keys.length; check(size >= childSize, "wtf", baseIndex); // no way this will ever fail - check(i === 0 || c[i-1].constructor === c[i].constructor, "type mismatch, baseIndex:", baseIndex); - if (c[i].maxKey() != k[i]) - check(false, "keys[", i, "] =", k[i], "is wrong, should be ", c[i].maxKey(), "at depth", depth, "baseIndex", baseIndex); + check(i === 0 || c[i-1].constructor === child.constructor, "type mismatch, baseIndex:", baseIndex); + if (child.maxKey() != k[i]) + check(false, "keys[", i, "] =", k[i], "is wrong, should be ", child.maxKey(), "at depth", depth, "baseIndex", baseIndex); if (!(i === 0 || tree._compare(k[i-1], k[i]) < 0)) check(false, "sort violation at depth", depth, "index", i, "keys", k[i-1], k[i]); } + check(this._size === size, "internal node cached size mismatch at depth", depth, "baseIndex", baseIndex, "cached", this._size, "actual", size); // 2020/08: BTree doesn't always avoid grossly undersized nodes, // but AFAIK such nodes are pretty harmless, so accept them. let toofew = childSize === 0; // childSize < (tree.maxNodeSize >> 1)*cL; if (toofew || childSize > tree.maxNodeSize*cL) check(false, toofew ? 
"too few" : "too many", "children (", childSize, size, ") at depth", depth, "maxNodeSize:", tree.maxNodeSize, "children.length:", cL, "baseIndex:", baseIndex); - return size; + return [size, this.minKey()!, this.maxKey()]; } ///////////////////////////////////////////////////////////////////////////// @@ -1407,7 +1431,9 @@ export class BNodeInternal extends BNode { } } + var oldSize = child.size(); var result = child.set(key, value, overwrite, tree); + this._size += child.size() - oldSize; if (result === false) return false; this.keys[i] = child.maxKey(); @@ -1437,6 +1463,7 @@ export class BNodeInternal extends BNode { insert(i: index, child: BNode) { this.children.splice(i, 0, child); this.keys.splice(i, 0, child.maxKey()); + this._size += child.size(); } /** @@ -1445,24 +1472,54 @@ export class BNodeInternal extends BNode { */ splitOffRightSide() { // assert !this.isShared; - var half = this.children.length >> 1; - return new BNodeInternal(this.children.splice(half), this.keys.splice(half)); + const half = this.children.length >> 1; + const newChildren = this.children.splice(half); + const newKeys = this.keys.splice(half); + const sizePrev = this._size; + this._size = sumChildSizes(this.children); + const newNode = new BNodeInternal(newChildren, sizePrev - this._size, newKeys); + return newNode; + } + + /** + * Split this node. + * Modifies this to remove the first half of the items, returning a separate node containing them. 
+ */ + splitOffLeftSide() { + // assert !this.isShared; + const half = this.children.length >> 1; + const newChildren = this.children.splice(0, half); + const newKeys = this.keys.splice(0, half); + const sizePrev = this._size; + this._size = sumChildSizes(this.children); + const newNode = new BNodeInternal(newChildren, sizePrev - this._size, newKeys); + return newNode; } takeFromRight(rhs: BNode) { // Reminder: parent node must update its copy of key for this node // assert: neither node is shared // assert rhs.keys.length > (maxNodeSize/2 && this.keys.length; this.keys.push(rhs.keys.shift()!); - this.children.push((rhs as BNodeInternal).children.shift()!); + const child = rhsInternal.children.shift()!; + this.children.push(child); + const size = child.size(); + rhsInternal._size -= size; + this._size += size; } takeFromLeft(lhs: BNode) { // Reminder: parent node must update its copy of key for this node // assert: neither node is shared // assert rhs.keys.length > (maxNodeSize/2 && this.keys.length; + const child = lhsInternal.children.pop()!; this.keys.unshift(lhs.keys.pop()!); - this.children.unshift((lhs as BNodeInternal).children.pop()!); + this.children.unshift(child); + const size = child.size(); + lhsInternal._size -= size; + this._size += size; } ///////////////////////////////////////////////////////////////////////////// @@ -1489,12 +1546,15 @@ export class BNodeInternal extends BNode { } else if (i <= iHigh) { try { for (; i <= iHigh; i++) { - if (children[i].isShared) - children[i] = children[i].clone(); - var result = children[i].forRange(low, high, includeHigh, editMode, tree, count, onFound); + let child = children[i]; + if (child.isShared) + children[i] = child = child.clone(); + const beforeSize = child.size(); + const result = child.forRange(low, high, includeHigh, editMode, tree, count, onFound); // Note: if children[i] is empty then keys[i]=undefined. // This is an invalid state, but it is fixed below. 
- keys[i] = children[i].maxKey(); + keys[i] = child.maxKey(); + this._size += child.size() - beforeSize; if (typeof result !== 'number') return result; count = result; @@ -1510,7 +1570,8 @@ export class BNodeInternal extends BNode { this.tryMerge(i, tree._maxNodeSize); } else { // child is empty! delete it! keys.splice(i, 1); - children.splice(i, 1); + const removed = children.splice(i, 1); + check(removed[0].size() === 0, "emptiness cleanup"); } } } @@ -1549,6 +1610,7 @@ export class BNodeInternal extends BNode { this.keys.push.apply(this.keys, rhs.keys); const rhsChildren = (rhs as any as BNodeInternal).children; this.children.push.apply(this.children, rhsChildren); + this._size += rhs.size(); if (rhs.isShared && !this.isShared) { // All children of a shared node are implicitly shared, and since their new @@ -1577,6 +1639,59 @@ export class BNodeInternal extends BNode { // has the side effect of scanning the prototype chain. var undefVals: any[] = []; +/** + * Sums the sizes of the given child nodes. + * @param children the child nodes + * @returns the total size + * @internal + */ +export function sumChildSizes(children: BNode[]): number { + var total = 0; + for (var i = 0; i < children.length; i++) + total += children[i].size(); + return total; +} + +/** + * Determines whether two nodes are overlapping in key range. + * @internal + */ +export function areOverlapping( + aMin: K, + aMax: K, + bMin: K, + bMax: K, + cmp: (x:K,y:K)=>number +): boolean { + // There are 4 possibilities: + // 1. aMin.........aMax + // bMin.........bMax + // (aMax between bMin and bMax) + // 2. aMin.........aMax + // bMin.........bMax + // (aMin between bMin and bMax) + // 3. aMin.............aMax + // bMin....bMax + // (aMin and aMax enclose bMin and bMax; note this includes equality cases) + // 4. 
aMin....aMax + // bMin.............bMax + // (bMin and bMax enclose aMin and aMax; note equality cases are identical to case 3) + const aMinBMin = cmp(aMin, bMin); + const aMinBMax = cmp(aMin, bMax); + if (aMinBMin >= 0 && aMinBMax <= 0) { + // case 2 or 4 + return true; + } + const aMaxBMin = cmp(aMax, bMin); + const aMaxBMax = cmp(aMax, bMax); + if (aMaxBMin >= 0 && aMaxBMax <= 0) { + // case 1 + return true; + } + // case 3 or no overlap + return aMinBMin <= 0 && aMaxBMax >= 0; +} + const Delete = {delete: true}, DeleteRange = () => Delete; const Break = {break: true}; const EmptyLeaf = (function() { diff --git a/benchmarks.ts b/benchmarks.ts index 96ecad7..c565cc9 100644 --- a/benchmarks.ts +++ b/benchmarks.ts @@ -1,12 +1,15 @@ #!/usr/bin/env ts-node -import BTree, {IMap} from '.'; +import BTree from '.'; import BTreeEx from './extended'; import SortedArray from './sorted-array'; +import forEachKeyNotIn from './extended/forEachKeyNotIn'; +import subtract from './extended/subtract'; // Note: The `bintrees` package also includes a `BinTree` type which turned // out to be an unbalanced binary tree. It is faster than `RBTree` for // randomized data, but it becomes extremely slow when filled with sorted // data, so it's not usually a good choice. 
import {RBTree} from 'bintrees'; +import { countTreeNodeStats, logTreeNodeStats } from './test/shared'; const SortedSet = require("collections/sorted-set"); // Bad type definition: missing 'length' const SortedMap = require("collections/sorted-map"); // No type definitions available const functionalTree = require("functional-red-black-tree"); // No type definitions available @@ -46,6 +49,58 @@ function measure(message: (t:T) => string, callback: () => T, minMillise return result; } +function intersectBySorting( + tree1: BTree, + tree2: BTree, + callback: (k: number, leftValue: number, rightValue: number) => void +) { + const left = tree1.toArray(); + const right = tree2.toArray(); + let i = 0; + let j = 0; + const leftLen = left.length; + const rightLen = right.length; + + while (i < leftLen && j < rightLen) { + const [leftKey, leftValue] = left[i]; + const [rightKey, rightValue] = right[j]; + if (leftKey === rightKey) { + callback(leftKey, leftValue, rightValue); + i++; + j++; + } else if (leftKey < rightKey) { + i++; + } else { + j++; + } + } +} + +function subtractBySorting( + includeTree: BTree, + excludeTree: BTree, + callback: (k: number, value: number) => void +) { + const include = includeTree.toArray(); + const exclude = excludeTree.toArray(); + let i = 0; + let j = 0; + const includeLen = include.length; + const excludeLen = exclude.length; + + while (i < includeLen) { + const [includeKey, includeValue] = include[i]; + while (j < excludeLen && exclude[j][0] < includeKey) + j++; + if (j < excludeLen && exclude[j][0] === includeKey) { + i++; + continue; + } + callback(includeKey, includeValue); + i++; + } +} + console.log("Benchmark results (milliseconds with integer keys/values)"); console.log("---------------------------------------------------------"); @@ -355,3 +410,792 @@ console.log("### Delta between B+ trees"); } }) } + +console.log(); +console.log("### Union between B+ trees"); +{ + console.log(); + const sizes = [100, 1000, 10000, 100000]; + + 
const timeBaselineMerge = (title: string, tree1: BTree, tree2: BTree) => { + const baselineResult = measure(() => title, () => { + const result = tree1.clone(); + tree2.forEachPair((k, v) => { + result.set(k, v, false); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + logTreeNodeStats('baseline', baselineStats); + }; + + const preferLeftUnion = (_k: number, leftValue: any, _rightValue: any) => leftValue; + + const timeUnionVsBaseline = ( + baseTitle: string, + tree1: BTreeEx, + tree2: BTreeEx, + prefer = preferLeftUnion, + unionLabel = 'union()', + baselineLabel = 'clone+set loop (baseline)' + ) => { + const unionResult = measure(() => `${baseTitle} using ${unionLabel}`, () => { + return tree1.union(tree2, prefer); + }); + const unionStats = countTreeNodeStats(unionResult); + logTreeNodeStats('union', unionStats); + + timeBaselineMerge(`${baseTitle} using ${baselineLabel}`, tree1, tree2); + }; + + console.log("# Non-overlapping ranges (disjoint keys)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + const offset = size * 3; + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(offset + i, offset + i); + } + + const baseTitle = `Union ${size}+${size} non-overlapping trees`; + timeUnionVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Adjacent ranges (one intersection point)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + // Tree1: 0-size, Tree2: size-(2*size) + for (let i = 0; i <= size; i++) { + tree1.set(i, i); + tree2.set(i + size, i + size); + } + + const baseTitle = `Union ${size}+${size} adjacent range trees`; + timeUnionVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Interleaved ranges (two intersection points)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + // Tree1: 0-size, 2*size-3*size + // Tree2: 
size-2*size + for (let i = 0; i <= size; i++) { + tree1.set(i, i); + tree1.set(i + 2 * size, i + 2 * size); + tree2.set(i + size, i + size); + } + + const baseTitle = `Union ${size * 2}+${size} interleaved range trees`; + timeUnionVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Complete overlap (all keys intersect)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i, i * 10); + } + + const baseTitle = `Union ${size}+${size} completely overlapping trees (prefer left)`; + timeUnionVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Partial overlap (10% intersection)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + // Tree1: 0-(size) + // Tree2: (~0.9*size)-(1.9*size) + // Overlap: last 10% of tree1 and first 10% of tree2 + for (let i = 0; i < size; i++) { + tree1.set(i, i); + } + const offset = Math.floor(size * 0.9); + for (let i = 0; i < size; i++) { + const key = offset + i; + tree2.set(key, key * 10); + } + + const baseTitle = `Union trees with 10% overlap (${size}+${size} keys)`; + timeUnionVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Union random overlaps"); + sizes.forEach((size) => { + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); + + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + for (let k of keys1) { + tree1.set(k, k); + } + for (let k of keys2) { + tree2.set(k, k * 10); + } + + const baseTitle = `Union ${tree1.size}+${tree2.size} trees with random keys`; + timeUnionVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Union with empty tree"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + for (let i = 0; i < size; i++) { + tree1.set(i, i); + } + + const baseTitle = `Union ${size}-key tree 
with empty tree`; + timeUnionVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const tree1 = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + tree1.set(i, i); + } + + const tree2 = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + tree2.set(i, i); + } else { + tree2.set(totalKeys + i, totalKeys + i); + } + } + + const baseTitle = `Union ${tree1.size}+${tree2.size} sparse-overlap trees`; + timeUnionVsBaseline(baseTitle, tree1, tree2); + } +} + +console.log(); +console.log("### Subtract between B+ trees"); +{ + console.log(); + const sizes = [100, 1000, 10000, 100000]; + + const timeBaselineSubtract = ( + title: string, + includeTree: BTreeEx, + excludeTree: BTreeEx + ) => { + const baselineResult = measure(() => title, () => { + const result = includeTree.clone(); + excludeTree.forEachPair((key) => { + result.delete(key); + }); + return result; + }); + const stats = countTreeNodeStats(baselineResult); + logTreeNodeStats('baseline', stats); + }; + + const timeSubtractVsBaseline = ( + baseTitle: string, + includeTree: BTreeEx, + excludeTree: BTreeEx, + subtractLabel = 'subtract()', + baselineLabel = 'clone+delete loop (baseline)' + ) => { + const subtractResult = measure(() => `${baseTitle} using ${subtractLabel}`, () => { + return subtract, number, number>(includeTree, excludeTree); + }); + const subtractStats = countTreeNodeStats(subtractResult); + logTreeNodeStats('subtract', subtractStats); + + timeBaselineSubtract(`${baseTitle} using ${baselineLabel}`, includeTree, excludeTree); + }; + + console.log("# Non-overlapping ranges (nothing removed)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + const offset = size * 3; + for (let i = 0; i < 
size; i++) { + includeTree.set(i, i); + excludeTree.set(offset + i, offset + i); + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} disjoint trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Partial overlap (middle segment removed)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + const overlapStart = Math.floor(size / 3); + const overlapEnd = overlapStart + Math.floor(size / 2); + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + if (i >= overlapStart && i < overlapEnd) + excludeTree.set(i, i * 10); + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} partially overlapping trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Interleaved keys (every other key removed)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (let i = 0; i < size * 2; i++) { + includeTree.set(i, i); + if (i % 2 === 0) + excludeTree.set(i, i); + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} interleaved trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Complete overlap (entire tree removed)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + excludeTree.set(i, i * 5); + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} identical trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Random overlaps (~10% removed)"); + sizes.forEach((size) => { + const keysInclude = makeArray(size, true); + const keysExclude = makeArray(size, true); + const overlapCount = Math.max(1, Math.floor(size * 0.1)); + for (let i = 0; i < 
overlapCount && i < keysInclude.length && i < keysExclude.length; i++) { + keysExclude[i] = keysInclude[i]; + } + + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (const key of keysInclude) + includeTree.set(key, key * 3); + for (const key of keysExclude) + excludeTree.set(key, key * 7); + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} random trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Subtract with empty exclude tree"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (let i = 0; i < size; i++) + includeTree.set(i, i); + + const baseTitle = `Subtract ${includeTree.size}-0 keys`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const includeTree = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + includeTree.set(i, i); + } + + const excludeTree = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + excludeTree.set(i, i); + } else { + excludeTree.set(totalKeys + i, totalKeys + i); + } + } + + const baseTitle = `Subtract ${includeTree.size}-${excludeTree.size} sparse-overlap trees`; + timeSubtractVsBaseline(baseTitle, includeTree, excludeTree); + } +} + +console.log(); +console.log("### Intersection between B+ trees"); +{ + console.log(); + const sizes = [100, 1000, 10000, 100000]; + const preferLeftIntersection = (_k: number, leftValue: number, _rightValue: number) => leftValue; + + const timeBaselineIntersect = ( + title: string, + tree1: BTreeEx, + tree2: BTreeEx, + combine: (key: number, leftValue: number, rightValue: number) => number + ) => { + const baselineResult = measure(() => title, () => { 
+ const result = new BTreeEx(undefined, tree1._compare, tree1._maxNodeSize); + intersectBySorting(tree1, tree2, (key, leftValue, rightValue) => { + const mergedValue = combine(key, leftValue, rightValue); + result.set(key, mergedValue); + }); + return result; + }); + const baselineStats = countTreeNodeStats(baselineResult); + logTreeNodeStats('baseline', baselineStats); + }; + + const timeIntersectVsBaseline = ( + baseTitle: string, + tree1: BTreeEx, + tree2: BTreeEx, + combine = preferLeftIntersection, + intersectLabel = 'intersect()', + baselineLabel = 'sort merge (baseline)' + ) => { + const intersectResult = measure(() => `${baseTitle} using ${intersectLabel}`, () => { + return tree1.intersect(tree2, combine); + }); + const intersectStats = countTreeNodeStats(intersectResult); + logTreeNodeStats('intersect', intersectStats); + + timeBaselineIntersect(`${baseTitle} using ${baselineLabel}`, tree1, tree2, combine); + }; + + console.log("# Non-overlapping ranges (no shared keys)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + const offset = size * 3; + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(offset + i, offset + i); + } + + const baseTitle = `Intersect ${size}+${size} disjoint trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Partial overlap (middle segment shared)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + const overlapStart = Math.floor(size / 3); + const overlapEnd = overlapStart + Math.floor(size / 2); + for (let i = 0; i < size; i++) { + tree1.set(i, i); + if (i >= overlapStart && i < overlapEnd) + tree2.set(i, i * 10); + } + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} partially overlapping trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Interleaved keys (every other key shared)"); + sizes.forEach((size) => { + const 
tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + for (let i = 0; i < size * 2; i++) { + tree1.set(i, i); + if (i % 2 === 0) + tree2.set(i, i * 3); + } + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} interleaved trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Complete overlap (all keys shared)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i, i * 5); + } + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} identical trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Random overlaps (~10% shared keys)"); + sizes.forEach((size) => { + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); + const overlapCount = Math.max(1, Math.floor(size * 0.1)); + for (let i = 0; i < overlapCount && i < keys1.length && i < keys2.length; i++) { + keys2[i] = keys1[i]; + } + + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + for (const key of keys1) + tree1.set(key, key * 5); + for (const key of keys2) + tree2.set(key, key * 7); + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} random trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Intersection with empty tree"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + for (let i = 0; i < size; i++) + tree1.set(i, i); + + const baseTitle = `Intersect ${tree1.size}-key tree with empty tree`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const tree1 = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + tree1.set(i, i); + } + + const tree2 = 
new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + tree2.set(i, i * 3); + } else { + tree2.set(totalKeys + i, totalKeys + i); + } + } + + const baseTitle = `Intersect ${tree1.size}+${tree2.size} sparse-overlap trees`; + timeIntersectVsBaseline(baseTitle, tree1, tree2); + } +} + +console.log(); +console.log("### forEachKeyInBoth"); +{ + const sizes = [100, 1000, 10000, 100000]; + + const runForEachKeyInBoth = ( + tree1: BTreeEx, + tree2: BTreeEx + ) => { + let count = 0; + let checksum = 0; + tree1.forEachKeyInBoth(tree2, (_k, leftValue, rightValue) => { + count++; + checksum += leftValue + rightValue; + }); + return { count, checksum }; + }; + + const runBaseline = ( + tree1: BTree, + tree2: BTree + ) => { + let count = 0; + let checksum = 0; + intersectBySorting(tree1, tree2, (_k, leftValue, rightValue) => { + count++; + checksum += leftValue + rightValue; + }); + return { count, checksum }; + }; + + const timeForEachKeyInBothVsBaseline = ( + baseTitle: string, + tree1: BTreeEx, + tree2: BTreeEx, + forEachKeyInBothLabel = 'forEachKeyInBoth()', + baselineLabel = 'sort baseline' + ) => { + measure( + result => `${baseTitle} using ${forEachKeyInBothLabel} [count=${result.count}, checksum=${result.checksum}]`, + () => runForEachKeyInBoth(tree1, tree2) + ); + measure( + result => `${baseTitle} using ${baselineLabel} [count=${result.count}, checksum=${result.checksum}]`, + () => runBaseline(tree1, tree2) + ); + }; + + console.log(); + console.log("# Non-overlapping ranges (no shared keys)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + const offset = size * 3; + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(offset + i, offset + i); + } + + const baseTitle = `forEachKeyInBoth ${size}+${size} disjoint trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# 50% overlapping ranges"); + 
sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + const offset = Math.floor(size / 2); + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i + offset, (i + offset) * 2); + } + + const baseTitle = `forEachKeyInBoth ${size}+${size} half-overlapping trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Complete overlap (all keys shared)"); + sizes.forEach((size) => { + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + for (let i = 0; i < size; i++) { + tree1.set(i, i); + tree2.set(i, i * 3); + } + + const baseTitle = `forEachKeyInBoth ${size}+${size} identical-key trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Random overlaps (~10% shared keys)"); + sizes.forEach((size) => { + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); + const overlapCount = Math.max(1, Math.floor(size * 0.1)); + for (let i = 0; i < overlapCount && i < keys1.length && i < keys2.length; i++) { + keys2[i] = keys1[i]; + } + + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + for (let i = 0; i < keys1.length; i++) { + const key = keys1[i]; + tree1.set(key, key * 5); + } + for (let i = 0; i < keys2.length; i++) { + const key = keys2[i]; + tree2.set(key, key * 7); + } + + const baseTitle = `forEachKeyInBoth ${tree1.size}+${tree2.size} random trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); + }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const tree1 = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + tree1.set(i, i); + } + + const tree2 = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + tree2.set(i, i * 7); + } else { + 
tree2.set(totalKeys + i, (totalKeys + i) * 7); + } + } + + const baseTitle = `forEachKeyInBoth ${tree1.size}+${tree2.size} sparse-overlap trees`; + timeForEachKeyInBothVsBaseline(baseTitle, tree1, tree2); + } +} + +console.log(); +console.log("### forEachKeyNotIn"); +{ + const sizes = [100, 1000, 10000, 100000]; + + const runForEachKeyNotIn = ( + includeTree: BTreeEx, + excludeTree: BTreeEx + ) => { + let count = 0; + let checksum = 0; + forEachKeyNotIn(includeTree, excludeTree, (_key, value) => { + count++; + checksum += value; + }); + return { count, checksum }; + }; + + const runBaseline = ( + includeTree: BTree, + excludeTree: BTree + ) => { + let count = 0; + let checksum = 0; + subtractBySorting(includeTree, excludeTree, (_key, value) => { + count++; + checksum += value; + }); + return { count, checksum }; + }; + + const timeForEachKeyNotInVsBaseline = ( + baseTitle: string, + includeTree: BTreeEx, + excludeTree: BTreeEx, + forEachKeyNotInLabel = 'forEachKeyNotIn()', + baselineLabel = 'sort baseline' + ) => { + measure( + result => `${baseTitle} using ${forEachKeyNotInLabel} [count=${result.count}, checksum=${result.checksum}]`, + () => runForEachKeyNotIn(includeTree, excludeTree) + ); + measure( + result => `${baseTitle} using ${baselineLabel} [count=${result.count}, checksum=${result.checksum}]`, + () => runBaseline(includeTree, excludeTree) + ); + }; + + console.log(); + console.log("# Non-overlapping ranges (all keys survive)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + const offset = size * 3; + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + excludeTree.set(offset + i, offset + i); + } + + const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} disjoint trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# 50% overlapping ranges"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + 
const excludeTree = new BTreeEx(); + const offset = Math.floor(size / 2); + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + excludeTree.set(i + offset, (i + offset) * 2); + } + + const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} half-overlapping trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Complete overlap (no keys survive)"); + sizes.forEach((size) => { + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + for (let i = 0; i < size; i++) { + includeTree.set(i, i); + excludeTree.set(i, i * 3); + } + + const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} identical-key trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Random overlaps (~10% of include removed)"); + sizes.forEach((size) => { + const keysInclude = makeArray(size, true); + const keysExclude = makeArray(size, true); + const overlapCount = Math.max(1, Math.floor(size * 0.1)); + for (let i = 0; i < overlapCount && i < keysInclude.length && i < keysExclude.length; i++) { + keysExclude[i] = keysInclude[i]; + } + + const includeTree = new BTreeEx(); + const excludeTree = new BTreeEx(); + + for (let i = 0; i < keysInclude.length; i++) { + const key = keysInclude[i]; + includeTree.set(key, key * 5); + } + for (let i = 0; i < keysExclude.length; i++) { + const key = keysExclude[i]; + excludeTree.set(key, key * 7); + } + + const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} random trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + }); + + console.log(); + console.log("# Large sparse-overlap trees (1M keys each, 10 overlaps per 100k)"); + { + const totalKeys = 1_000_000; + const overlapInterval = 100_000; + const overlapPerInterval = 10; + + const includeTree = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + includeTree.set(i, i); + } 
+ + const excludeTree = new BTreeEx(); + for (let i = 0; i < totalKeys; i++) { + if ((i % overlapInterval) < overlapPerInterval) { + excludeTree.set(i, i); + } else { + excludeTree.set(totalKeys + i, totalKeys + i); + } + } + + const baseTitle = `forEachKeyNotIn ${includeTree.size}+${excludeTree.size} sparse-overlap trees`; + timeForEachKeyNotInVsBaseline(baseTitle, includeTree, excludeTree); + } +} diff --git a/extended/bulkLoad.d.ts b/extended/bulkLoad.d.ts new file mode 100644 index 0000000..79dc063 --- /dev/null +++ b/extended/bulkLoad.d.ts @@ -0,0 +1,14 @@ +import BTree from '../b+tree'; +/** + * Loads a B-Tree from a sorted list of entries in bulk. This is faster than inserting + * entries one at a time, and produces a more optimally balanced tree. + * Time and space complexity: O(n). + * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. Note that + * the array is an alternating list of keys and values: [key0, value0, key1, value1, ...]. + * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. + * @param compare Function to compare keys. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. + * @returns A new BTree containing the given entries. + * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed) or if the load factor is out of the allowed range. + */ +export declare function bulkLoad(entries: (K | V)[], maxNodeSize: number, compare: (a: K, b: K) => number, loadFactor?: number): BTree; diff --git a/extended/bulkLoad.js b/extended/bulkLoad.js new file mode 100644 index 0000000..3a23f94 --- /dev/null +++ b/extended/bulkLoad.js @@ -0,0 +1,108 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? 
(function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.bulkLoadRoot = exports.bulkLoad = void 0; +var b_tree_1 = __importStar(require("../b+tree")); +var shared_1 = require("./shared"); +/** + * Loads a B-Tree from a sorted list of entries in bulk. This is faster than inserting + * entries one at a time, and produces a more optimally balanced tree. + * Time and space complexity: O(n). + * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. Note that + * the array is an alternating list of keys and values: [key0, value0, key1, value1, ...]. + * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. + * @param compare Function to compare keys. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. + * @returns A new BTree containing the given entries. + * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed) or if the load factor is out of the allowed range. 
+ */ +function bulkLoad(entries, maxNodeSize, compare, loadFactor) { + if (loadFactor === void 0) { loadFactor = 0.8; } + var alternatingEntries = entries; + var root = bulkLoadRoot(alternatingEntries, maxNodeSize, compare, loadFactor); + var tree = new b_tree_1.default(undefined, compare, maxNodeSize); + var target = tree; + target._root = root; + target._size = root.size(); + return tree; +} +exports.bulkLoad = bulkLoad; +/** + * Bulk loads, returns the root node of the resulting tree. + * @internal + */ +function bulkLoadRoot(entries, maxNodeSize, compare, loadFactor) { + if (loadFactor === void 0) { loadFactor = 0.8; } + if (loadFactor < 0.5 || loadFactor > 1.0) + throw new Error("bulkLoad: loadFactor must be between 0.5 and 1.0"); + var totalPairs = (0, shared_1.alternatingCount)(entries); + if (totalPairs > 1) { + var previousKey = (0, shared_1.alternatingGetFirst)(entries, 0); + for (var i = 1; i < totalPairs; i++) { + var key = (0, shared_1.alternatingGetFirst)(entries, i); + if (compare(previousKey, key) >= 0) + throw new Error("bulkLoad: entries must be sorted by key in strictly ascending order"); + previousKey = key; + } + } + var leaves = []; + (0, shared_1.makeLeavesFrom)(entries, maxNodeSize, function (leaf) { return leaves.push(leaf); }, loadFactor); + if (leaves.length === 0) + return new b_tree_1.BNode(); + var targetNodeSize = Math.ceil(maxNodeSize * loadFactor); + var exactlyHalf = targetNodeSize === maxNodeSize / 2; + var minSize = Math.floor(maxNodeSize / 2); + var currentLevel = leaves; + while (currentLevel.length > 1) { + var nodeCount = currentLevel.length; + if (nodeCount <= maxNodeSize && (nodeCount !== maxNodeSize || !exactlyHalf)) { + currentLevel = [new b_tree_1.BNodeInternal(currentLevel, (0, b_tree_1.sumChildSizes)(currentLevel))]; + break; + } + var nextLevelCount = Math.ceil(nodeCount / targetNodeSize); + (0, b_tree_1.check)(nextLevelCount > 1); + var nextLevel = new Array(nextLevelCount); + var remainingNodes = nodeCount; + var 
remainingParents = nextLevelCount; + var childIndex = 0; + for (var i = 0; i < nextLevelCount; i++) { + var chunkSize = Math.ceil(remainingNodes / remainingParents); + var children = new Array(chunkSize); + var size = 0; + for (var j = 0; j < chunkSize; j++) { + var child = currentLevel[childIndex++]; + children[j] = child; + size += child.size(); + } + remainingNodes -= chunkSize; + remainingParents--; + nextLevel[i] = new b_tree_1.BNodeInternal(children, size); + } + // If last node is underfilled, balance with left sibling + var secondLastNode = nextLevel[nextLevelCount - 2]; + var lastNode = nextLevel[nextLevelCount - 1]; + while (lastNode.children.length < minSize) + lastNode.takeFromLeft(secondLastNode); + currentLevel = nextLevel; + } + return currentLevel[0]; +} +exports.bulkLoadRoot = bulkLoadRoot; diff --git a/extended/bulkLoad.ts b/extended/bulkLoad.ts new file mode 100644 index 0000000..2fe5986 --- /dev/null +++ b/extended/bulkLoad.ts @@ -0,0 +1,103 @@ +import BTree, { BNode, BNodeInternal, check, sumChildSizes } from '../b+tree'; +import { alternatingCount, alternatingGetFirst, makeLeavesFrom, type AlternatingList, type BTreeWithInternals } from './shared'; + +/** + * Loads a B-Tree from a sorted list of entries in bulk. This is faster than inserting + * entries one at a time, and produces a more optimally balanced tree. + * Time and space complexity: O(n). + * @param entries The list of key/value pairs to load. Must be sorted by key in strictly ascending order. Note that + * the array is an alternating list of keys and values: [key0, value0, key1, value1, ...]. + * @param maxNodeSize The branching factor (maximum node size) for the resulting tree. + * @param compare Function to compare keys. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. + * @returns A new BTree containing the given entries. 
+ * @throws Error if the entries are not sorted by key in strictly ascending order (duplicates disallowed) or if the load factor is out of the allowed range. + */ +export function bulkLoad( + entries: (K | V)[], + maxNodeSize: number, + compare: (a: K, b: K) => number, + loadFactor = 0.8 +): BTree { + const alternatingEntries = entries as AlternatingList; + const root = bulkLoadRoot(alternatingEntries, maxNodeSize, compare, loadFactor); + const tree = new BTree(undefined, compare, maxNodeSize); + const target = tree as unknown as BTreeWithInternals; + target._root = root; + target._size = root.size(); + return tree; +} + +/** + * Bulk loads, returns the root node of the resulting tree. + * @internal + */ +export function bulkLoadRoot( + entries: AlternatingList, + maxNodeSize: number, + compare: (a: K, b: K) => number, + loadFactor = 0.8 +): BNode { + if (loadFactor < 0.5 || loadFactor > 1.0) + throw new Error("bulkLoad: loadFactor must be between 0.5 and 1.0"); + + const totalPairs = alternatingCount(entries); + if (totalPairs > 1) { + let previousKey = alternatingGetFirst(entries, 0); + for (let i = 1; i < totalPairs; i++) { + const key = alternatingGetFirst(entries, i); + if (compare(previousKey, key) >= 0) + throw new Error("bulkLoad: entries must be sorted by key in strictly ascending order"); + previousKey = key; + } + } + + const leaves: BNode[] = []; + makeLeavesFrom(entries, maxNodeSize, (leaf) => leaves.push(leaf), loadFactor); + if (leaves.length === 0) + return new BNode(); + + const targetNodeSize = Math.ceil(maxNodeSize * loadFactor); + const exactlyHalf = targetNodeSize === maxNodeSize / 2; + const minSize = Math.floor(maxNodeSize / 2); + + let currentLevel: BNode[] = leaves; + while (currentLevel.length > 1) { + const nodeCount = currentLevel.length; + if (nodeCount <= maxNodeSize && (nodeCount !== maxNodeSize || !exactlyHalf)) { + currentLevel = [new BNodeInternal(currentLevel, sumChildSizes(currentLevel))]; + break; + } + + const nextLevelCount = 
Math.ceil(nodeCount / targetNodeSize); + check(nextLevelCount > 1); + const nextLevel = new Array>(nextLevelCount); + let remainingNodes = nodeCount; + let remainingParents = nextLevelCount; + let childIndex = 0; + + for (let i = 0; i < nextLevelCount; i++) { + const chunkSize = Math.ceil(remainingNodes / remainingParents); + const children = new Array>(chunkSize); + let size = 0; + for (let j = 0; j < chunkSize; j++) { + const child = currentLevel[childIndex++]; + children[j] = child; + size += child.size(); + } + remainingNodes -= chunkSize; + remainingParents--; + nextLevel[i] = new BNodeInternal(children, size); + } + + // If last node is underfilled, balance with left sibling + const secondLastNode = nextLevel[nextLevelCount - 2] as BNodeInternal; + const lastNode = nextLevel[nextLevelCount - 1] as BNodeInternal; + while (lastNode.children.length < minSize) + lastNode.takeFromLeft(secondLastNode); + + currentLevel = nextLevel; + } + + return currentLevel[0]; +} diff --git a/extended/decompose.d.ts b/extended/decompose.d.ts new file mode 100644 index 0000000..cb0ff5c --- /dev/null +++ b/extended/decompose.d.ts @@ -0,0 +1 @@ +export {}; diff --git a/extended/decompose.js b/extended/decompose.js new file mode 100644 index 0000000..7b6ac9f --- /dev/null +++ b/extended/decompose.js @@ -0,0 +1,665 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.buildFromDecomposition = exports.decompose = void 0; +var b_tree_1 = require("../b+tree"); +var shared_1 = require("./shared"); +var parallelWalk_1 = require("./parallelWalk"); +var decomposeLoadFactor = 0.7; +/** + * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + * The algorithm is a parallel tree walk using two cursors. 
The trailing cursor (behind in key space) is walked forward + * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to + * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to + * the first key at or after the trailing cursor's previous position. + * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. + * Note: some of the returned leaves may be underfilled. + * @internal + */ +function decompose(left, right, combineFn, ignoreRight) { + if (ignoreRight === void 0) { ignoreRight = false; } + var maxNodeSize = left._maxNodeSize; + var cmp = left._compare; + (0, b_tree_1.check)(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); + // Holds the disjoint nodes that result from decomposition. + // Alternating entries of (height, node) to avoid creating small tuples + var disjoint = (0, shared_1.createAlternatingList)(); + // During the decomposition, leaves that are not disjoint are decomposed into individual entries + // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused + // disjoint subtree is added to the disjoint set. + // Note that there are unavoidable cases in which this will generate underfilled leaves. + // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. + // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, + // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] + var pending = (0, shared_1.createAlternatingList)(); + var tallestIndex = -1, tallestHeight = -1; + // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. 
+ // This is done because we cannot know immediately whether we can add the node to the disjoint set + // because its ancestor may also be disjoint and should be reused instead. + var highestDisjoint = undefined; + var minSize = Math.floor(maxNodeSize / 2); + var onLeafCreation = function (leaf) { + var height = leaf.keys.length < minSize ? -1 : 0; + (0, shared_1.alternatingPush)(disjoint, height, leaf); + }; + var addSharedNodeToDisjointSet = function (node, height) { + // flush pending entries + (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); + pending.length = 0; + // Don't share underfilled leaves, instead mark them as needing merging + if (node.isLeaf && node.keys.length < minSize) { + (0, shared_1.alternatingPush)(disjoint, -1, node.clone()); + } + else { + node.isShared = true; + (0, shared_1.alternatingPush)(disjoint, height, node); + } + if (height > tallestHeight) { + tallestIndex = (0, shared_1.alternatingCount)(disjoint) - 1; + tallestHeight = height; + } + }; + var addHighestDisjoint = function () { + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + }; + // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) + var disqualifySpine = function (cursor, depthFrom) { + var spine = cursor.spine; + for (var i = depthFrom; i >= 0; --i) { + var payload = spine[i].payload; + // Safe to early out because we always disqualify all ancestors of a disqualified node + // That is correct because every ancestor of a non-disjoint node is also non-disjoint + // because it must enclose the non-disjoint range. 
+ if (payload.disqualified) + break; + payload.disqualified = true; + } + }; + // Cursor payload factory + var makePayload = function () { return ({ disqualified: false }); }; + var pushLeafRange = function (leaf, from, toExclusive) { + var keys = leaf.keys; + var values = leaf.values; + for (var i = from; i < toExclusive; ++i) + (0, shared_1.alternatingPush)(pending, keys[i], values[i]); + }; + var onMoveInLeaf = function (leaf, payload, fromIndex, toIndex, startedEqual) { + (0, b_tree_1.check)(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); + var start = startedEqual ? fromIndex + 1 : fromIndex; + if (start < toIndex) + pushLeafRange(leaf, start, toIndex); + }; + var onExitLeaf = function (leaf, payload, startingIndex, startedEqual, cursorThis) { + highestDisjoint = undefined; + if (!payload.disqualified) { + highestDisjoint = { node: leaf, height: 0 }; + if (cursorThis.spine.length === 0) { + // if we are exiting a leaf and there are no internal nodes, we will reach the end of the tree. + // In this case we need to add the leaf now because step up will not be called. + addHighestDisjoint(); + } + } + else { + var start = startedEqual ? startingIndex + 1 : startingIndex; + var leafSize = leaf.keys.length; + if (start < leafSize) + pushLeafRange(leaf, start, leafSize); + } + }; + var onStepUp = function (parent, height, payload, fromIndex, spineIndex, stepDownIndex, cursorThis) { + var children = parent.children; + var nextHeight = height - 1; + if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ + || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { + if (!payload.disqualified) { + if (stepDownIndex === Number.POSITIVE_INFINITY) { + // We have finished our walk, and we won't be stepping down, so add the root + // Roots are allowed to be underfilled, so break the root up here if so to avoid + // creating underfilled interior nodes during reconstruction. 
+ // Note: the main btree implementation allows underfilled nodes in general, this algorithm + // guarantees that no additional underfilled nodes are created beyond what was already present. + if (parent.keys.length < minSize) { + for (var i = fromIndex; i < children.length; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + else { + addSharedNodeToDisjointSet(parent, height); + } + highestDisjoint = undefined; + } + else { + highestDisjoint = { node: parent, height: height }; + } + } + else { + addHighestDisjoint(); + var len = children.length; + for (var i = fromIndex + 1; i < len; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + } + else { + // We have a valid step down index, so we need to disqualify the spine if needed. + // This is identical to the step down logic, but we must also perform it here because + // in the case of stepping down into a leaf, the step down callback is never called. + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + addHighestDisjoint(); + for (var i = fromIndex + 1; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + }; + var onStepDown = function (node, height, spineIndex, stepDownIndex, cursorThis) { + if (stepDownIndex > 0) { + // When we step down into a node, we know that we have walked from a key that is less than our target. + // Because of this, if we are not stepping down into the first child, we know that all children before + // the stepDownIndex must overlap with the other tree because they must be before our target key. Since + // the child we are stepping into has a key greater than our target key, this node must overlap. + // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range + // of its children. 
+ disqualifySpine(cursorThis, spineIndex); + var children = node.children; + var nextHeight = height - 1; + for (var i = 0; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + }; + var onEnterLeaf = function (leaf, destIndex, cursorThis, cursorOther) { + if (destIndex > 0 + || (0, b_tree_1.areOverlapping)(leaf.minKey(), leaf.maxKey(), (0, parallelWalk_1.getKey)(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + // Similar logic to the step-down case, except in this case we also know the leaf in the other + // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. + cursorThis.leafPayload.disqualified = true; + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorThis, cursorThis.spine.length - 1); + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + pushLeafRange(leaf, 0, destIndex); + } + }; + // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second + var maxKeyLeft = left._root.maxKey(); + var maxKeyRight = right._root.maxKey(); + var maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + // Initialize cursors at minimum keys. 
+ var curA = (0, parallelWalk_1.createCursor)(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + var curB; + if (ignoreRight) { + var dummyPayload_1 = { disqualified: true }; + var onStepUpIgnore = function (_1, _2, _3, _4, spineIndex, stepDownIndex, cursorThis) { + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + }; + var onStepDownIgnore = function (_, __, spineIndex, stepDownIndex, cursorThis) { + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + }; + var onEnterLeafIgnore = function (leaf, destIndex, _, cursorOther) { + if (destIndex > 0 + || (0, b_tree_1.areOverlapping)(leaf.minKey(), leaf.maxKey(), (0, parallelWalk_1.getKey)(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + } + }; + curB = (0, parallelWalk_1.createCursor)(right, function () { return dummyPayload_1; }, onEnterLeafIgnore, parallelWalk_1.noop, parallelWalk_1.noop, onStepUpIgnore, onStepDownIgnore); + } + else { + curB = (0, parallelWalk_1.createCursor)(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + } + // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful + // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) + // the key of cursorB to the first key >= cursorB. Call this transition a "crossover point." All interior nodes that + // overlap cause a crossover point, and all crossover points are guaranteed to be walked using this method. Thus, + // all overlapping interior nodes will be found if they are checked for on step-down. + // The one exception mentioned above is when they start at the same key. In this case, they are both advanced forward and then + // their new ordering determines how they walk from there. 
+ // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). + // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. + // Initialize disqualification w.r.t. opposite leaf. + var initDisqualify = function (cur, other) { + var minKey = (0, parallelWalk_1.getKey)(cur); + var otherMin = (0, parallelWalk_1.getKey)(other); + var otherMax = other.leaf.maxKey(); + if ((0, b_tree_1.areOverlapping)(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) + cur.leafPayload.disqualified = true; + for (var i = 0; i < cur.spine.length; ++i) { + var entry = cur.spine[i]; + // Since we are on the left side of the tree, we can use the leaf min key for every spine node + if ((0, b_tree_1.areOverlapping)(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) + entry.payload.disqualified = true; + } + }; + initDisqualify(curA, curB); + initDisqualify(curB, curA); + var leading = curA; + var trailing = curB; + var order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + // Walk both cursors in alternating hops + while (true) { + var areEqual = order === 0; + if (areEqual) { + var key = (0, parallelWalk_1.getKey)(leading); + var vA = curA.leaf.values[curA.leafIndex]; + var vB = curB.leaf.values[curB.leafIndex]; + // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value + // to pending because they respect the areEqual flag during their moves. 
+ var combined = combineFn(key, vA, vB); + if (combined !== undefined) + (0, shared_1.alternatingPush)(pending, key, combined); + var outTrailing = (0, parallelWalk_1.moveForwardOne)(trailing, leading); + var outLeading = (0, parallelWalk_1.moveForwardOne)(leading, trailing); + if (outTrailing || outLeading) { + if (!outTrailing || !outLeading) { + // In these cases, we pass areEqual=false because a return value of "out of tree" means + // the cursor did not move. This must be true because they started equal and one of them had more tree + // to walk (one is !out), so they cannot be equal at this point. + if (outTrailing) { + (0, parallelWalk_1.moveTo)(leading, trailing, maxKey, false, false); + } + else { + (0, parallelWalk_1.moveTo)(trailing, leading, maxKey, false, false); + } + } + break; + } + order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + } + else { + if (order < 0) { + var tmp = trailing; + trailing = leading; + leading = tmp; + } + var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual), out = _a[0], nowEqual = _a[1]; + if (out) { + (0, parallelWalk_1.moveTo)(leading, trailing, maxKey, false, areEqual); + break; + } + else if (nowEqual) { + order = 0; + } + else { + order = -1; + } + } + } + // Ensure any trailing non-disjoint entries are added + (0, shared_1.makeLeavesFrom)(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); + // In cases like full interleaving, no leaves may be created until now + if (tallestHeight < 0 && (0, shared_1.alternatingCount)(disjoint) > 0) { + tallestIndex = 0; + } + return { disjoint: disjoint, tallestIndex: tallestIndex }; +} +exports.decompose = decompose; +/** + * Constructs a B-Tree from the result of a decomposition (set of disjoint nodes). 
+ * @internal + */ +function buildFromDecomposition(constructor, branchingFactor, decomposed, cmp, maxNodeSize) { + var disjoint = decomposed.disjoint, tallestIndex = decomposed.tallestIndex; + var disjointEntryCount = (0, shared_1.alternatingCount)(disjoint); + // Now we have a set of disjoint subtrees and we need to merge them into a single tree. + // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees + // to the "right" and "left" of it in sorted order, we append them onto the appropriate side + // of the current tree, splitting nodes as necessary to maintain balance. + // A "side" is referred to as a frontier, as it is a linked list of nodes from the root down to + // the leaf level on that side of the tree. Each appended subtree is appended to the node at the + // same height as itself on the frontier. Each tree is guaranteed to be at most as tall as the + // current frontier because we start from the tallest subtree and work outward. + var initialRoot = (0, shared_1.alternatingGetSecond)(disjoint, tallestIndex); + var frontier = [initialRoot]; + var rightContext = { + branchingFactor: branchingFactor, + spine: frontier, + sideIndex: getRightmostIndex, + sideInsertionIndex: getRightInsertionIndex, + splitOffSide: splitOffRightSide, + balanceLeaves: balanceLeavesRight, + updateMax: updateRightMax, + mergeLeaves: mergeRightEntries + }; + // Process all subtrees to the right of the tallest subtree + if (tallestIndex + 1 <= disjointEntryCount - 1) { + updateFrontier(rightContext, 0); + processSide(disjoint, tallestIndex + 1, disjointEntryCount, 1, rightContext); + } + var leftContext = { + branchingFactor: branchingFactor, + spine: frontier, + sideIndex: getLeftmostIndex, + sideInsertionIndex: getLeftmostIndex, + splitOffSide: splitOffLeftSide, + balanceLeaves: balanceLeavesLeft, + updateMax: parallelWalk_1.noop, + mergeLeaves: mergeLeftEntries + }; + // Process all subtrees to the left of the current tree + if (tallestIndex 
- 1 >= 0) { + // Note we need to update the frontier here because the right-side processing may have grown the tree taller. + updateFrontier(leftContext, 0); + processSide(disjoint, tallestIndex - 1, -1, -1, leftContext); + } + var reconstructed = new constructor(undefined, cmp, maxNodeSize); + reconstructed._root = frontier[0]; + // Return the resulting tree + return reconstructed; +} +exports.buildFromDecomposition = buildFromDecomposition; +/** + * Processes one side (left or right) of the disjoint subtree set during a reconstruction operation. + * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. + * @internal + */ +function processSide(disjoint, start, end, step, context) { + var spine = context.spine, sideIndex = context.sideIndex; + // Determine the depth of the first shared node on the frontier. + // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning + // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the + // frontier for each insertion as that would fundamentally change our asymptotics. + var isSharedFrontierDepth = 0; + var cur = spine[0]; + // Find the first shared node on the frontier + while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { + isSharedFrontierDepth++; + cur = cur.children[sideIndex(cur)]; + } + // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. + // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. + // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. + // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. + // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. 
+ // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. + var unflushedSizes = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array + for (var i = start; i != end; i += step) { + var currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf + var subtree = (0, shared_1.alternatingGetSecond)(disjoint, i); + var subtreeHeight = (0, shared_1.alternatingGetFirst)(disjoint, i); + var isEntryInsertion = subtreeHeight === -1; + (0, b_tree_1.check)(subtreeHeight <= currentHeight, "Subtree taller than spine during reconstruction."); + // If subtree height is -1 (indicating underfilled leaf), then this indicates insertion into a leaf + // otherwise, it points to a node whose children have height === subtreeHeight + var insertionDepth = currentHeight - (subtreeHeight + 1); + // Ensure path is unshared before mutation + ensureNotShared(context, isSharedFrontierDepth, insertionDepth); + var insertionCount = void 0; // non-recursive + var insertionSize = void 0; // recursive + if (isEntryInsertion) { + (0, b_tree_1.check)(subtree.isShared !== true); + insertionCount = insertionSize = subtree.keys.length; + } + else { + insertionCount = 1; + insertionSize = subtree.size(); + } + var cascadeEndDepth = findSplitCascadeEndDepth(context, insertionDepth, insertionCount); + // Calculate expansion depth (first ancestor with capacity) + var expansionDepth = Math.max(0, // -1 indicates we will cascade to new root + cascadeEndDepth); + // Update sizes on spine above the shared ancestor before we expand + updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, expansionDepth); + var newRoot = undefined; + var sizeChangeDepth = void 0; + if (isEntryInsertion) { + newRoot = splitUpwardsAndInsertEntries(context, insertionDepth, subtree); + // if we are inserting entries, we don't have to update a cached size on the leaf as they simply return count of keys + sizeChangeDepth = insertionDepth - 1; + } + else { + 
newRoot = splitUpwardsAndInsert(context, insertionDepth, subtree)[0]; + sizeChangeDepth = insertionDepth; + } + if (newRoot) { + // Set the spine root to the highest up new node; the rest of the spine is updated below + spine[0] = newRoot; + unflushedSizes.push(0); // new root level, keep unflushed sizes in sync + sizeChangeDepth++; // account for the spine lengthening + } + isSharedFrontierDepth = sizeChangeDepth + 1; + unflushedSizes[sizeChangeDepth] += insertionSize; + // Finally, update the frontier from the highest new node downward + // Note that this is often the point where the new subtree is attached, + // but in the case of cascaded splits it may be higher up. + updateFrontier(context, expansionDepth); + (0, b_tree_1.check)(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); + (0, b_tree_1.check)(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); + // Useful for debugging: + //updateSizeAndMax(context, unflushedSizes, spine.length - 1, 0); + //spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); + } + // Finally, propagate any remaining unflushed sizes upward and update max keys + updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, 0); +} +; +/** + * Cascade splits upward if capacity needed, then append a subtree at a given depth on the chosen side. + * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. + * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. + * Returns a new root if the root was split, otherwise undefined, and the node into which the subtree was inserted. 
+ */ +function splitUpwardsAndInsert(context, insertionDepth, subtree) { + var spine = context.spine, branchingFactor = context.branchingFactor, sideIndex = context.sideIndex, sideInsertionIndex = context.sideInsertionIndex, splitOffSide = context.splitOffSide, updateMax = context.updateMax; + // We must take care to avoid accidental propagation upward of the size of the inserted subtree + // To do this, we first split nodes upward from the insertion point until we find a node with capacity + // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, + // inserting at the end ensures no accidental propagation. + // Depth is -1 if the subtree is the same height as the current tree + if (insertionDepth >= 0) { + var carry = undefined; + // Determine initially where to insert after any splits + var insertTarget = spine[insertionDepth]; + if (insertTarget.keys.length === branchingFactor) { + insertTarget = carry = splitOffSide(insertTarget); + } + var d = insertionDepth - 1; + while (carry && d >= 0) { + var parent = spine[d]; + var sideChildIndex = sideIndex(parent); + // Refresh last key since child was split + updateMax(parent, parent.children[sideChildIndex].maxKey()); + if (parent.keys.length < branchingFactor) { + // We have reached the end of the cascade + insertNoCount(parent, sideInsertionIndex(parent), carry); + carry = undefined; + } + else { + // Splitting the parent here requires care to avoid incorrectly double counting sizes + // Example: a node is at max capacity 4, with children each of size 4 for 16 total. + // We split the node into two nodes of 2 children each, but this does *not* modify the size + // of its parent. Therefore when we insert the carry into the torn-off node, we must not + // increase its size or we will double-count the size of the carry subtree. 
+ var tornOff = splitOffSide(parent); + insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); + carry = tornOff; + } + d--; + } + var newRoot = undefined; + if (carry !== undefined) { + // Expansion reached the root, need a new root to hold carry + var oldRoot = spine[0]; + newRoot = new b_tree_1.BNodeInternal([oldRoot], oldRoot.size() + carry.size()); + insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); + } + // Finally, insert the subtree at the insertion point + insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); + return [newRoot, insertTarget]; + } + else { + // Insertion of subtree with equal height to current tree + var oldRoot = spine[0]; + var newRoot = new b_tree_1.BNodeInternal([oldRoot], oldRoot.size()); + insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); + return [newRoot, newRoot]; + } +} +; +/** + * Inserts an underfilled leaf (entryContainer), merging with its sibling if possible and splitting upward if not. + */ +function splitUpwardsAndInsertEntries(context, insertionDepth, entryContainer) { + var branchingFactor = context.branchingFactor, spine = context.spine, balanceLeaves = context.balanceLeaves, mergeLeaves = context.mergeLeaves; + var entryCount = entryContainer.keys.length; + var parent = spine[insertionDepth]; + var parentSize = parent.keys.length; + if (parentSize + entryCount <= branchingFactor) { + // Sibling has capacity, just merge into it + mergeLeaves(parent, entryContainer); + return undefined; + } + else { + // As with the internal node splitUpwardsAndInsert method, this method also must make all structural changes + // to the tree before inserting any new content. This is to avoid accidental propagation of sizes upward. 
+ var _a = splitUpwardsAndInsert(context, insertionDepth - 1, entryContainer), newRoot = _a[0], grandparent = _a[1]; + var minSize = Math.floor(branchingFactor / 2); + var toTake = minSize - entryCount; + balanceLeaves(grandparent, entryContainer, toTake); + return newRoot; + } +} +/** + * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. + * Short-circuits if first shared node is deeper than depthTo (the insertion depth). + */ +function ensureNotShared(context, isSharedFrontierDepth, depthToInclusive) { + var spine = context.spine, sideIndex = context.sideIndex; + if (depthToInclusive < 0 /* new root case */) + return; // nothing to clone when root is a leaf; equal-height case will handle this + // Clone root if needed first (depth 0) + if (isSharedFrontierDepth === 0) { + var root = spine[0]; + spine[0] = root.clone(); + } + // Clone downward along the frontier to 'depthToInclusive' + for (var depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { + var parent = spine[depth - 1]; + var childIndex = sideIndex(parent); + var clone = parent.children[childIndex].clone(); + parent.children[childIndex] = clone; + spine[depth] = clone; + } +} +; +/** + * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) + */ +function updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, depthUpToInclusive) { + var spine = context.spine, updateMax = context.updateMax; + // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because + // the insertion point is inside a shared node which will always have correct sizes + var maxKey = spine[isSharedFrontierDepth].maxKey(); + var startDepth = isSharedFrontierDepth - 1; + for (var depth = startDepth; depth >= depthUpToInclusive; depth--) { + var sizeAtLevel = unflushedSizes[depth]; + unflushedSizes[depth] = 0; // we are propagating it now + if (depth > 0) { + // propagate size upward, will be 
added lazily, either when a subtree is appended at or above that level or + // at the end of processing the entire side + unflushedSizes[depth - 1] += sizeAtLevel; + } + var node = spine[depth]; + node._size += sizeAtLevel; + // No-op if left side, as max keys in parents are unchanged by appending to the beginning of a node + updateMax(node, maxKey); + } +} +; +/** + * Update a spine (frontier) from a specific depth down, inclusive. + * Extends the frontier array if it is not already as long as the frontier. + */ +function updateFrontier(context, depthLastValid) { + var frontier = context.spine, sideIndex = context.sideIndex; + (0, b_tree_1.check)(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); + var startingAncestor = frontier[depthLastValid]; + if (startingAncestor.isLeaf) + return; + var internal = startingAncestor; + var cur = internal.children[sideIndex(internal)]; + var depth = depthLastValid + 1; + while (!cur.isLeaf) { + var ni = cur; + frontier[depth] = ni; + cur = ni.children[sideIndex(ni)]; + depth++; + } + frontier[depth] = cur; +} +; +/** + * Find the first ancestor (starting at insertionDepth) with capacity. + */ +function findSplitCascadeEndDepth(context, insertionDepth, insertionCount) { + var spine = context.spine, branchingFactor = context.branchingFactor; + if (insertionDepth >= 0) { + var depth = insertionDepth; + if (spine[depth].keys.length + insertionCount <= branchingFactor) { + return depth; + } + depth--; + while (depth >= 0) { + if (spine[depth].keys.length < branchingFactor) + return depth; + depth--; + } + } + return -1; // no capacity, will need a new root +} +; +/** + * Inserts the child without updating cached size counts. 
+ */ +function insertNoCount(parent, index, child) { + parent.children.splice(index, 0, child); + parent.keys.splice(index, 0, child.maxKey()); +} +// ---- Side-specific delegates for merging subtrees into a frontier ---- +function getLeftmostIndex() { + return 0; +} +function getRightmostIndex(node) { + return node.children.length - 1; +} +function getRightInsertionIndex(node) { + return node.children.length; +} +function splitOffRightSide(node) { + return node.splitOffRightSide(); +} +function splitOffLeftSide(node) { + return node.splitOffLeftSide(); +} +function balanceLeavesRight(parent, underfilled, toTake) { + var siblingIndex = parent.children.length - 2; + var sibling = parent.children[siblingIndex]; + var index = sibling.keys.length - toTake; + var movedKeys = sibling.keys.splice(index); + var movedValues = sibling.values.splice(index); + underfilled.keys.unshift.apply(underfilled.keys, movedKeys); + underfilled.values.unshift.apply(underfilled.values, movedValues); + parent.keys[siblingIndex] = sibling.maxKey(); +} +function balanceLeavesLeft(parent, underfilled, toTake) { + var sibling = parent.children[1]; + var movedKeys = sibling.keys.splice(0, toTake); + var movedValues = sibling.values.splice(0, toTake); + underfilled.keys.push.apply(underfilled.keys, movedKeys); + underfilled.values.push.apply(underfilled.values, movedValues); + parent.keys[0] = underfilled.maxKey(); +} +function updateRightMax(node, maxBelow) { + node.keys[node.keys.length - 1] = maxBelow; +} +function mergeRightEntries(leaf, entries) { + leaf.keys.push.apply(leaf.keys, entries.keys); + leaf.values.push.apply(leaf.values, entries.values); +} +function mergeLeftEntries(leaf, entries) { + leaf.keys.unshift.apply(leaf.keys, entries.keys); + leaf.values.unshift.apply(leaf.values, entries.values); +} diff --git a/extended/decompose.ts b/extended/decompose.ts new file mode 100644 index 0000000..9a9594f --- /dev/null +++ b/extended/decompose.ts @@ -0,0 +1,836 @@ +import BTree, { 
areOverlapping, BNode, BNodeInternal, check } from '../b+tree'; +import { alternatingCount, alternatingGetFirst, alternatingGetSecond, alternatingPush, BTreeConstructor, createAlternatingList, makeLeavesFrom, type AlternatingList, type BTreeWithInternals } from './shared'; +import { createCursor, getKey, Cursor, moveForwardOne, moveTo, noop } from "./parallelWalk"; + +/** + * A set of disjoint nodes, their heights, and the index of the tallest node. + * A height of -1 indicates an underfilled non-shared node that must be merged. + * Any shared nodes (including underfilled leaves) must have height >= 0. + * @internal + */ +export type DecomposeResult = { disjoint: AlternatingList>, tallestIndex: number }; + +/** + * Payload type used by decomposition cursors. + */ +type DecomposePayload = { disqualified: boolean }; + +const decomposeLoadFactor = 0.7; + +/** + * Decomposes two trees into disjoint nodes. Reuses interior nodes when they do not overlap/intersect with any leaf nodes + * in the other tree. Overlapping leaf nodes are broken down into new leaf nodes containing merged entries. + * The algorithm is a parallel tree walk using two cursors. The trailing cursor (behind in key space) is walked forward + * until it is at or after the leading cursor. As it does this, any whole nodes or subtrees it passes are guaranteed to + * be disjoint. This is true because the leading cursor was also previously walked in this way, and is thus pointing to + * the first key at or after the trailing cursor's previous position. + * The cursor walk is efficient, meaning it skips over disjoint subtrees entirely rather than visiting every leaf. + * Note: some of the returned leaves may be underfilled. 
+ * @internal + */ +export function decompose( + left: BTreeWithInternals, + right: BTreeWithInternals, + combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined, + ignoreRight: boolean = false +): DecomposeResult { + const maxNodeSize = left._maxNodeSize; + const cmp = left._compare; + check(left._root.size() > 0 && right._root.size() > 0, "decompose requires non-empty inputs"); + // Holds the disjoint nodes that result from decomposition. + // Alternating entries of (height, node) to avoid creating small tuples + const disjoint = createAlternatingList>(); + // During the decomposition, leaves that are not disjoint are decomposed into individual entries + // that accumulate in this array in sorted order. They are flushed into leaf nodes whenever a reused + // disjoint subtree is added to the disjoint set. + // Note that there are unavoidable cases in which this will generate underfilled leaves. + // An example of this would be a leaf in one tree that contained keys [0, 100, 101, 102]. + // In the other tree, there is a leaf that contains [2, 3, 4, 5]. This leaf can be reused entirely, + // but the first tree's leaf must be decomposed into [0] and [100, 101, 102] + const pending = createAlternatingList(); + let tallestIndex = -1, tallestHeight = -1; + + // During the upward part of the cursor walk, this holds the highest disjoint node seen so far. + // This is done because we cannot know immediately whether we can add the node to the disjoint set + // because its ancestor may also be disjoint and should be reused instead. + let highestDisjoint: { node: BNode, height: number } | undefined + // Have to do this as cast to convince TS it's ever assigned + = undefined as { node: BNode, height: number } | undefined; + + const minSize = Math.floor(maxNodeSize / 2); + const onLeafCreation = (leaf: BNode) => { + let height = leaf.keys.length < minSize ? 
-1 : 0; + alternatingPush(disjoint, height, leaf); + } + + const addSharedNodeToDisjointSet = (node: BNode, height: number) => { + // flush pending entries + makeLeavesFrom(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); + pending.length = 0; + + // Don't share underfilled leaves, instead mark them as needing merging + if (node.isLeaf && node.keys.length < minSize) { + alternatingPush(disjoint, -1, node.clone()); + } else { + node.isShared = true; + alternatingPush(disjoint, height, node); + } + + if (height > tallestHeight) { + tallestIndex = alternatingCount(disjoint) - 1; + tallestHeight = height; + } + }; + + const addHighestDisjoint = () => { + if (highestDisjoint !== undefined) { + addSharedNodeToDisjointSet(highestDisjoint.node, highestDisjoint.height); + highestDisjoint = undefined; + } + }; + + // Mark all nodes at or above depthFrom in the cursor spine as disqualified (non-disjoint) + const disqualifySpine = (cursor: Cursor, depthFrom: number) => { + const spine = cursor.spine; + for (let i = depthFrom; i >= 0; --i) { + const payload = spine[i].payload; + // Safe to early out because we always disqualify all ancestors of a disqualified node + // That is correct because every ancestor of a non-disjoint node is also non-disjoint + // because it must enclose the non-disjoint range. + if (payload.disqualified) + break; + payload.disqualified = true; + } + }; + + // Cursor payload factory + const makePayload = (): DecomposePayload => ({ disqualified: false }); + + const pushLeafRange = (leaf: BNode, from: number, toExclusive: number) => { + const keys = leaf.keys; + const values = leaf.values; + for (let i = from; i < toExclusive; ++i) + alternatingPush(pending, keys[i], values[i]); + }; + + const onMoveInLeaf = ( + leaf: BNode, + payload: DecomposePayload, + fromIndex: number, + toIndex: number, + startedEqual: boolean + ) => { + check(payload.disqualified === true, "onMoveInLeaf: leaf must be disqualified"); + const start = startedEqual ? 
fromIndex + 1 : fromIndex; + if (start < toIndex) + pushLeafRange(leaf, start, toIndex); + }; + + const onExitLeaf = ( + leaf: BNode, + payload: DecomposePayload, + startingIndex: number, + startedEqual: boolean, + cursorThis: Cursor, + ) => { + highestDisjoint = undefined; + if (!payload.disqualified) { + highestDisjoint = { node: leaf, height: 0 }; + if (cursorThis.spine.length === 0) { + // if we are exiting a leaf and there are no internal nodes, we will reach the end of the tree. + // In this case we need to add the leaf now because step up will not be called. + addHighestDisjoint(); + } + } else { + const start = startedEqual ? startingIndex + 1 : startingIndex; + const leafSize = leaf.keys.length; + if (start < leafSize) + pushLeafRange(leaf, start, leafSize); + } + }; + + const onStepUp = ( + parent: BNodeInternal, + height: number, + payload: DecomposePayload, + fromIndex: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: Cursor + ) => { + const children = parent.children; + const nextHeight = height - 1; + if (stepDownIndex !== stepDownIndex /* NaN: still walking up */ + || stepDownIndex === Number.POSITIVE_INFINITY /* target key is beyond edge of tree, done with walk */) { + if (!payload.disqualified) { + if (stepDownIndex === Number.POSITIVE_INFINITY) { + // We have finished our walk, and we won't be stepping down, so add the root + // Roots are allowed to be underfilled, so break the root up here if so to avoid + // creating underfilled interior nodes during reconstruction. + // Note: the main btree implementation allows underfilled nodes in general, this algorithm + // guarantees that no additional underfilled nodes are created beyond what was already present. 
+ if (parent.keys.length < minSize) { + for (let i = fromIndex; i < children.length; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } else { + addSharedNodeToDisjointSet(parent, height); + } + highestDisjoint = undefined; + } else { + highestDisjoint = { node: parent, height }; + } + } else { + addHighestDisjoint(); + const len = children.length; + for (let i = fromIndex + 1; i < len; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + } else { + // We have a valid step down index, so we need to disqualify the spine if needed. + // This is identical to the step down logic, but we must also perform it here because + // in the case of stepping down into a leaf, the step down callback is never called. + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + addHighestDisjoint(); + for (let i = fromIndex + 1; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + }; + + const onStepDown = ( + node: BNodeInternal, + height: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: Cursor + ) => { + if (stepDownIndex > 0) { + // When we step down into a node, we know that we have walked from a key that is less than our target. + // Because of this, if we are not stepping down into the first child, we know that all children before + // the stepDownIndex must overlap with the other tree because they must be before our target key. Since + // the child we are stepping into has a key greater than our target key, this node must overlap. + // If a child overlaps, the entire spine overlaps because a parent in a btree always encloses the range + // of its children. 
+ disqualifySpine(cursorThis, spineIndex); + const children = node.children; + const nextHeight = height - 1; + for (let i = 0; i < stepDownIndex; ++i) + addSharedNodeToDisjointSet(children[i], nextHeight); + } + }; + + const onEnterLeaf = ( + leaf: BNode, + destIndex: number, + cursorThis: Cursor, + cursorOther: Cursor + ) => { + if (destIndex > 0 + || areOverlapping(leaf.minKey()!, leaf.maxKey(), getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + // Similar logic to the step-down case, except in this case we also know the leaf in the other + // tree overlaps a leaf in this tree (this leaf, specifically). Thus, we can disqualify both spines. + cursorThis.leafPayload.disqualified = true; + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorThis, cursorThis.spine.length - 1); + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + pushLeafRange(leaf, 0, destIndex); + } + }; + + // Need the max key of both trees to perform the "finishing" walk of which ever cursor finishes second + const maxKeyLeft = left._root.maxKey() as K; + const maxKeyRight = right._root.maxKey() as K; + const maxKey = cmp(maxKeyLeft, maxKeyRight) >= 0 ? maxKeyLeft : maxKeyRight; + + // Initialize cursors at minimum keys. 
+ const curA = createCursor(left, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + + let curB: typeof curA; + if (ignoreRight) { + const dummyPayload: DecomposePayload = { disqualified: true }; + const onStepUpIgnore = ( + _1: BNodeInternal, + _2: number, + _3: DecomposePayload, + _4: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: Cursor + ) => { + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + }; + + const onStepDownIgnore = ( + _: BNodeInternal, + __: number, + spineIndex: number, + stepDownIndex: number, + cursorThis: Cursor + ) => { + if (stepDownIndex > 0) { + disqualifySpine(cursorThis, spineIndex); + } + }; + + const onEnterLeafIgnore = ( + leaf: BNode, + destIndex: number, + _: Cursor, + cursorOther: Cursor + ) => { + if (destIndex > 0 + || areOverlapping(leaf.minKey()!, leaf.maxKey(), getKey(cursorOther), cursorOther.leaf.maxKey(), cmp)) { + cursorOther.leafPayload.disqualified = true; + disqualifySpine(cursorOther, cursorOther.spine.length - 1); + } + }; + curB = createCursor(right, () => dummyPayload, onEnterLeafIgnore, noop, noop, onStepUpIgnore, onStepDownIgnore); + } else { + curB = createCursor(right, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown); + } + + // The guarantee that no overlapping interior nodes are accidentally reused relies on the careful + // alternating hopping walk of the cursors: WLOG, cursorA always--with one exception--walks from a key just behind (in key space) + // the key of cursorB to the first key >= cursorB. Call this transition a "crossover point." All interior nodes that + // overlap cause a crossover point, and all crossover points are guaranteed to be walked using this method. Thus, + // all overlapping interior nodes will be found if they are checked for on step-down. + // The one exception mentioned above is when they start at the same key. 
In this case, they are both advanced forward and then + // their new ordering determines how they walk from there. + // The one issue then is detecting any overlaps that occur based on their very initial position (minimum key of each tree). + // This is handled by the initial disqualification step below, which essentially emulates the step down disqualification for each spine. + // Initialize disqualification w.r.t. opposite leaf. + const initDisqualify = (cur: Cursor, other: Cursor) => { + const minKey = getKey(cur); + const otherMin = getKey(other); + const otherMax = other.leaf.maxKey(); + if (areOverlapping(minKey, cur.leaf.maxKey(), otherMin, otherMax, cmp)) + cur.leafPayload.disqualified = true; + for (let i = 0; i < cur.spine.length; ++i) { + const entry = cur.spine[i]; + // Since we are on the left side of the tree, we can use the leaf min key for every spine node + if (areOverlapping(minKey, entry.node.maxKey(), otherMin, otherMax, cmp)) + entry.payload.disqualified = true; + } + }; + + initDisqualify(curA, curB); + initDisqualify(curB, curA); + + let leading = curA; + let trailing = curB; + let order = cmp(getKey(leading), getKey(trailing)); + + // Walk both cursors in alternating hops + while (true) { + const areEqual = order === 0; + + if (areEqual) { + const key = getKey(leading); + const vA = curA.leaf.values[curA.leafIndex]; + const vB = curB.leaf.values[curB.leafIndex]; + // Perform the actual merge of values here. The cursors will avoid adding a duplicate of this key/value + // to pending because they respect the areEqual flag during their moves. 
+ const combined = combineFn(key, vA, vB); + if (combined !== undefined) + alternatingPush(pending, key, combined); + const outTrailing = moveForwardOne(trailing, leading); + const outLeading = moveForwardOne(leading, trailing); + if (outTrailing || outLeading) { + if (!outTrailing || !outLeading) { + // In these cases, we pass areEqual=false because a return value of "out of tree" means + // the cursor did not move. This must be true because they started equal and one of them had more tree + // to walk (one is !out), so they cannot be equal at this point. + if (outTrailing) { + moveTo(leading, trailing, maxKey, false, false); + } else { + moveTo(trailing, leading, maxKey, false, false); + } + } + break; + } + order = cmp(getKey(leading), getKey(trailing)); + } else { + if (order < 0) { + const tmp = trailing; + trailing = leading; + leading = tmp; + } + const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual); + if (out) { + moveTo(leading, trailing, maxKey, false, areEqual); + break; + } else if (nowEqual) { + order = 0; + } else { + order = -1; + } + } + } + + // Ensure any trailing non-disjoint entries are added + makeLeavesFrom(pending, maxNodeSize, onLeafCreation, decomposeLoadFactor); + // In cases like full interleaving, no leaves may be created until now + if (tallestHeight < 0 && alternatingCount(disjoint) > 0) { + tallestIndex = 0; + } + return { disjoint, tallestIndex }; +} + +/** + * Constructs a B-Tree from the result of a decomposition (set of disjoint nodes). + * @internal + */ +export function buildFromDecomposition, K, V>( + constructor: BTreeConstructor, + branchingFactor: number, + decomposed: DecomposeResult, + cmp: (a: K, b: K) => number, + maxNodeSize: number +): TBTree { + const { disjoint, tallestIndex } = decomposed; + const disjointEntryCount = alternatingCount(disjoint); + + // Now we have a set of disjoint subtrees and we need to merge them into a single tree. 
+ // To do this, we start with the tallest subtree from the disjoint set and, for all subtrees + // to the "right" and "left" of it in sorted order, we append them onto the appropriate side + // of the current tree, splitting nodes as necessary to maintain balance. + // A "side" is referred to as a frontier, as it is a linked list of nodes from the root down to + // the leaf level on that side of the tree. Each appended subtree is appended to the node at the + // same height as itself on the frontier. Each tree is guaranteed to be at most as tall as the + // current frontier because we start from the tallest subtree and work outward. + const initialRoot = alternatingGetSecond(disjoint, tallestIndex); + const frontier: BNode[] = [initialRoot]; + + const rightContext: SideContext = { + branchingFactor, + spine: frontier, + sideIndex: getRightmostIndex, + sideInsertionIndex: getRightInsertionIndex, + splitOffSide: splitOffRightSide, + balanceLeaves: balanceLeavesRight, + updateMax: updateRightMax, + mergeLeaves: mergeRightEntries + }; + // Process all subtrees to the right of the tallest subtree + if (tallestIndex + 1 <= disjointEntryCount - 1) { + updateFrontier(rightContext, 0); + processSide( + disjoint, + tallestIndex + 1, + disjointEntryCount, 1, + rightContext + ); + } + + const leftContext: SideContext = { + branchingFactor, + spine: frontier, + sideIndex: getLeftmostIndex, + sideInsertionIndex: getLeftmostIndex, + splitOffSide: splitOffLeftSide, + balanceLeaves: balanceLeavesLeft, + updateMax: noop, // left side appending doesn't update max keys, + mergeLeaves: mergeLeftEntries + }; + // Process all subtrees to the left of the current tree + if (tallestIndex - 1 >= 0) { + // Note we need to update the frontier here because the right-side processing may have grown the tree taller. 
+ updateFrontier(leftContext, 0); + processSide( + disjoint, + tallestIndex - 1, + -1, + -1, + leftContext + ); + } + + const reconstructed = new constructor(undefined, cmp, maxNodeSize); + reconstructed._root = frontier[0]; + + // Return the resulting tree + return reconstructed as unknown as TBTree; +} + +/** + * Processes one side (left or right) of the disjoint subtree set during a reconstruction operation. + * Merges each subtree in the disjoint set from start to end (exclusive) into the given spine. + * @internal + */ +function processSide( + disjoint: AlternatingList>, + start: number, + end: number, + step: number, + context: SideContext +): void { + const { spine, sideIndex } = context; + // Determine the depth of the first shared node on the frontier. + // Appending subtrees to the frontier must respect the copy-on-write semantics by cloning + // any shared nodes down to the insertion point. We track it by depth to avoid a log(n) walk of the + // frontier for each insertion as that would fundamentally change our asymptotics. + let isSharedFrontierDepth = 0; + let cur = spine[0]; + // Find the first shared node on the frontier + while (!cur.isShared && isSharedFrontierDepth < spine.length - 1) { + isSharedFrontierDepth++; + cur = (cur as BNodeInternal).children[sideIndex(cur as BNodeInternal)]; + } + + // This array holds the sum of sizes of nodes that have been inserted but not yet propagated upward. + // For example, if a subtree of size 5 is inserted at depth 2, then unflushedSizes[1] += 5. + // These sizes are added to the depth above the insertion point because the insertion updates the direct parent of the insertion. + // These sizes are flushed upward any time we need to insert at level higher than pending unflushed sizes. + // E.g. in our example, if we later insert at depth 0, we will add 5 to the node at depth 1 and the root at depth 0 before inserting. + // This scheme enables us to avoid a log(n) propagation of sizes for each insertion. 
+ const unflushedSizes: number[] = new Array(spine.length).fill(0); // pre-fill to avoid "holey" array + + for (let i = start; i != end; i += step) { + const currentHeight = spine.length - 1; // height is number of internal levels; 0 means leaf + const subtree = alternatingGetSecond(disjoint, i); + const subtreeHeight = alternatingGetFirst(disjoint, i); + const isEntryInsertion = subtreeHeight === -1; + check(subtreeHeight <= currentHeight, "Subtree taller than spine during reconstruction."); + // If subtree height is -1 (indicating underfilled leaf), then this indicates insertion into a leaf + // otherwise, it points to a node whose children have height === subtreeHeight + const insertionDepth = currentHeight - (subtreeHeight + 1); + + // Ensure path is unshared before mutation + ensureNotShared(context, isSharedFrontierDepth, insertionDepth); + + let insertionCount: number; // non-recursive + let insertionSize: number; // recursive + if (isEntryInsertion) { + check(subtree.isShared !== true); + insertionCount = insertionSize = subtree.keys.length; + } else { + insertionCount = 1; + insertionSize = subtree.size(); + } + + const cascadeEndDepth = findSplitCascadeEndDepth(context, insertionDepth, insertionCount); + + // Calculate expansion depth (first ancestor with capacity) + const expansionDepth = Math.max( + 0, // -1 indicates we will cascade to new root + cascadeEndDepth + ); + + // Update sizes on spine above the shared ancestor before we expand + updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, expansionDepth); + + let newRoot: BNodeInternal | undefined = undefined; + let sizeChangeDepth: number; + if (isEntryInsertion) { + newRoot = splitUpwardsAndInsertEntries(context, insertionDepth, subtree); + // if we are inserting entries, we don't have to update a cached size on the leaf as they simply return count of keys + sizeChangeDepth = insertionDepth - 1; + } else { + [newRoot] = splitUpwardsAndInsert(context, insertionDepth, subtree); + 
sizeChangeDepth = insertionDepth; + } + + if (newRoot) { + // Set the spine root to the highest up new node; the rest of the spine is updated below + spine[0] = newRoot; + unflushedSizes.push(0); // new root level, keep unflushed sizes in sync + sizeChangeDepth++; // account for the spine lengthening + } + + isSharedFrontierDepth = sizeChangeDepth + 1; + unflushedSizes[sizeChangeDepth] += insertionSize; + + // Finally, update the frontier from the highest new node downward + // Note that this is often the point where the new subtree is attached, + // but in the case of cascaded splits it may be higher up. + updateFrontier(context, expansionDepth); + check(isSharedFrontierDepth === spine.length - 1 || spine[isSharedFrontierDepth].isShared === true, "Non-leaf subtrees must be shared."); + check(unflushedSizes.length === spine.length, "Unflushed sizes length mismatch after root split."); + // Useful for debugging: + //updateSizeAndMax(context, unflushedSizes, spine.length - 1, 0); + //spine[0].checkValid(0, { _compare: cmp } as unknown as BTree, 0); + } + + // Finally, propagate any remaining unflushed sizes upward and update max keys + updateSizeAndMax(context, unflushedSizes, isSharedFrontierDepth, 0); +}; + +/** + * Cascade splits upward if capacity needed, then append a subtree at a given depth on the chosen side. + * All un-propagated sizes must have already been applied to the spine up to the end of any cascading expansions. + * This method guarantees that the size of the inserted subtree will not propagate upward beyond the insertion point. + * Returns a new root if the root was split, otherwise undefined, and the node into which the subtree was inserted. 
+ */ +function splitUpwardsAndInsert( + context: SideContext, + insertionDepth: number, + subtree: BNode +): [newRoot: BNodeInternal | undefined, insertTarget: BNodeInternal] { + const { spine, branchingFactor, sideIndex, sideInsertionIndex, splitOffSide, updateMax } = context; + // We must take care to avoid accidental propagation upward of the size of the inserted subtree + // To do this, we first split nodes upward from the insertion point until we find a node with capacity + // or create a new root. Since all un-propagated sizes have already been applied to the spine up to this point, + // inserting at the end ensures no accidental propagation. + + // Depth is -1 if the subtree is the same height as the current tree + if (insertionDepth >= 0) { + let carry: BNode | undefined = undefined; + // Determine initially where to insert after any splits + let insertTarget: BNodeInternal = spine[insertionDepth] as BNodeInternal; + if (insertTarget.keys.length === branchingFactor) { + insertTarget = carry = splitOffSide(insertTarget); + } + + let d = insertionDepth - 1; + while (carry && d >= 0) { + const parent = spine[d] as BNodeInternal; + const sideChildIndex = sideIndex(parent); + // Refresh last key since child was split + updateMax(parent, parent.children[sideChildIndex].maxKey()); + if (parent.keys.length < branchingFactor) { + // We have reached the end of the cascade + insertNoCount(parent, sideInsertionIndex(parent), carry); + carry = undefined; + } else { + // Splitting the parent here requires care to avoid incorrectly double counting sizes + // Example: a node is at max capacity 4, with children each of size 4 for 16 total. + // We split the node into two nodes of 2 children each, but this does *not* modify the size + // of its parent. Therefore when we insert the carry into the torn-off node, we must not + // increase its size or we will double-count the size of the carry subtree. 
+ const tornOff = splitOffSide(parent); + insertNoCount(tornOff, sideInsertionIndex(tornOff), carry); + carry = tornOff; + } + d--; + } + + let newRoot: BNodeInternal | undefined = undefined; + if (carry !== undefined) { + // Expansion reached the root, need a new root to hold carry + const oldRoot = spine[0] as BNodeInternal; + newRoot = new BNodeInternal([oldRoot], oldRoot.size() + carry.size()); + insertNoCount(newRoot, sideInsertionIndex(newRoot), carry); + } + + // Finally, insert the subtree at the insertion point + insertNoCount(insertTarget, sideInsertionIndex(insertTarget), subtree); + return [newRoot, insertTarget]; + } else { + // Insertion of subtree with equal height to current tree + const oldRoot = spine[0] as BNodeInternal; + const newRoot = new BNodeInternal([oldRoot], oldRoot.size()); + insertNoCount(newRoot, sideInsertionIndex(newRoot), subtree); + return [newRoot, newRoot]; + } +}; + +/** + * Inserts an underfilled leaf (entryContainer), merging with its sibling if possible and splitting upward if not. + */ +function splitUpwardsAndInsertEntries( + context: SideContext, + insertionDepth: number, + entryContainer: BNode +): BNodeInternal | undefined { + const { branchingFactor, spine, balanceLeaves, mergeLeaves } = context; + const entryCount = entryContainer.keys.length; + const parent = spine[insertionDepth]; + const parentSize = parent.keys.length; + if (parentSize + entryCount <= branchingFactor) { + // Sibling has capacity, just merge into it + mergeLeaves(parent, entryContainer); + return undefined; + } else { + // As with the internal node splitUpwardsAndInsert method, this method also must make all structural changes + // to the tree before inserting any new content. This is to avoid accidental propagation of sizes upward. 
+ const [newRoot, grandparent] = splitUpwardsAndInsert( + context, + insertionDepth - 1, + entryContainer + ); + const minSize = Math.floor(branchingFactor / 2); + const toTake = minSize - entryCount; + balanceLeaves(grandparent, entryContainer, toTake); + return newRoot; + } +} + +/** + * Clone along the spine from [isSharedFrontierDepth to depthTo] inclusive so path is safe to mutate. + * Short-circuits if first shared node is deeper than depthTo (the insertion depth). + */ +function ensureNotShared( + context: SideContext, + isSharedFrontierDepth: number, + depthToInclusive: number) { + const { spine, sideIndex } = context; + if (depthToInclusive < 0 /* new root case */) + return; // nothing to clone when root is a leaf; equal-height case will handle this + + // Clone root if needed first (depth 0) + if (isSharedFrontierDepth === 0) { + const root = spine[0]; + spine[0] = root.clone(); + } + + // Clone downward along the frontier to 'depthToInclusive' + for (let depth = Math.max(isSharedFrontierDepth, 1); depth <= depthToInclusive; depth++) { + const parent = spine[depth - 1] as BNodeInternal; + const childIndex = sideIndex(parent); + const clone = parent.children[childIndex].clone(); + parent.children[childIndex] = clone; + spine[depth] = clone; + } +}; + +/** + * Propagates size updates and updates max keys for nodes in (isSharedFrontierDepth, depthTo) + */ +function updateSizeAndMax( + context: SideContext, + unflushedSizes: number[], + isSharedFrontierDepth: number, + depthUpToInclusive: number) { + const { spine, updateMax } = context; + // If isSharedFrontierDepth is <= depthUpToInclusive there is nothing to update because + // the insertion point is inside a shared node which will always have correct sizes + const maxKey = spine[isSharedFrontierDepth].maxKey(); + const startDepth = isSharedFrontierDepth - 1; + for (let depth = startDepth; depth >= depthUpToInclusive; depth--) { + const sizeAtLevel = unflushedSizes[depth]; + unflushedSizes[depth] = 0; // 
we are propagating it now + if (depth > 0) { + // propagate size upward, will be added lazily, either when a subtree is appended at or above that level or + // at the end of processing the entire side + unflushedSizes[depth - 1] += sizeAtLevel; + } + const node = spine[depth] as BNodeInternal; + node._size += sizeAtLevel; + // No-op if left side, as max keys in parents are unchanged by appending to the beginning of a node + updateMax(node, maxKey); + } +}; + +/** + * Update a spine (frontier) from a specific depth down, inclusive. + * Extends the frontier array if it is not already as long as the frontier. + */ +function updateFrontier(context: SideContext, depthLastValid: number): void { + const { spine: frontier, sideIndex } = context; + check(frontier.length > depthLastValid, "updateFrontier: depthLastValid exceeds frontier height"); + const startingAncestor = frontier[depthLastValid]; + if (startingAncestor.isLeaf) + return; + const internal = startingAncestor as BNodeInternal; + let cur: BNode = internal.children[sideIndex(internal)]; + let depth = depthLastValid + 1; + while (!cur.isLeaf) { + const ni = cur as BNodeInternal; + frontier[depth] = ni; + cur = ni.children[sideIndex(ni)]; + depth++; + } + frontier[depth] = cur; +}; + +/** + * Find the first ancestor (starting at insertionDepth) with capacity. + */ +function findSplitCascadeEndDepth(context: SideContext, insertionDepth: number, insertionCount: number): number { + const { spine, branchingFactor } = context; + if (insertionDepth >= 0) { + let depth = insertionDepth; + if (spine[depth].keys.length + insertionCount <= branchingFactor) { + return depth; + } + depth--; + while (depth >= 0) { + if (spine[depth].keys.length < branchingFactor) + return depth; + depth-- + } + } + return -1; // no capacity, will need a new root +}; + +/** + * Inserts the child without updating cached size counts. 
+ */ +function insertNoCount( + parent: BNodeInternal, + index: number, + child: BNode +): void { + parent.children.splice(index, 0, child); + parent.keys.splice(index, 0, child.maxKey()); +} + +type SideContext = { + branchingFactor: number; + spine: BNode[]; + sideIndex: (node: BNodeInternal) => number; + sideInsertionIndex: (node: BNodeInternal) => number; + splitOffSide: (node: BNodeInternal) => BNodeInternal; + updateMax: (node: BNodeInternal, maxBelow: K) => void; + mergeLeaves: (leaf: BNode, entries: BNode) => void; + balanceLeaves: (parent: BNodeInternal, underfilled: BNode, toTake: number) => void; +}; + +// ---- Side-specific delegates for merging subtrees into a frontier ---- + +function getLeftmostIndex(): number { + return 0; +} + +function getRightmostIndex(node: BNodeInternal): number { + return node.children.length - 1; +} + +function getRightInsertionIndex(node: BNodeInternal): number { + return node.children.length; +} + +function splitOffRightSide(node: BNodeInternal): BNodeInternal { + return node.splitOffRightSide(); +} + +function splitOffLeftSide(node: BNodeInternal): BNodeInternal { + return node.splitOffLeftSide(); +} + +function balanceLeavesRight(parent: BNodeInternal, underfilled: BNode, toTake: number): void { + const siblingIndex = parent.children.length - 2; + const sibling = parent.children[siblingIndex]; + const index = sibling.keys.length - toTake; + const movedKeys = sibling.keys.splice(index); + const movedValues = sibling.values.splice(index); + underfilled.keys.unshift.apply(underfilled.keys, movedKeys); + underfilled.values.unshift.apply(underfilled.values, movedValues); + parent.keys[siblingIndex] = sibling.maxKey(); +} + +function balanceLeavesLeft(parent: BNodeInternal, underfilled: BNode, toTake: number): void { + const sibling = parent.children[1]; + const movedKeys = sibling.keys.splice(0, toTake); + const movedValues = sibling.values.splice(0, toTake); + underfilled.keys.push.apply(underfilled.keys, movedKeys); + 
underfilled.values.push.apply(underfilled.values, movedValues); + parent.keys[0] = underfilled.maxKey(); +} + +function updateRightMax(node: BNodeInternal, maxBelow: K): void { + node.keys[node.keys.length - 1] = maxBelow; +} + +function mergeRightEntries(leaf: BNode, entries: BNode): void { + leaf.keys.push.apply(leaf.keys, entries.keys); + leaf.values.push.apply(leaf.values, entries.values); +} + +function mergeLeftEntries(leaf: BNode, entries: BNode): void{ + leaf.keys.unshift.apply(leaf.keys, entries.keys); + leaf.values.unshift.apply(leaf.values, entries.values); +} diff --git a/extended/diffAgainst.d.ts b/extended/diffAgainst.d.ts index c54cc79..7b4e0aa 100644 --- a/extended/diffAgainst.d.ts +++ b/extended/diffAgainst.d.ts @@ -11,12 +11,13 @@ import BTree from '../b+tree'; * @param onlyA Callback invoked for all keys only present in `treeA`. * @param onlyB Callback invoked for all keys only present in `treeB`. * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. */ -export declare function diffAgainst(_treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { +export default function diffAgainst(_treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { break?: R; } | void, onlyB?: (k: K, v: V) => { break?: R; } | void, different?: (k: K, vThis: V, vOther: V) => { break?: R; } | void): R | undefined; -export default diffAgainst; diff --git a/extended/diffAgainst.js b/extended/diffAgainst.js index 9efbad4..94673e9 100644 --- a/extended/diffAgainst.js +++ b/extended/diffAgainst.js @@ -1,6 +1,5 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -exports.diffAgainst = void 0; var b_tree_1 = require("../b+tree"); /** * Computes the differences between `treeA` and `treeB`. 
@@ -14,6 +13,8 @@ var b_tree_1 = require("../b+tree"); * @param onlyA Callback invoked for all keys only present in `treeA`. * @param onlyB Callback invoked for all keys only present in `treeB`. * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. */ function diffAgainst(_treeA, _treeB, onlyA, onlyB, different) { var treeA = _treeA; @@ -126,11 +127,11 @@ function diffAgainst(_treeA, _treeB, onlyA, onlyB, different) { return finishCursorWalk(otherCursor, thisCursor, compareKeys, onlyB); return undefined; } -exports.diffAgainst = diffAgainst; +exports.default = diffAgainst; /** * Finishes walking `cursor` once the other cursor has already completed its walk. */ -var finishCursorWalk = function (cursor, cursorFinished, compareKeys, callback) { +function finishCursorWalk(cursor, cursorFinished, compareKeys, callback) { var compared = compareDiffCursors(cursor, cursorFinished, compareKeys); if (compared === 0) { if (!stepDiffCursor(cursor)) @@ -140,11 +141,11 @@ var finishCursorWalk = function (cursor, cursorFinished, compareKeys, callback) (0, b_tree_1.check)(false, 'cursor walk terminated early'); } return stepToEnd(cursor, callback); -}; +} /** * Walks the cursor to the end of the tree, invoking the callback for each key/value pair. 
*/ -var stepToEnd = function (cursor, callback) { +function stepToEnd(cursor, callback) { var canStep = true; while (canStep) { var leaf = cursor.leaf, levelIndices = cursor.levelIndices, currentKey = cursor.currentKey; @@ -157,8 +158,8 @@ var stepToEnd = function (cursor, callback) { canStep = stepDiffCursor(cursor); } return undefined; -}; -var makeDiffCursor = function (internal) { +} +function makeDiffCursor(internal) { var root = internal._root; return { height: internal.height, @@ -167,12 +168,12 @@ var makeDiffCursor = function (internal) { leaf: undefined, currentKey: root.maxKey() }; -}; +} /** * Advances the cursor to the next step in the walk of its tree. * Cursors are walked backwards in sort order, as this allows them to leverage maxKey() in order to be compared in O(1). */ -var stepDiffCursor = function (cursor, stepToNode) { +function stepDiffCursor(cursor, stepToNode) { var internalSpine = cursor.internalSpine, levelIndices = cursor.levelIndices, leaf = cursor.leaf; if (stepToNode === true || leaf) { var levelsLength = levelIndices.length; @@ -229,12 +230,12 @@ var stepDiffCursor = function (cursor, stepToNode) { } return true; } -}; +} /** * Compares two cursors and returns which cursor is ahead in the traversal. * Note that cursors advance in reverse sort order. 
*/ -var compareDiffCursors = function (cursorA, cursorB, compareKeys) { +function compareDiffCursors(cursorA, cursorB, compareKeys) { var heightA = cursorA.height, currentKeyA = cursorA.currentKey, levelIndicesA = cursorA.levelIndices; var heightB = cursorB.height, currentKeyB = cursorB.currentKey, levelIndicesB = cursorB.levelIndices; // Reverse the comparison order, as cursors are advanced in reverse sorting order @@ -250,5 +251,4 @@ var compareDiffCursors = function (cursorA, cursorB, compareKeys) { var depthANormalized = levelIndicesA.length - (heightA - heightMin); var depthBNormalized = levelIndicesB.length - (heightB - heightMin); return depthANormalized - depthBNormalized; -}; -exports.default = diffAgainst; +} diff --git a/extended/diffAgainst.ts b/extended/diffAgainst.ts index 154d981..647244b 100644 --- a/extended/diffAgainst.ts +++ b/extended/diffAgainst.ts @@ -1,27 +1,6 @@ import BTree from '../b+tree'; import { BNode, BNodeInternal, check } from '../b+tree'; -import type { BTreeWithInternals } from './shared'; - -/** - * A walkable pointer into a BTree for computing efficient diffs between trees with shared data. - * - A cursor points to either a key/value pair (KVP) or a node (which can be either a leaf or an internal node). - * As a consequence, a cursor cannot be created for an empty tree. - * - A cursor can be walked forwards using `step`. A cursor can be compared to another cursor to - * determine which is ahead in advancement. - * - A cursor is valid only for the tree it was created from, and only until the first edit made to - * that tree since the cursor's creation. - * - A cursor contains a key for the current location, which is the maxKey when the cursor points to a node - * and a key corresponding to a value when pointing to a leaf. - * - Leaf is only populated if the cursor points to a KVP. If this is the case, levelIndices.length === internalSpine.length + 1 - * and levelIndices[levelIndices.length - 1] is the index of the value. 
- */ -type DiffCursor = { - height: number; - internalSpine: BNode[][]; - levelIndices: number[]; - leaf: BNode | undefined; - currentKey: K; -}; +import { type BTreeWithInternals } from './shared'; /** * Computes the differences between `treeA` and `treeB`. @@ -35,8 +14,10 @@ type DiffCursor = { * @param onlyA Callback invoked for all keys only present in `treeA`. * @param onlyB Callback invoked for all keys only present in `treeB`. * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. */ -export function diffAgainst( +export default function diffAgainst( _treeA: BTree, _treeB: BTree, onlyA?: (k: K, v: V) => { break?: R } | void, @@ -156,12 +137,12 @@ export function diffAgainst( /** * Finishes walking `cursor` once the other cursor has already completed its walk. */ -const finishCursorWalk = ( +function finishCursorWalk( cursor: DiffCursor, cursorFinished: DiffCursor, compareKeys: (a: K, b: K) => number, callback: (k: K, v: V) => { break?: R } | void -): R | undefined => { +): R | undefined { const compared = compareDiffCursors(cursor, cursorFinished, compareKeys); if (compared === 0) { if (!stepDiffCursor(cursor)) @@ -170,15 +151,15 @@ const finishCursorWalk = ( check(false, 'cursor walk terminated early'); } return stepToEnd(cursor, callback); -}; +} /** * Walks the cursor to the end of the tree, invoking the callback for each key/value pair. 
*/ -const stepToEnd = ( +function stepToEnd( cursor: DiffCursor, callback: (k: K, v: V) => { break?: R } | void -): R | undefined => { +): R | undefined { let canStep = true; while (canStep) { const { leaf, levelIndices, currentKey } = cursor; @@ -191,11 +172,11 @@ const stepToEnd = ( canStep = stepDiffCursor(cursor); } return undefined; -}; +} -const makeDiffCursor = ( +function makeDiffCursor( internal: BTreeWithInternals -): DiffCursor => { +): DiffCursor { const root = internal._root; return { height: internal.height, @@ -204,13 +185,13 @@ const makeDiffCursor = ( leaf: undefined, currentKey: root.maxKey() }; -}; +} /** * Advances the cursor to the next step in the walk of its tree. * Cursors are walked backwards in sort order, as this allows them to leverage maxKey() in order to be compared in O(1). */ -const stepDiffCursor = (cursor: DiffCursor, stepToNode?: boolean): boolean => { +function stepDiffCursor(cursor: DiffCursor, stepToNode?: boolean): boolean { const { internalSpine, levelIndices, leaf } = cursor; if (stepToNode === true || leaf) { const levelsLength = levelIndices.length; @@ -264,17 +245,17 @@ const stepDiffCursor = (cursor: DiffCursor, stepToNode?: boolean): b } return true; } -}; +} /** * Compares two cursors and returns which cursor is ahead in the traversal. * Note that cursors advance in reverse sort order. 
*/ -const compareDiffCursors = ( +function compareDiffCursors( cursorA: DiffCursor, cursorB: DiffCursor, compareKeys: (a: K, b: K) => number -): number => { +): number { const { height: heightA, currentKey: currentKeyA, levelIndices: levelIndicesA } = cursorA; const { height: heightB, currentKey: currentKeyB, levelIndices: levelIndicesB } = cursorB; // Reverse the comparison order, as cursors are advanced in reverse sorting order @@ -291,6 +272,25 @@ const compareDiffCursors = ( const depthANormalized = levelIndicesA.length - (heightA - heightMin); const depthBNormalized = levelIndicesB.length - (heightB - heightMin); return depthANormalized - depthBNormalized; -}; +} -export default diffAgainst; +/** + * A walkable pointer into a BTree for computing efficient diffs between trees with shared data. + * - A cursor points to either a key/value pair (KVP) or a node (which can be either a leaf or an internal node). + * As a consequence, a cursor cannot be created for an empty tree. + * - A cursor can be walked forwards using `step`. A cursor can be compared to another cursor to + * determine which is ahead in advancement. + * - A cursor is valid only for the tree it was created from, and only until the first edit made to + * that tree since the cursor's creation. + * - A cursor contains a key for the current location, which is the maxKey when the cursor points to a node + * and a key corresponding to a value when pointing to a leaf. + * - Leaf is only populated if the cursor points to a KVP. If this is the case, levelIndices.length === internalSpine.length + 1 + * and levelIndices[levelIndices.length - 1] is the index of the value. 
+ */ +type DiffCursor = { + height: number; + internalSpine: BNode[][]; + levelIndices: number[]; + leaf: BNode | undefined; + currentKey: K; +}; diff --git a/extended/forEachKeyInBoth.d.ts b/extended/forEachKeyInBoth.d.ts new file mode 100644 index 0000000..296d439 --- /dev/null +++ b/extended/forEachKeyInBoth.d.ts @@ -0,0 +1,19 @@ +import BTree from '../b+tree'; +/** + * Calls the supplied `callback` for each key/value pair shared by both trees, in sorted key order. + * Neither tree is modified. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because whole non-intersecting subtrees + * are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param treeA First tree to compare. + * @param treeB Second tree to compare. + * @param callback Invoked for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the trees were built with different comparators. + */ +export default function forEachKeyInBoth(treeA: BTree, treeB: BTree, callback: (key: K, leftValue: V, rightValue: V) => { + break?: R; +} | void): R | undefined; diff --git a/extended/forEachKeyInBoth.js b/extended/forEachKeyInBoth.js new file mode 100644 index 0000000..92f10b6 --- /dev/null +++ b/extended/forEachKeyInBoth.js @@ -0,0 +1,73 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +var shared_1 = require("./shared"); +var parallelWalk_1 = require("./parallelWalk"); +/** + * Calls the supplied `callback` for each key/value pair shared by both trees, in sorted key order. + * Neither tree is modified. 
+ * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because whole non-intersecting subtrees + * are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param treeA First tree to compare. + * @param treeB Second tree to compare. + * @param callback Invoked for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the trees were built with different comparators. + */ +function forEachKeyInBoth(treeA, treeB, callback) { + var _treeA = treeA; + var _treeB = treeB; + (0, shared_1.checkCanDoSetOperation)(_treeA, _treeB, true); + if (treeB.size === 0 || treeA.size === 0) + return; + var cmp = treeA._compare; + var makePayload = function () { return undefined; }; + var cursorA = (0, parallelWalk_1.createCursor)(_treeA, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + var cursorB = (0, parallelWalk_1.createCursor)(_treeB, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + var leading = cursorA; + var trailing = cursorB; + var order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + // This walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. + // However, the only thing we care about is when the two cursors are equal (equality is intersection). + // When they are not equal we just advance the trailing cursor. 
+ while (true) { + var areEqual = order === 0; + if (areEqual) { + var key = (0, parallelWalk_1.getKey)(leading); + var vA = cursorA.leaf.values[cursorA.leafIndex]; + var vB = cursorB.leaf.values[cursorB.leafIndex]; + var result = callback(key, vA, vB); + if (result && result.break) { + return result.break; + } + var outT = (0, parallelWalk_1.moveForwardOne)(trailing, leading); + var outL = (0, parallelWalk_1.moveForwardOne)(leading, trailing); + if (outT && outL) + break; + order = cmp((0, parallelWalk_1.getKey)(leading), (0, parallelWalk_1.getKey)(trailing)); + } + else { + if (order < 0) { + var tmp = trailing; + trailing = leading; + leading = tmp; + } + // At this point, leading is guaranteed to be ahead of trailing. + var _a = (0, parallelWalk_1.moveTo)(trailing, leading, (0, parallelWalk_1.getKey)(leading), true, areEqual), out = _a[0], nowEqual = _a[1]; + if (out) { + // We've reached the end of one tree, so intersections are guaranteed to be done. + break; + } + else if (nowEqual) { + order = 0; + } + else { + order = -1; // trailing is ahead of leading + } + } + } +} +exports.default = forEachKeyInBoth; diff --git a/extended/forEachKeyInBoth.ts b/extended/forEachKeyInBoth.ts new file mode 100644 index 0000000..6bd34b2 --- /dev/null +++ b/extended/forEachKeyInBoth.ts @@ -0,0 +1,74 @@ +import BTree from '../b+tree'; +import { type BTreeWithInternals, checkCanDoSetOperation } from './shared'; +import { createCursor, moveForwardOne, moveTo, getKey, noop } from "./parallelWalk" + +/** + * Calls the supplied `callback` for each key/value pair shared by both trees, in sorted key order. + * Neither tree is modified. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because whole non-intersecting subtrees + * are skipped. 
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param treeA First tree to compare. + * @param treeB Second tree to compare. + * @param callback Invoked for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the trees were built with different comparators. + */ +export default function forEachKeyInBoth( + treeA: BTree, + treeB: BTree, + callback: (key: K, leftValue: V, rightValue: V) => { break?: R } | void +): R | undefined { + const _treeA = treeA as unknown as BTreeWithInternals; + const _treeB = treeB as unknown as BTreeWithInternals; + checkCanDoSetOperation(_treeA, _treeB, true); + if (treeB.size === 0 || treeA.size === 0) + return; + + const cmp = treeA._compare; + const makePayload = (): undefined => undefined; + let cursorA = createCursor(_treeA, makePayload, noop, noop, noop, noop, noop); + let cursorB = createCursor(_treeB, makePayload, noop, noop, noop, noop, noop); + let leading = cursorA; + let trailing = cursorB; + let order = cmp(getKey(leading), getKey(trailing)); + + // This walk is somewhat similar to a merge walk in that it does an alternating hop walk with cursors. + // However, the only thing we care about is when the two cursors are equal (equality is intersection). + // When they are not equal we just advance the trailing cursor. 
+ while (true) { + const areEqual = order === 0; + if (areEqual) { + const key = getKey(leading); + const vA = cursorA.leaf.values[cursorA.leafIndex]; + const vB = cursorB.leaf.values[cursorB.leafIndex]; + const result = callback(key, vA, vB); + if (result && result.break) { + return result.break; + } + const outT = moveForwardOne(trailing, leading); + const outL = moveForwardOne(leading, trailing); + if (outT && outL) + break; + order = cmp(getKey(leading), getKey(trailing)); + } else { + if (order < 0) { + const tmp = trailing; + trailing = leading; leading = tmp; + } + // At this point, leading is guaranteed to be ahead of trailing. + const [out, nowEqual] = moveTo(trailing, leading, getKey(leading), true, areEqual) + if (out) { + // We've reached the end of one tree, so intersections are guaranteed to be done. + break; + } else if (nowEqual) { + order = 0; + } else { + order = -1; // trailing is ahead of leading + } + } + } +} diff --git a/extended/forEachKeyNotIn.d.ts b/extended/forEachKeyNotIn.d.ts new file mode 100644 index 0000000..474750a --- /dev/null +++ b/extended/forEachKeyNotIn.d.ts @@ -0,0 +1,18 @@ +import BTree from '../b+tree'; +/** + * Calls the supplied `callback` for each key/value pair that is in `includeTree` but not in `excludeTree` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param includeTree The tree to iterate keys from. + * @param excludeTree Keys present in this tree are omitted from the callback. + * @param callback Invoked for keys that are in `includeTree` but not `excludeTree`. 
It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were built with different comparators. + */ +export default function forEachKeyNotIn(includeTree: BTree, excludeTree: BTree, callback: (key: K, value: V) => { + break?: R; +} | void): R | undefined; diff --git a/extended/forEachKeyNotIn.js b/extended/forEachKeyNotIn.js new file mode 100644 index 0000000..22d5961 --- /dev/null +++ b/extended/forEachKeyNotIn.js @@ -0,0 +1,87 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +var shared_1 = require("./shared"); +var parallelWalk_1 = require("./parallelWalk"); +/** + * Calls the supplied `callback` for each key/value pair that is in `includeTree` but not in `excludeTree` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param includeTree The tree to iterate keys from. + * @param excludeTree Keys present in this tree are omitted from the callback. + * @param callback Invoked for keys that are in `includeTree` but not `excludeTree`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were built with different comparators. 
+ */ +function forEachKeyNotIn(includeTree, excludeTree, callback) { + var _includeTree = includeTree; + var _excludeTree = excludeTree; + (0, shared_1.checkCanDoSetOperation)(_includeTree, _excludeTree, true); + if (includeTree.size === 0) { + return; + } + var finishWalk = function () { + var out = false; + do { + var key = (0, parallelWalk_1.getKey)(cursorInclude); + var value = cursorInclude.leaf.values[cursorInclude.leafIndex]; + var result = callback(key, value); + if (result && result.break) { + return result.break; + } + out = (0, parallelWalk_1.moveForwardOne)(cursorInclude, cursorExclude); + } while (!out); + return undefined; + }; + var cmp = includeTree._compare; + var makePayload = function () { return undefined; }; + var cursorInclude = (0, parallelWalk_1.createCursor)(_includeTree, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + if (excludeTree.size === 0) { + return finishWalk(); + } + var cursorExclude = (0, parallelWalk_1.createCursor)(_excludeTree, makePayload, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop, parallelWalk_1.noop); + var order = cmp((0, parallelWalk_1.getKey)(cursorInclude), (0, parallelWalk_1.getKey)(cursorExclude)); + while (true) { + var areEqual = order === 0; + if (areEqual) { + // Keys are equal, so this key is in both trees and should be skipped. 
+ var outInclude = (0, parallelWalk_1.moveForwardOne)(cursorInclude, cursorExclude); + if (outInclude) + break; + order = 1; // include is now ahead of exclude + } + else { + if (order < 0) { + var key = (0, parallelWalk_1.getKey)(cursorInclude); + var value = cursorInclude.leaf.values[cursorInclude.leafIndex]; + var result = callback(key, value); + if (result && result.break) { + return result.break; + } + var outInclude = (0, parallelWalk_1.moveForwardOne)(cursorInclude, cursorExclude); + if (outInclude) { + break; + } + order = cmp((0, parallelWalk_1.getKey)(cursorInclude), (0, parallelWalk_1.getKey)(cursorExclude)); + } + else { + // At this point, include is guaranteed to be ahead of exclude. + var _a = (0, parallelWalk_1.moveTo)(cursorExclude, cursorInclude, (0, parallelWalk_1.getKey)(cursorInclude), true, areEqual), out = _a[0], nowEqual = _a[1]; + if (out) { + // We've reached the end of exclude, so call for all remaining keys in include + return finishWalk(); + } + else if (nowEqual) { + order = 0; + } + else { + order = -1; + } + } + } + } +} +exports.default = forEachKeyNotIn; diff --git a/extended/forEachKeyNotIn.ts b/extended/forEachKeyNotIn.ts new file mode 100644 index 0000000..30072d1 --- /dev/null +++ b/extended/forEachKeyNotIn.ts @@ -0,0 +1,91 @@ +import BTree from '../b+tree'; +import { type BTreeWithInternals, checkCanDoSetOperation } from './shared'; +import { createCursor, moveForwardOne, moveTo, getKey, noop } from "./parallelWalk" + +/** + * Calls the supplied `callback` for each key/value pair that is in `includeTree` but not in `excludeTree` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. 
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param includeTree The tree to iterate keys from. + * @param excludeTree Keys present in this tree are omitted from the callback. + * @param callback Invoked for keys that are in `includeTree` but not `excludeTree`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were built with different comparators. + */ +export default function forEachKeyNotIn( + includeTree: BTree, + excludeTree: BTree, + callback: (key: K, value: V) => { break?: R } | void +): R | undefined { + const _includeTree = includeTree as unknown as BTreeWithInternals; + const _excludeTree = excludeTree as unknown as BTreeWithInternals; + checkCanDoSetOperation(_includeTree, _excludeTree, true); + if (includeTree.size === 0) { + return; + } + + const finishWalk = (): R | undefined => { + let out = false; + do { + const key = getKey(cursorInclude); + const value = cursorInclude.leaf.values[cursorInclude.leafIndex]; + const result = callback(key, value); + if (result && result.break) { + return result.break; + } + out = moveForwardOne(cursorInclude, cursorExclude); + } while (!out); + return undefined; + } + + const cmp = includeTree._compare; + const makePayload = (): undefined => undefined; + let cursorInclude = createCursor(_includeTree, makePayload, noop, noop, noop, noop, noop); + + if (excludeTree.size === 0) { + return finishWalk(); + } + + let cursorExclude = createCursor(_excludeTree, makePayload, noop, noop, noop, noop, noop); + let order = cmp(getKey(cursorInclude), getKey(cursorExclude)); + + while (true) { + const areEqual = order === 0; + if (areEqual) { + // Keys are equal, so this key is in both trees and should be skipped. 
+ const outInclude = moveForwardOne(cursorInclude, cursorExclude); + if (outInclude) + break; + order = 1; // include is now ahead of exclude + } else { + if (order < 0) { + const key = getKey(cursorInclude); + const value = cursorInclude.leaf.values[cursorInclude.leafIndex]; + const result = callback(key, value); + if (result && result.break) { + return result.break; + } + const outInclude = moveForwardOne(cursorInclude, cursorExclude); + if (outInclude) { + break; + } + order = cmp(getKey(cursorInclude), getKey(cursorExclude)); + } else { + // At this point, include is guaranteed to be ahead of exclude. + const [out, nowEqual] = moveTo(cursorExclude, cursorInclude, getKey(cursorInclude), true, areEqual) + if (out) { + // We've reached the end of exclude, so call for all remaining keys in include + return finishWalk(); + } else if (nowEqual) { + order = 0; + } else { + order = -1; + } + } + } + } +} diff --git a/extended/index.d.ts b/extended/index.d.ts index 528377f..240c2d4 100644 --- a/extended/index.d.ts +++ b/extended/index.d.ts @@ -1,7 +1,41 @@ import BTree from '../b+tree'; +/** + * An extended version of the `BTree` class that includes additional functionality + * such as bulk loading, set operations, and diffing. + * It is separated to keep the core BTree class small from a bundle size perspective. + * Note: each additional functionality piece is available as a standalone function from the extended folder. + * @extends BTree + */ export declare class BTreeEx extends BTree { + /** + * Bulk loads a new `BTreeEx` from a sorted alternating list of entries. + * This reuses the same algorithm as `extended/bulkLoad`, but produces a `BTreeEx`. + * Time and space complexity are O(n). + * @param entries Alternating array of keys and values: `[key0, value0, key1, value1, ...]`. Must be sorted by key in strictly ascending order. + * @param maxNodeSize The branching factor (maximum number of children per node). + * @param compare Comparator to use. 
Defaults to the standard comparator if omitted. + * @returns A fully built tree containing the supplied entries. + * @throws Error if the entries are not strictly sorted or contain duplicate keys. + */ + static bulkLoad(entries: (K | V)[], maxNodeSize: number, compare?: (a: K, b: K) => number): BTreeEx; + /** See {@link BTree.clone}. */ clone(): this; + /** See {@link BTree.greedyClone}. */ greedyClone(force?: boolean): this; + /** + * Computes the differences between `this` and `other`. + * For efficiency, the diff is returned via invocations of supplied handlers. + * The computation is optimized for the case in which the two trees have large amounts of shared data + * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. + * The handlers can cause computation to early exit by returning `{ break: R }`. + * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * @param other The tree to compute a diff against. + * @param onlyThis Callback invoked for all keys only present in `this`. + * @param onlyOther Callback invoked for all keys only present in `other`. + * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. + */ diffAgainst(other: BTree, onlyThis?: (k: K, v: V) => { break?: R; } | void, onlyOther?: (k: K, v: V) => { @@ -9,13 +43,90 @@ export declare class BTreeEx extends BTree { } | void, different?: (k: K, vThis: V, vOther: V) => { break?: R; } | void): R | undefined; + /** + * Calls the supplied `callback` for each key/value pair shared by this tree and `other`, in sorted key order. + * Neither tree is modified. 
+ * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because disjoint subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The other tree to compare with this one. + * @param callback Called for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the two trees were created with different comparators. + */ + forEachKeyInBoth(other: BTree, callback: (key: K, leftValue: V, rightValue: V) => { + break?: R; + } | void): R | undefined; + /** + * Calls the supplied `callback` for each key/value pair that exists in this tree but not in `other` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other Keys present in this tree will be omitted from the callback. + * @param callback Invoked for keys unique to `this`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were created with different comparators. 
+ */ + forEachKeyNotIn(other: BTree, callback: (key: K, value: V) => { + break?: R; + } | void): R | undefined; + /** + * Returns a new tree containing only keys present in both trees. + * Neither tree is modified. + * + * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D), + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The other tree to intersect with this one. + * @param combineFn Called for keys that appear in both trees. Return the desired value. + * @returns A new `BTreeEx` populated with the intersection. + * @throws Error if the trees were created with different comparators. + */ + intersect(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V): BTreeEx; + /** + * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input. + * + * Complexity is O(N + M) in the fully overlapping case, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The other tree to union with this one. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or `undefined` to omit the key. + * @returns A new `BTreeEx` that contains the unioned key/value pairs. + * @throws Error if the trees were created with different comparators or max node sizes. 
+ */ + union(other: BTreeEx, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx; + /** + * Returns a new tree containing only the keys that are present in this tree but not `other` (set subtraction). + * Neither input tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2) where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside this tree. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The tree whose keys will be removed from the result. + * @returns A new `BTreeEx` representing `this \ other`. + * @throws Error if the trees were created with different comparators or max node sizes. + */ + subtract(other: BTreeEx): BTreeEx; } export interface BTreeEx { + /** See {@link BTree.with}. */ with(key: K): BTreeEx; with(key: K, value: V2, overwrite?: boolean): BTreeEx; with(key: K, value?: V2, overwrite?: boolean): BTreeEx; + /** See {@link BTree.withPairs}. */ withPairs(pairs: [K, V | V2][], overwrite: boolean): BTreeEx; + /** See {@link BTree.withKeys}. */ withKeys(keys: K[], returnThisIfUnchanged?: boolean): BTreeEx; + /** See {@link BTree.mapValues}. */ mapValues(callback: (v: V, k: K, counter: number) => R): BTreeEx; } export default BTreeEx; diff --git a/extended/index.js b/extended/index.js index 9d9ba2d..4966bc9 100644 --- a/extended/index.js +++ b/extended/index.js @@ -14,18 +14,70 @@ var __extends = (this && this.__extends) || (function () { d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); }; })(); +var __createBinding = (this && this.__createBinding) || (Object.create ? 
(function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.BTreeEx = void 0; -var b_tree_1 = __importDefault(require("../b+tree")); -var diffAgainst_1 = require("./diffAgainst"); +var b_tree_1 = __importStar(require("../b+tree")); +var diffAgainst_1 = __importDefault(require("./diffAgainst")); +var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth")); +var forEachKeyNotIn_1 = __importDefault(require("./forEachKeyNotIn")); +var intersect_1 = __importDefault(require("./intersect")); +var subtract_1 = __importDefault(require("./subtract")); +var union_1 = __importDefault(require("./union")); +var bulkLoad_1 = require("./bulkLoad"); +/** + * An extended version of the `BTree` class that includes additional functionality + * such as bulk loading, set operations, and diffing. + * It is separated to keep the core BTree class small from a bundle size perspective. + * Note: each additional functionality piece is available as a standalone function from the extended folder. 
+ * @extends BTree + */ var BTreeEx = /** @class */ (function (_super) { __extends(BTreeEx, _super); function BTreeEx() { return _super !== null && _super.apply(this, arguments) || this; } + /** + * Bulk loads a new `BTreeEx` from a sorted alternating list of entries. + * This reuses the same algorithm as `extended/bulkLoad`, but produces a `BTreeEx`. + * Time and space complexity are O(n). + * @param entries Alternating array of keys and values: `[key0, value0, key1, value1, ...]`. Must be sorted by key in strictly ascending order. + * @param maxNodeSize The branching factor (maximum number of children per node). + * @param compare Comparator to use. Defaults to the standard comparator if omitted. + * @returns A fully built tree containing the supplied entries. + * @throws Error if the entries are not strictly sorted or contain duplicate keys. + */ + BTreeEx.bulkLoad = function (entries, maxNodeSize, compare) { + var cmp = compare !== null && compare !== void 0 ? compare : b_tree_1.defaultComparator; + var root = (0, bulkLoad_1.bulkLoadRoot)(entries, maxNodeSize, cmp); + var tree = new BTreeEx(undefined, cmp, maxNodeSize); + var target = tree; + target._root = root; + target._size = root.size(); + return tree; + }; + /** See {@link BTree.clone}. */ BTreeEx.prototype.clone = function () { var source = this; source._root.isShared = true; @@ -35,6 +87,7 @@ var BTreeEx = /** @class */ (function (_super) { target._size = source._size; return result; }; + /** See {@link BTree.greedyClone}. */ BTreeEx.prototype.greedyClone = function (force) { var source = this; var result = new BTreeEx(undefined, this._compare, this._maxNodeSize); @@ -43,8 +96,101 @@ var BTreeEx = /** @class */ (function (_super) { target._size = source._size; return result; }; + /** + * Computes the differences between `this` and `other`. + * For efficiency, the diff is returned via invocations of supplied handlers. 
+ * The computation is optimized for the case in which the two trees have large amounts of shared data + * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. + * The handlers can cause computation to early exit by returning `{ break: R }`. + * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * @param other The tree to compute a diff against. + * @param onlyThis Callback invoked for all keys only present in `this`. + * @param onlyOther Callback invoked for all keys only present in `other`. + * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. + */ BTreeEx.prototype.diffAgainst = function (other, onlyThis, onlyOther, different) { - return (0, diffAgainst_1.diffAgainst)(this, other, onlyThis, onlyOther, different); + return (0, diffAgainst_1.default)(this, other, onlyThis, onlyOther, different); + }; + /** + * Calls the supplied `callback` for each key/value pair shared by this tree and `other`, in sorted key order. + * Neither tree is modified. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because disjoint subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The other tree to compare with this one. + * @param callback Called for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. 
+ * @throws Error if the two trees were created with different comparators. + */ + BTreeEx.prototype.forEachKeyInBoth = function (other, callback) { + return (0, forEachKeyInBoth_1.default)(this, other, callback); + }; + /** + * Calls the supplied `callback` for each key/value pair that exists in this tree but not in `other` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other Keys present in this tree will be omitted from the callback. + * @param callback Invoked for keys unique to `this`. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited. + * @throws Error if the trees were created with different comparators. + */ + BTreeEx.prototype.forEachKeyNotIn = function (other, callback) { + return (0, forEachKeyNotIn_1.default)(this, other, callback); + }; + /** + * Returns a new tree containing only keys present in both trees. + * Neither tree is modified. + * + * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D), + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The other tree to intersect with this one. + * @param combineFn Called for keys that appear in both trees. Return the desired value. 
+ * @returns A new `BTreeEx` populated with the intersection. + * @throws Error if the trees were created with different comparators. + */ + BTreeEx.prototype.intersect = function (other, combineFn) { + return (0, intersect_1.default)(this, other, combineFn); + }; + /** + * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input. + * + * Complexity is O(N + M) in the fully overlapping case, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The other tree to union with this one. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or `undefined` to omit the key. + * @returns A new `BTreeEx` that contains the unioned key/value pairs. + * @throws Error if the trees were created with different comparators or max node sizes. + */ + BTreeEx.prototype.union = function (other, combineFn) { + return (0, union_1.default)(this, other, combineFn); + }; + /** + * Returns a new tree containing only the keys that are present in this tree but not `other` (set subtraction). + * Neither input tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2) where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside this tree. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The tree whose keys will be removed from the result. + * @returns A new `BTreeEx` representing `this \ other`. 
+ * @throws Error if the trees were created with different comparators or max node sizes. + */ + BTreeEx.prototype.subtract = function (other) { + return (0, subtract_1.default)(this, other); }; return BTreeEx; }(b_tree_1.default)); diff --git a/extended/index.ts b/extended/index.ts index 0d6edf9..a88636c 100644 --- a/extended/index.ts +++ b/extended/index.ts @@ -1,8 +1,46 @@ -import BTree from '../b+tree'; +import BTree, { defaultComparator } from '../b+tree'; import type { BTreeWithInternals } from './shared'; -import { diffAgainst as diffAgainstAlgorithm } from './diffAgainst'; +import diffAgainst from './diffAgainst'; +import forEachKeyInBoth from './forEachKeyInBoth'; +import forEachKeyNotIn from './forEachKeyNotIn'; +import intersect from './intersect'; +import subtract from './subtract'; +import union from './union'; +import { bulkLoadRoot } from './bulkLoad'; +/** + * An extended version of the `BTree` class that includes additional functionality + * such as bulk loading, set operations, and diffing. + * It is separated to keep the core BTree class small from a bundle size perspective. + * Note: each additional functionality piece is available as a standalone function from the extended folder. + * @extends BTree + */ export class BTreeEx extends BTree { + /** + * Bulk loads a new `BTreeEx` from a sorted alternating list of entries. + * This reuses the same algorithm as `extended/bulkLoad`, but produces a `BTreeEx`. + * Time and space complexity are O(n). + * @param entries Alternating array of keys and values: `[key0, value0, key1, value1, ...]`. Must be sorted by key in strictly ascending order. + * @param maxNodeSize The branching factor (maximum number of children per node). + * @param compare Comparator to use. Defaults to the standard comparator if omitted. + * @returns A fully built tree containing the supplied entries. + * @throws Error if the entries are not strictly sorted or contain duplicate keys. 
+ */ + static bulkLoad( + entries: (K | V)[], + maxNodeSize: number, + compare?: (a: K, b: K) => number + ): BTreeEx { + const cmp = compare ?? (defaultComparator as unknown as (a: K, b: K) => number); + const root = bulkLoadRoot(entries, maxNodeSize, cmp); + const tree = new BTreeEx(undefined, cmp, maxNodeSize); + const target = tree as unknown as BTreeWithInternals; + target._root = root; + target._size = root.size(); + return tree; + } + + /** See {@link BTree.clone}. */ clone(): this { const source = this as unknown as BTreeWithInternals; source._root.isShared = true; @@ -13,6 +51,7 @@ export class BTreeEx extends BTree { return result as this; } + /** See {@link BTree.greedyClone}. */ greedyClone(force?: boolean): this { const source = this as unknown as BTreeWithInternals; const result = new BTreeEx(undefined, this._compare, this._maxNodeSize); @@ -22,22 +61,130 @@ export class BTreeEx extends BTree { return result as this; } + /** + * Computes the differences between `this` and `other`. + * For efficiency, the diff is returned via invocations of supplied handlers. + * The computation is optimized for the case in which the two trees have large amounts of shared data + * (obtained by calling the `clone` or `with` APIs) and will avoid any iteration of shared state. + * The handlers can cause computation to early exit by returning `{ break: R }`. + * Neither collection should be mutated during the comparison (inside your callbacks), as this method assumes they remain stable. + * @param other The tree to compute a diff against. + * @param onlyThis Callback invoked for all keys only present in `this`. + * @param onlyOther Callback invoked for all keys only present in `other`. + * @param different Callback invoked for all keys with differing values. + * @returns The first `break` payload returned by a handler, or `undefined` if no handler breaks. + * @throws Error if the supplied trees were created with different comparators. 
+ */ diffAgainst( other: BTree, onlyThis?: (k: K, v: V) => { break?: R } | void, onlyOther?: (k: K, v: V) => { break?: R } | void, different?: (k: K, vThis: V, vOther: V) => { break?: R } | void ): R | undefined { - return diffAgainstAlgorithm(this, other, onlyThis, onlyOther, different); + return diffAgainst(this, other, onlyThis, onlyOther, different); + } + + /** + * Calls the supplied `callback` for each key/value pair shared by this tree and `other`, in sorted key order. + * Neither tree is modified. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges between the trees, because disjoint subtrees are skipped. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param other The other tree to compare with this one. + * @param callback Called for keys that appear in both trees. It can cause iteration to early exit by returning `{ break: R }`. + * @returns The first `break` payload returned by the callback, or `undefined` if the walk finishes. + * @throws Error if the two trees were created with different comparators. + */ + forEachKeyInBoth( + other: BTree, + callback: (key: K, leftValue: V, rightValue: V) => { break?: R } | void + ): R | undefined { + return forEachKeyInBoth(this, other, callback); + } + + /** + * Calls the supplied `callback` for each key/value pair that exists in this tree but not in `other` + * (set subtraction). The callback runs in sorted key order and neither tree is modified. + * + * Complexity is O(N + M) when the key ranges overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint ranges between the trees, because non-overlapping subtrees are skipped. 
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant
+ * numbers of non-overlapping key ranges it is much faster.
+ * @param other Keys present in this tree will be omitted from the callback.
+ * @param callback Invoked for keys unique to `this`. It can cause iteration to early exit by returning `{ break: R }`.
+ * @returns The first `break` payload returned by the callback, or `undefined` if all qualifying keys are visited.
+ * @throws Error if the trees were created with different comparators.
+ */
+ forEachKeyNotIn<R = any>(
+ other: BTree<K, V>,
+ callback: (key: K, value: V) => { break?: R } | void
+ ): R | undefined {
+ return forEachKeyNotIn(this, other, callback);
+ }
+
+ /**
+ * Returns a new tree containing only keys present in both trees.
+ * Neither tree is modified.
+ *
+ * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D),
+ * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely.
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant
+ * numbers of non-overlapping key ranges it is much faster.
+ * @param other The other tree to intersect with this one.
+ * @param combineFn Called for keys that appear in both trees. Return the desired value.
+ * @returns A new `BTreeEx` populated with the intersection.
+ * @throws Error if the trees were created with different comparators.
+ */
+ intersect(other: BTreeEx<K, V>, combineFn: (key: K, leftValue: V, rightValue: V) => V): BTreeEx<K, V> {
+ return intersect<BTreeEx<K, V>, K, V>(this, other, combineFn);
+ }
+
+ /**
+ * Efficiently unions this tree with `other`, reusing subtrees wherever possible without modifying either input.
+ *
+ * Complexity is O(N + M) in the fully overlapping case, and additionally bounded by O(log(N + M) * D)
+ * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely.
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant
+ * numbers of non-overlapping key ranges it is much faster.
+ * @param other The other tree to union with this one.
+ * @param combineFn Called for keys that appear in both trees. Return the desired value, or `undefined` to omit the key.
+ * @returns A new `BTreeEx` that contains the unioned key/value pairs.
+ * @throws Error if the trees were created with different comparators or max node sizes.
+ */
+ union(other: BTreeEx<K, V>, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): BTreeEx<K, V> {
+ return union<BTreeEx<K, V>, K, V>(this, other, combineFn);
+ }
+
+ /**
+ * Returns a new tree containing only the keys that are present in this tree but not `other` (set subtraction).
+ * Neither input tree is modified.
+ *
+ * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by
+ * O(log(N + M) * D1) and space by O(log N * D2) where `D1` is the number of disjoint key ranges between the trees
+ * and `D2` is the number of disjoint ranges inside this tree.
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant
+ * numbers of non-overlapping key ranges it is much faster.
+ * @param other The tree whose keys will be removed from the result.
+ * @returns A new `BTreeEx` representing `this \ other`.
+ * @throws Error if the trees were created with different comparators or max node sizes.
+ */
+ subtract(other: BTreeEx<K, V>): BTreeEx<K, V> {
+ return subtract<BTreeEx<K, V>, K, V>(this, other);
 }
 }
 export interface BTreeEx {
+ /** See {@link BTree.with}. */
 with(key: K): BTreeEx;
 with(key: K, value: V2, overwrite?: boolean): BTreeEx;
 with(key: K, value?: V2, overwrite?: boolean): BTreeEx;
+ /** See {@link BTree.withPairs}. */
 withPairs(pairs: [K, V | V2][], overwrite: boolean): BTreeEx;
+ /** See {@link BTree.withKeys}. 
*/
 withKeys(keys: K[], returnThisIfUnchanged?: boolean): BTreeEx;
+ /** See {@link BTree.mapValues}. */
 mapValues(callback: (v: V, k: K, counter: number) => R): BTreeEx;
 }
diff --git a/extended/intersect.d.ts b/extended/intersect.d.ts
new file mode 100644
index 0000000..ddda066
--- /dev/null
+++ b/extended/intersect.d.ts
@@ -0,0 +1,16 @@
+import BTree from '../b+tree';
+/**
+ * Returns a new tree containing only keys present in both input trees.
+ * Neither tree is modified.
+ *
+ * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D),
+ * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely.
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant
+ * numbers of non-overlapping key ranges it is much faster.
+ * @param treeA First tree to intersect.
+ * @param treeB Second tree to intersect.
+ * @param combineFn Called for keys that appear in both trees. Return the desired value.
+ * @returns A new tree populated with the intersection.
+ * @throws Error if the trees were created with different comparators.
+ */
+export default function intersect<TBTree extends BTree<K, V>, K, V>(treeA: TBTree, treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V): TBTree;
diff --git a/extended/intersect.js b/extended/intersect.js
new file mode 100644
index 0000000..2f8c89b
--- /dev/null
+++ b/extended/intersect.js
@@ -0,0 +1,42 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+var shared_1 = require("./shared");
+var forEachKeyInBoth_1 = __importDefault(require("./forEachKeyInBoth"));
+var bulkLoad_1 = require("./bulkLoad");
+/**
+ * Returns a new tree containing only keys present in both input trees.
+ * Neither tree is modified. 
+ * + * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D), + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param treeA First tree to intersect. + * @param treeB Second tree to intersect. + * @param combineFn Called for keys that appear in both trees. Return the desired value. + * @returns A new tree populated with the intersection. + * @throws Error if the trees were created with different comparators. + */ +function intersect(treeA, treeB, combineFn) { + var _treeA = treeA; + var _treeB = treeB; + var branchingFactor = (0, shared_1.checkCanDoSetOperation)(_treeA, _treeB, true); + if (_treeA._root.size() === 0) + return treeA.clone(); + if (_treeB._root.size() === 0) + return treeB.clone(); + var intersected = (0, shared_1.createAlternatingList)(); + (0, forEachKeyInBoth_1.default)(treeA, treeB, function (key, leftValue, rightValue) { + var mergedValue = combineFn(key, leftValue, rightValue); + (0, shared_1.alternatingPush)(intersected, key, mergedValue); + }); + // Intersected keys are guaranteed to be in order, so we can bulk load + var constructor = treeA.constructor; + var resultTree = new constructor(undefined, treeA._compare, branchingFactor); + resultTree._root = (0, bulkLoad_1.bulkLoadRoot)(intersected, branchingFactor, treeA._compare); + return resultTree; +} +exports.default = intersect; diff --git a/extended/intersect.ts b/extended/intersect.ts new file mode 100644 index 0000000..133faf7 --- /dev/null +++ b/extended/intersect.ts @@ -0,0 +1,44 @@ +import BTree from '../b+tree'; +import { alternatingPush, createAlternatingList, checkCanDoSetOperation, type BTreeWithInternals, BTreeConstructor } from './shared'; +import forEachKeyInBoth from './forEachKeyInBoth'; +import { 
bulkLoadRoot } from './bulkLoad';
+
+/**
+ * Returns a new tree containing only keys present in both input trees.
+ * Neither tree is modified.
+ *
+ * Complexity is O(N + M) in the fully overlapping case and additionally bounded by O(log(N + M) * D),
+ * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely.
+ * In practice, that means for keys of random distribution the performance is linear and for keys with significant
+ * numbers of non-overlapping key ranges it is much faster.
+ * @param treeA First tree to intersect.
+ * @param treeB Second tree to intersect.
+ * @param combineFn Called for keys that appear in both trees. Return the desired value.
+ * @returns A new tree populated with the intersection.
+ * @throws Error if the trees were created with different comparators.
+ */
+export default function intersect<TBTree extends BTree<K, V>, K, V>(
+ treeA: TBTree,
+ treeB: TBTree,
+ combineFn: (key: K, leftValue: V, rightValue: V) => V
+): TBTree {
+ const _treeA = treeA as unknown as BTreeWithInternals<K, V>;
+ const _treeB = treeB as unknown as BTreeWithInternals<K, V>;
+ const branchingFactor = checkCanDoSetOperation(_treeA, _treeB, true);
+ if (_treeA._root.size() === 0)
+ return treeA.clone();
+ if (_treeB._root.size() === 0)
+ return treeB.clone();
+
+ const intersected = createAlternatingList<K, V>();
+ forEachKeyInBoth(treeA, treeB, (key, leftValue, rightValue) => {
+ const mergedValue = combineFn(key, leftValue, rightValue);
+ alternatingPush(intersected, key, mergedValue);
+ });
+
+ // Intersected keys are guaranteed to be in order, so we can bulk load
+ const constructor = treeA.constructor as BTreeConstructor;
+ const resultTree = new constructor(undefined, treeA._compare, branchingFactor);
+ resultTree._root = bulkLoadRoot(intersected, branchingFactor, treeA._compare);
+ return resultTree as unknown as TBTree;
+}
diff --git a/extended/parallelWalk.d.ts b/extended/parallelWalk.d.ts
new file mode 100644
index 0000000..cb0ff5c
--- /dev/null
+++ 
b/extended/parallelWalk.d.ts @@ -0,0 +1 @@ +export {}; diff --git a/extended/parallelWalk.js b/extended/parallelWalk.js new file mode 100644 index 0000000..1cfb82b --- /dev/null +++ b/extended/parallelWalk.js @@ -0,0 +1,188 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.noop = exports.moveTo = exports.getKey = exports.createCursor = exports.moveForwardOne = void 0; +/** + * Walks the cursor forward by one key. + * Returns true if end-of-tree was reached (cursor not structurally mutated). + * Optimized for this case over the more general `moveTo` function. + * @internal + */ +function moveForwardOne(cur, other) { + var leaf = cur.leaf; + var nextIndex = cur.leafIndex + 1; + if (nextIndex < leaf.keys.length) { + // Still within current leaf + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, true); + cur.leafIndex = nextIndex; + return false; + } + // If our optimized step within leaf failed, use full moveTo logic + // Pass isInclusive=false to ensure we walk forward to the key exactly after the current + return moveTo(cur, other, getKey(cur), false, true)[0]; +} +exports.moveForwardOne = moveForwardOne; +/** + * Create a cursor pointing to the leftmost key of the supplied tree. + * @internal + */ +function createCursor(tree, makePayload, onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown) { + var spine = []; + var n = tree._root; + while (!n.isLeaf) { + var ni = n; + var payload = makePayload(); + spine.push({ node: ni, childIndex: 0, payload: payload }); + n = ni.children[0]; + } + var leafPayload = makePayload(); + var cur = { + tree: tree, + leaf: n, leafIndex: 0, + spine: spine, + leafPayload: leafPayload, + makePayload: makePayload, + onEnterLeaf: onEnterLeaf, + onMoveInLeaf: onMoveInLeaf, + onExitLeaf: onExitLeaf, + onStepUp: onStepUp, + onStepDown: onStepDown + }; + return cur; +} +exports.createCursor = createCursor; +/** + * Gets the key at the current cursor position. 
+ * @internal + */ +function getKey(c) { + return c.leaf.keys[c.leafIndex]; +} +exports.getKey = getKey; +/** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). + * Also returns a boolean indicating if the target key was landed on exactly. + * @internal + */ +function moveTo(cur, other, targetKey, isInclusive, startedEqual) { + // Cache for perf + var cmp = cur.tree._compare; + var onMoveInLeaf = cur.onMoveInLeaf; + // Fast path: destination within current leaf + var leaf = cur.leaf; + var leafPayload = cur.leafPayload; + var i = leaf.indexOf(targetKey, -1, cmp); + var destInLeaf; + var targetExactlyReached; + if (i < 0) { + destInLeaf = ~i; + targetExactlyReached = false; + } + else { + if (isInclusive) { + destInLeaf = i; + targetExactlyReached = true; + } + else { + destInLeaf = i + 1; + targetExactlyReached = false; + } + } + var leafKeyCount = leaf.keys.length; + if (destInLeaf < leafKeyCount) { + onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); + cur.leafIndex = destInLeaf; + return [false, targetExactlyReached]; + } + // Find first ancestor with a viable right step + var spine = cur.spine; + var initialSpineLength = spine.length; + var descentLevel = -1; + var descentIndex = -1; + for (var s = initialSpineLength - 1; s >= 0; s--) { + var parent = spine[s].node; + var indexOf = parent.indexOf(targetKey, -1, cmp); + var stepDownIndex = void 0; + if (indexOf < 0) { + stepDownIndex = ~indexOf; + } + else { + stepDownIndex = isInclusive ? 
indexOf : indexOf + 1; + } + // Note: when key not found, indexOf with failXor=0 already returns insertion index + if (stepDownIndex < parent.keys.length) { + descentLevel = s; + descentIndex = stepDownIndex; + break; + } + } + // Exit leaf; even if no spine, we did walk out of it conceptually + var startIndex = cur.leafIndex; + cur.onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); + var onStepUp = cur.onStepUp; + if (descentLevel < 0) { + // No descent point; step up all the way; last callback gets infinity + for (var depth = initialSpineLength - 1; depth >= 0; depth--) { + var entry_1 = spine[depth]; + var sd = depth === 0 ? Number.POSITIVE_INFINITY : Number.NaN; + onStepUp(entry_1.node, initialSpineLength - depth, entry_1.payload, entry_1.childIndex, depth, sd, cur, other); + } + return [true, false]; + } + // Step up through ancestors above the descentLevel + for (var depth = initialSpineLength - 1; depth > descentLevel; depth--) { + var entry_2 = spine[depth]; + onStepUp(entry_2.node, initialSpineLength - depth, entry_2.payload, entry_2.childIndex, depth, Number.NaN, cur, other); + } + var entry = spine[descentLevel]; + onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur, other); + entry.childIndex = descentIndex; + var onStepDown = cur.onStepDown; + var makePayload = cur.makePayload; + // Descend, invoking onStepDown and creating payloads + var height = initialSpineLength - descentLevel - 1; // calculate height before changing length + spine.length = descentLevel + 1; + var node = spine[descentLevel].node.children[descentIndex]; + while (!node.isLeaf) { + var ni = node; + var keys = ni.keys; + var stepDownIndex = ni.indexOf(targetKey, 0, cmp); + if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) + stepDownIndex++; + var payload = makePayload(); + var spineIndex = spine.length; + spine.push({ node: ni, childIndex: stepDownIndex, payload: 
payload }); + onStepDown(ni, height, spineIndex, stepDownIndex, cur, other); + node = ni.children[stepDownIndex]; + height -= 1; + } + // Enter destination leaf + var idx = node.indexOf(targetKey, -1, cmp); + var destIndex; + if (idx < 0) { + destIndex = ~idx; + targetExactlyReached = false; + } + else { + if (isInclusive) { + destIndex = idx; + targetExactlyReached = true; + } + else { + destIndex = idx + 1; + targetExactlyReached = false; + } + } + cur.leaf = node; + cur.leafPayload = makePayload(); + cur.leafIndex = destIndex; + cur.onEnterLeaf(node, destIndex, cur, other); + return [false, targetExactlyReached]; +} +exports.moveTo = moveTo; +/** + * A no-operation function. + * @internal + */ +function noop() { } +exports.noop = noop; diff --git a/extended/parallelWalk.ts b/extended/parallelWalk.ts new file mode 100644 index 0000000..fe9a3ac --- /dev/null +++ b/extended/parallelWalk.ts @@ -0,0 +1,221 @@ +import { BNode, BNodeInternal } from '../b+tree'; +import type { BTreeWithInternals } from './shared'; + +/** + * A walkable cursor for BTree set operations. 
+ * @internal + */ +export interface Cursor { + tree: BTreeWithInternals; + leaf: BNode; + leafIndex: number; + spine: Array<{ node: BNodeInternal, childIndex: number, payload: TPayload }>; + leafPayload: TPayload; + makePayload: () => TPayload; + onMoveInLeaf: (leaf: BNode, payload: TPayload, fromIndex: number, toIndex: number, isInclusive: boolean) => void; + onExitLeaf: (leaf: BNode, payload: TPayload, startingIndex: number, isInclusive: boolean, cursorThis: Cursor) => void; + onStepUp: (parent: BNodeInternal, height: number, payload: TPayload, fromIndex: number, spineIndex: number, stepDownIndex: number, cursorThis: Cursor, cursorOther: Cursor) => void; + onStepDown: (node: BNodeInternal, height: number, spineIndex: number, stepDownIndex: number, cursorThis: Cursor, cursorOther: Cursor) => void; + onEnterLeaf: (leaf: BNode, destIndex: number, cursorThis: Cursor, cursorOther: Cursor) => void; +} + +/** + * Walks the cursor forward by one key. + * Returns true if end-of-tree was reached (cursor not structurally mutated). + * Optimized for this case over the more general `moveTo` function. + * @internal + */ +export function moveForwardOne( + cur: Cursor, + other: Cursor +): boolean { + const leaf = cur.leaf; + const nextIndex = cur.leafIndex + 1; + if (nextIndex < leaf.keys.length) { + // Still within current leaf + cur.onMoveInLeaf(leaf, cur.leafPayload, cur.leafIndex, nextIndex, true); + cur.leafIndex = nextIndex; + return false; + } + + // If our optimized step within leaf failed, use full moveTo logic + // Pass isInclusive=false to ensure we walk forward to the key exactly after the current + return moveTo(cur, other, getKey(cur), false, true)[0]; +} + +/** + * Create a cursor pointing to the leftmost key of the supplied tree. 
+ * @internal + */ +export function createCursor( + tree: BTreeWithInternals, + makePayload: Cursor["makePayload"], + onEnterLeaf: Cursor["onEnterLeaf"], + onMoveInLeaf: Cursor["onMoveInLeaf"], + onExitLeaf: Cursor["onExitLeaf"], + onStepUp: Cursor["onStepUp"], + onStepDown: Cursor["onStepDown"], +): Cursor { + const spine: Array<{ node: BNodeInternal, childIndex: number, payload: TP }> = []; + let n: BNode = tree._root; + while (!n.isLeaf) { + const ni = n as BNodeInternal; + const payload = makePayload(); + spine.push({ node: ni, childIndex: 0, payload }); + n = ni.children[0]; + } + const leafPayload = makePayload(); + const cur: Cursor = { + tree, leaf: n, leafIndex: 0, spine, leafPayload, makePayload: makePayload, + onEnterLeaf, onMoveInLeaf, onExitLeaf, onStepUp, onStepDown + }; + return cur; +} + +/** + * Gets the key at the current cursor position. + * @internal + */ +export function getKey(c: Cursor): K { + return c.leaf.keys[c.leafIndex]; +} + +/** + * Move cursor strictly forward to the first key >= (inclusive) or > (exclusive) target. + * Returns a boolean indicating if end-of-tree was reached (cursor not structurally mutated). + * Also returns a boolean indicating if the target key was landed on exactly. 
+ * @internal + */ +export function moveTo( + cur: Cursor, + other: Cursor, + targetKey: K, + isInclusive: boolean, + startedEqual: boolean, +): [outOfTree: boolean, targetExactlyReached: boolean] { + // Cache for perf + const cmp = cur.tree._compare + const onMoveInLeaf = cur.onMoveInLeaf; + // Fast path: destination within current leaf + const leaf = cur.leaf; + const leafPayload = cur.leafPayload; + const i = leaf.indexOf(targetKey, -1, cmp); + let destInLeaf: number; + let targetExactlyReached: boolean; + if (i < 0) { + destInLeaf = ~i; + targetExactlyReached = false; + } else { + if (isInclusive) { + destInLeaf = i; + targetExactlyReached = true; + } else { + destInLeaf = i + 1; + targetExactlyReached = false; + } + } + const leafKeyCount = leaf.keys.length; + if (destInLeaf < leafKeyCount) { + onMoveInLeaf(leaf, leafPayload, cur.leafIndex, destInLeaf, startedEqual); + cur.leafIndex = destInLeaf; + return [false, targetExactlyReached]; + } + + // Find first ancestor with a viable right step + const spine = cur.spine; + const initialSpineLength = spine.length; + let descentLevel = -1; + let descentIndex = -1; + + for (let s = initialSpineLength - 1; s >= 0; s--) { + const parent = spine[s].node; + const indexOf = parent.indexOf(targetKey, -1, cmp); + let stepDownIndex: number; + if (indexOf < 0) { + stepDownIndex = ~indexOf; + } else { + stepDownIndex = isInclusive ? 
indexOf : indexOf + 1; + } + + // Note: when key not found, indexOf with failXor=0 already returns insertion index + if (stepDownIndex < parent.keys.length) { + descentLevel = s; + descentIndex = stepDownIndex; + break; + } + } + + // Exit leaf; even if no spine, we did walk out of it conceptually + const startIndex = cur.leafIndex; + cur.onExitLeaf(leaf, leafPayload, startIndex, startedEqual, cur); + + const onStepUp = cur.onStepUp; + if (descentLevel < 0) { + // No descent point; step up all the way; last callback gets infinity + for (let depth = initialSpineLength - 1; depth >= 0; depth--) { + const entry = spine[depth]; + const sd = depth === 0 ? Number.POSITIVE_INFINITY : Number.NaN; + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, sd, cur, other); + } + return [true, false]; + } + + // Step up through ancestors above the descentLevel + for (let depth = initialSpineLength - 1; depth > descentLevel; depth--) { + const entry = spine[depth]; + onStepUp(entry.node, initialSpineLength - depth, entry.payload, entry.childIndex, depth, Number.NaN, cur, other); + } + + const entry = spine[descentLevel]; + onStepUp(entry.node, initialSpineLength - descentLevel, entry.payload, entry.childIndex, descentLevel, descentIndex, cur, other); + entry.childIndex = descentIndex; + + const onStepDown = cur.onStepDown; + const makePayload = cur.makePayload; + + // Descend, invoking onStepDown and creating payloads + let height = initialSpineLength - descentLevel - 1; // calculate height before changing length + spine.length = descentLevel + 1; + let node: BNode = spine[descentLevel].node.children[descentIndex]; + + while (!node.isLeaf) { + const ni = node as BNodeInternal; + const keys = ni.keys; + let stepDownIndex = ni.indexOf(targetKey, 0, cmp); + if (!isInclusive && stepDownIndex < keys.length && cmp(keys[stepDownIndex], targetKey) === 0) + stepDownIndex++; + const payload = makePayload(); + const spineIndex = spine.length; + 
spine.push({ node: ni, childIndex: stepDownIndex, payload }); + onStepDown(ni, height, spineIndex, stepDownIndex, cur, other); + node = ni.children[stepDownIndex]; + height -= 1; + } + + // Enter destination leaf + const idx = node.indexOf(targetKey, -1, cmp); + let destIndex: number; + if (idx < 0) { + destIndex = ~idx; + targetExactlyReached = false; + } else { + if (isInclusive) { + destIndex = idx; + targetExactlyReached = true; + } else { + destIndex = idx + 1; + targetExactlyReached = false; + } + } + cur.leaf = node; + cur.leafPayload = makePayload(); + cur.leafIndex = destIndex; + cur.onEnterLeaf(node, destIndex, cur, other); + return [false, targetExactlyReached]; +} + +/** + * A no-operation function. + * @internal + */ +export function noop(): void { } diff --git a/extended/shared.d.ts b/extended/shared.d.ts index 4527a4e..cb0ff5c 100644 --- a/extended/shared.d.ts +++ b/extended/shared.d.ts @@ -1,8 +1 @@ -import type { BNode } from '../b+tree'; -import BTree from '../b+tree'; -export declare type BTreeWithInternals = { - _root: BNode; - _size: number; - _maxNodeSize: number; - _compare: (a: K, b: K) => number; -} & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; +export {}; diff --git a/extended/shared.js b/extended/shared.js index c8ad2e5..982b280 100644 --- a/extended/shared.js +++ b/extended/shared.js @@ -1,2 +1,110 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.checkCanDoSetOperation = exports.branchingFactorErrorMsg = exports.comparatorErrorMsg = exports.alternatingPush = exports.alternatingGetSecond = exports.alternatingGetFirst = exports.alternatingCount = exports.createAlternatingList = exports.makeLeavesFrom = void 0; +var b_tree_1 = require("../b+tree"); +/** + * Builds leaves from the given alternating list of entries. + * The supplied load factor will be respected if possible, but may be exceeded + * to ensure the 50% full rule is maintained. 
+ * Note: if < maxNodeSize entries are provided, only one leaf will be created, which may be underfilled. + * @param alternatingList The list of entries to build leaves from. + * @param maxNodeSize The maximum node size (branching factor) for the resulting leaves. + * @param onLeafCreation Called when a new leaf is created. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. + * @internal + */ +function makeLeavesFrom(alternatingList, maxNodeSize, onLeafCreation, loadFactor) { + var totalPairs = alternatingCount(alternatingList); + if (totalPairs === 0) + return 0; + var targetSize = Math.ceil(maxNodeSize * loadFactor); + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. + var remainingLeaves = totalPairs <= maxNodeSize ? 1 : Math.ceil(totalPairs / targetSize); + var remaining = totalPairs; + var pairIndex = 0; + while (remainingLeaves > 0) { + var chunkSize = Math.ceil(remaining / remainingLeaves); + var keys = new Array(chunkSize); + var vals = new Array(chunkSize); + for (var i = 0; i < chunkSize; i++) { + keys[i] = alternatingGetFirst(alternatingList, pairIndex); + vals[i] = alternatingGetSecond(alternatingList, pairIndex); + pairIndex++; + } + remaining -= chunkSize; + remainingLeaves--; + var leaf = new b_tree_1.BNode(keys, vals); + onLeafCreation(leaf); + } +} +exports.makeLeavesFrom = makeLeavesFrom; +; +// ------- Alternating list helpers ------- +// These helpers manage a list that alternates between two types of entries. +// Storing data this way avoids small tuple allocations and shows major improvements +// in GC time in benchmarks. +/** + * Creates an empty alternating list with the specified element types. + * @internal + */ +function createAlternatingList() { + return []; +} +exports.createAlternatingList = createAlternatingList; +/** + * Counts the number of `[A, B]` pairs stored in the alternating list. 
+ * @internal + */ +function alternatingCount(list) { + return list.length >> 1; +} +exports.alternatingCount = alternatingCount; +/** + * Reads the first entry of the pair at the given index. + * @internal + */ +function alternatingGetFirst(list, index) { + return list[index << 1]; +} +exports.alternatingGetFirst = alternatingGetFirst; +/** + * Reads the second entry of the pair at the given index. + * @internal + */ +function alternatingGetSecond(list, index) { + return list[(index << 1) + 1]; +} +exports.alternatingGetSecond = alternatingGetSecond; +/** + * Appends a pair to the alternating list. + * @internal + */ +function alternatingPush(list, first, second) { + // Micro benchmarks show this is the fastest way to do this + list.push(first, second); +} +exports.alternatingPush = alternatingPush; +/** + * Error message used when comparators differ between trees. + * @internal + */ +exports.comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; +/** + * Error message used when branching factors differ between trees. + * @internal + */ +exports.branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; +/** + * Checks that two trees can be used together in a set operation. 
+ * @internal + */ +function checkCanDoSetOperation(treeA, treeB, supportsDifferentBranchingFactors) { + if (treeA._compare !== treeB._compare) + throw new Error(exports.comparatorErrorMsg); + var branchingFactor = treeA._maxNodeSize; + if (!supportsDifferentBranchingFactors && branchingFactor !== treeB._maxNodeSize) + throw new Error(exports.branchingFactorErrorMsg); + return branchingFactor; +} +exports.checkCanDoSetOperation = checkCanDoSetOperation; diff --git a/extended/shared.ts b/extended/shared.ts index 58c7982..19c5a34 100644 --- a/extended/shared.ts +++ b/extended/shared.ts @@ -1,9 +1,140 @@ -import type { BNode } from '../b+tree'; -import BTree from '../b+tree'; +import BTree, { BNode } from '../b+tree'; -export type BTreeWithInternals = { +/** + * BTree with access to internal properties. + * @internal + */ +export type BTreeWithInternals = BTree> = { _root: BNode; _size: number; _maxNodeSize: number; _compare: (a: K, b: K) => number; -} & Omit, '_root' | '_size' | '_maxNodeSize' | '_compare'>; +} & Omit; + +/** + * Alternating list storing entries as `[A0, B0, A1, B1, ...]`. + * @internal + */ +export type AlternatingList = Array; + +/** + * Builds leaves from the given alternating list of entries. + * The supplied load factor will be respected if possible, but may be exceeded + * to ensure the 50% full rule is maintained. + * Note: if < maxNodeSize entries are provided, only one leaf will be created, which may be underfilled. + * @param alternatingList The list of entries to build leaves from. + * @param maxNodeSize The maximum node size (branching factor) for the resulting leaves. + * @param onLeafCreation Called when a new leaf is created. + * @param loadFactor Desired load factor for created leaves. Must be between 0.5 and 1.0. 
+ * @internal + */ +export function makeLeavesFrom( + alternatingList: AlternatingList, + maxNodeSize: number, + onLeafCreation: (node: BNode) => void, + loadFactor: number +) { + const totalPairs = alternatingCount(alternatingList); + if (totalPairs === 0) + return 0; + + const targetSize = Math.ceil(maxNodeSize * loadFactor); + + // This method creates as many evenly filled leaves as possible from + // the pending entries. All will be > 50% full if we are creating more than one leaf. + let remainingLeaves = totalPairs <= maxNodeSize ? 1 : Math.ceil(totalPairs / targetSize); + let remaining = totalPairs; + let pairIndex = 0; + while (remainingLeaves > 0) { + const chunkSize = Math.ceil(remaining / remainingLeaves); + const keys = new Array(chunkSize); + const vals = new Array(chunkSize); + for (let i = 0; i < chunkSize; i++) { + keys[i] = alternatingGetFirst(alternatingList, pairIndex); + vals[i] = alternatingGetSecond(alternatingList, pairIndex); + pairIndex++; + } + remaining -= chunkSize; + remainingLeaves--; + const leaf = new BNode(keys, vals); + onLeafCreation(leaf); + } +}; + +// ------- Alternating list helpers ------- +// These helpers manage a list that alternates between two types of entries. +// Storing data this way avoids small tuple allocations and shows major improvements +// in GC time in benchmarks. + +/** + * Creates an empty alternating list with the specified element types. + * @internal + */ +export function createAlternatingList(): AlternatingList { + return [] as AlternatingList; +} + +/** + * Counts the number of `[A, B]` pairs stored in the alternating list. + * @internal + */ +export function alternatingCount(list: AlternatingList): number { + return list.length >> 1; +} + +/** + * Reads the first entry of the pair at the given index. + * @internal + */ +export function alternatingGetFirst(list: AlternatingList, index: number): A { + return list[index << 1] as A; +} + +/** + * Reads the second entry of the pair at the given index. 
+ * @internal + */ +export function alternatingGetSecond(list: AlternatingList, index: number): B { + return list[(index << 1) + 1] as B; +} + +/** + * Appends a pair to the alternating list. + * @internal + */ +export function alternatingPush(list: AlternatingList, first: A, second: B): void { + // Micro benchmarks show this is the fastest way to do this + list.push(first, second); +} + +/** + * Error message used when comparators differ between trees. + * @internal + */ +export const comparatorErrorMsg = "Cannot perform set operations on BTrees with different comparators."; + +/** + * Error message used when branching factors differ between trees. + * @internal + */ +export const branchingFactorErrorMsg = "Cannot perform set operations on BTrees with different max node sizes."; + +/** + * Checks that two trees can be used together in a set operation. + * @internal + */ +export function checkCanDoSetOperation(treeA: BTreeWithInternals, treeB: BTreeWithInternals, supportsDifferentBranchingFactors: boolean): number { + if (treeA._compare !== treeB._compare) + throw new Error(comparatorErrorMsg); + + const branchingFactor = treeA._maxNodeSize; + if (!supportsDifferentBranchingFactors && branchingFactor !== treeB._maxNodeSize) + throw new Error(branchingFactorErrorMsg); + return branchingFactor; +} + +/** + * Helper constructor signature used by set-operation helpers to create a result tree that preserves the input subtype. + * @internal + */ +export type BTreeConstructor, K, V> = new (entries?: [K, V][], compare?: (a: K, b: K) => number, maxNodeSize?: number) => BTreeWithInternals; diff --git a/extended/subtract.d.ts b/extended/subtract.d.ts new file mode 100644 index 0000000..27060ce --- /dev/null +++ b/extended/subtract.d.ts @@ -0,0 +1,16 @@ +import BTree from '../b+tree'; +/** + * Returns a new tree containing only the keys that are present in `targetTree` but not `subtractTree` (set subtraction). + * Neither tree is modified. 
+ * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2), where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside `targetTree`, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param targetTree The tree to subtract from. + * @param subtractTree The tree whose keys will be removed from the result. + * @returns A new tree that contains the subtraction result. + * @throws Error if the trees were created with different comparators or max node sizes. + */ +export default function subtract, K, V>(targetTree: TBTree, subtractTree: TBTree): TBTree; diff --git a/extended/subtract.js b/extended/subtract.js new file mode 100644 index 0000000..b118d73 --- /dev/null +++ b/extended/subtract.js @@ -0,0 +1,35 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +var shared_1 = require("./shared"); +var decompose_1 = require("./decompose"); +/** + * Returns a new tree containing only the keys that are present in `targetTree` but not `subtractTree` (set subtraction). + * Neither tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2), where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside `targetTree`, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param targetTree The tree to subtract from. 
+ * @param subtractTree The tree whose keys will be removed from the result. + * @returns A new tree that contains the subtraction result. + * @throws Error if the trees were created with different comparators or max node sizes. + */ +function subtract(targetTree, subtractTree) { + var _targetTree = targetTree; + var _subtractTree = subtractTree; + var branchingFactor = (0, shared_1.checkCanDoSetOperation)(_targetTree, _subtractTree, false); + if (_targetTree._root.size() === 0 || _subtractTree._root.size() === 0) + return targetTree.clone(); + // Decompose target tree into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are exploded (and filtered) due to intersecting leaves in subtractTree. + var decomposed = (0, decompose_1.decompose)(_targetTree, _subtractTree, function () { return undefined; }, true); + var constructor = targetTree.constructor; + if ((0, shared_1.alternatingCount)(decomposed.disjoint) === 0) { + return new constructor(undefined, targetTree._compare, targetTree._maxNodeSize); + } + return (0, decompose_1.buildFromDecomposition)(constructor, branchingFactor, decomposed, targetTree._compare, targetTree._maxNodeSize); +} +exports.default = subtract; diff --git a/extended/subtract.ts b/extended/subtract.ts new file mode 100644 index 0000000..89cd7e8 --- /dev/null +++ b/extended/subtract.ts @@ -0,0 +1,38 @@ +import BTree from '../b+tree'; +import { checkCanDoSetOperation, type BTreeWithInternals, BTreeConstructor, alternatingCount } from './shared'; +import { buildFromDecomposition, decompose } from './decompose'; + +/** + * Returns a new tree containing only the keys that are present in `targetTree` but not `subtractTree` (set subtraction). + * Neither tree is modified. + * + * Complexity is O(N + M) for time and O(N) for allocations in the worst case. 
Additionally, time is bounded by + * O(log(N + M) * D1) and space by O(log N * D2), where `D1` is the number of disjoint key ranges between the trees + * and `D2` is the number of disjoint ranges inside `targetTree`, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param targetTree The tree to subtract from. + * @param subtractTree The tree whose keys will be removed from the result. + * @returns A new tree that contains the subtraction result. + * @throws Error if the trees were created with different comparators or max node sizes. + */ +export default function subtract, K, V>( + targetTree: TBTree, + subtractTree: TBTree +): TBTree { + const _targetTree = targetTree as unknown as BTreeWithInternals; + const _subtractTree = subtractTree as unknown as BTreeWithInternals; + const branchingFactor = checkCanDoSetOperation(_targetTree, _subtractTree, false); + if (_targetTree._root.size() === 0 || _subtractTree._root.size() === 0) + return targetTree.clone(); + + // Decompose target tree into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are exploded (and filtered) due to intersecting leaves in subtractTree. 
+ const decomposed = decompose(_targetTree, _subtractTree, () => undefined, true); + const constructor = targetTree.constructor as BTreeConstructor; + if (alternatingCount(decomposed.disjoint) === 0) { + return new constructor(undefined, targetTree._compare, targetTree._maxNodeSize) as unknown as TBTree; + } + return buildFromDecomposition(constructor, branchingFactor, decomposed, targetTree._compare, targetTree._maxNodeSize); +} diff --git a/extended/union.d.ts b/extended/union.d.ts new file mode 100644 index 0000000..74ea5cc --- /dev/null +++ b/extended/union.d.ts @@ -0,0 +1,16 @@ +import BTree from '../b+tree'; +/** + * Efficiently unions two trees, reusing subtrees wherever possible without mutating either input. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param treeA First tree to union. + * @param treeB Second tree to union. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. Note: symmetric difference can be achieved by always returning `undefined`. + * @returns A new BTree that contains the unioned key/value pairs. + * @throws Error if the trees were created with different comparators or max node sizes. 
+ */ +export default function union, K, V>(treeA: TBTree, treeB: TBTree, combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined): TBTree; diff --git a/extended/union.js b/extended/union.js new file mode 100644 index 0000000..b7144ed --- /dev/null +++ b/extended/union.js @@ -0,0 +1,36 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +var shared_1 = require("./shared"); +var decompose_1 = require("./decompose"); +/** + * Efficiently unions two trees, reusing subtrees wherever possible without mutating either input. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param treeA First tree to union. + * @param treeB Second tree to union. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. Note: symmetric difference can be achieved by always returning `undefined`. + * @returns A new BTree that contains the unioned key/value pairs. + * @throws Error if the trees were created with different comparators or max node sizes. + */ +function union(treeA, treeB, combineFn) { + if (treeA === treeB) + return treeA.clone(); + var _treeA = treeA; + var _treeB = treeB; + var branchingFactor = (0, shared_1.checkCanDoSetOperation)(_treeA, _treeB, false); + if (_treeA._root.size() === 0) + return treeB.clone(); + if (_treeB._root.size() === 0) + return treeA.clone(); + // Decompose both trees into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are the result of merging intersecting leaves. 
+ var decomposed = (0, decompose_1.decompose)(_treeA, _treeB, combineFn); + var constructor = treeA.constructor; + return (0, decompose_1.buildFromDecomposition)(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); +} +exports.default = union; diff --git a/extended/union.ts b/extended/union.ts new file mode 100644 index 0000000..8a84c56 --- /dev/null +++ b/extended/union.ts @@ -0,0 +1,40 @@ +import BTree from '../b+tree'; +import { BTreeConstructor, type BTreeWithInternals, checkCanDoSetOperation } from './shared'; +import { decompose, buildFromDecomposition } from "./decompose"; + +/** + * Efficiently unions two trees, reusing subtrees wherever possible without mutating either input. + * + * Complexity is O(N + M) when the trees overlap heavily, and additionally bounded by O(log(N + M) * D) + * where `D` is the number of disjoint key ranges, because disjoint subtrees are skipped entirely. + * In practice, that means for keys of random distribution the performance is linear and for keys with significant + * numbers of non-overlapping key ranges it is much faster. + * @param treeA First tree to union. + * @param treeB Second tree to union. + * @param combineFn Called for keys that appear in both trees. Return the desired value, or + * `undefined` to omit the key from the result. Note: symmetric difference can be achieved by always returning `undefined`. + * @returns A new BTree that contains the unioned key/value pairs. + * @throws Error if the trees were created with different comparators or max node sizes. 
+ */ +export default function union, K, V>( + treeA: TBTree, + treeB: TBTree, + combineFn: (key: K, leftValue: V, rightValue: V) => V | undefined +): TBTree { + if (treeA === treeB) + return treeA.clone(); + const _treeA = treeA as unknown as BTreeWithInternals; + const _treeB = treeB as unknown as BTreeWithInternals; + const branchingFactor = checkCanDoSetOperation(_treeA, _treeB, false); + if (_treeA._root.size() === 0) + return treeB.clone(); + if (_treeB._root.size() === 0) + return treeA.clone(); + + // Decompose both trees into disjoint subtrees leaves. + // As many of these as possible will be reused from the original trees, and the remaining + // will be leaves that are the result of merging intersecting leaves. + const decomposed = decompose(_treeA, _treeB, combineFn); + const constructor = treeA.constructor as BTreeConstructor; + return buildFromDecomposition(constructor, branchingFactor, decomposed, _treeA._compare, _treeA._maxNodeSize); +} diff --git a/package-lock.json b/package-lock.json index 985f56a..c6a9486 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "sorted-btree", - "version": "1.9.0", + "version": "2.0.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "sorted-btree", - "version": "1.9.0", + "version": "2.0.0", "license": "MIT", "devDependencies": { "@types/bintrees": "^1.0.2", diff --git a/package.json b/package.json index c19b51c..a375e43 100644 --- a/package.json +++ b/package.json @@ -83,7 +83,8 @@ "jsx", "json" ], - "bail": true, + "verbose": true, + "bail": false, "testEnvironment": "node" }, "testpack": { diff --git a/readme.md b/readme.md index 224baf0..997d855 100644 --- a/readme.md +++ b/readme.md @@ -92,6 +92,8 @@ Features - Includes a lattice of interfaces for TypeScript users (see below) - Supports diffing computation between two trees that is highly optimized for the case in which a majority of nodes are shared (such as when persistent methods are used). 
+- Supports fast union & shared-key iteration via `forEachKeyInBoth` with asymptotic speedups when large disjoint ranges of keys are present. + The union operation generates a new tree that shares nodes with the original trees when possible. ### Additional operations supported on this B+ tree ### @@ -133,6 +135,7 @@ The "scanning" methods (`forEach, forRange, editRange, deleteRange`) will normal - Get a new tree with one pair removed: `t.without(key)` - Get a new tree with specific pairs removed: `t.withoutKeys(keys)` - Get a new tree with a range of keys removed: `t.withoutRange(low, high, includeHi)` +- Get a new tree that is the result of a union: `t.union(other, unionFn)` **Things to keep in mind:** I ran a test which suggested `t.with` is three times slower than `t.set`. These methods do not return a frozen tree even if the original tree was frozen (for performance reasons, e.g. frozen trees use slightly more memory.) diff --git a/scripts/size-report.js b/scripts/size-report.js index f40c19d..49f5269 100644 --- a/scripts/size-report.js +++ b/scripts/size-report.js @@ -73,7 +73,10 @@ const header = console.log(header); console.log('-'.repeat(header.length)); -for (const entry of entryPoints) { +const btreeExTransitive = { raw: 0, min: 0, gz: 0 }; +const btreeExTransitiveHasValue = { raw: false, min: false, gz: false }; + +entryPoints.forEach((entry, index) => { const raw = fileSize(entry.raw); const min = fileSize(entry.min); const gz = gzipSize(entry.min); @@ -83,6 +86,30 @@ for (const entry of entryPoints) { pad(formatBytes(min), 13) + formatBytes(gz); console.log(line); + if (index > 0) { + if (typeof raw === 'number') { + btreeExTransitive.raw += raw; + btreeExTransitiveHasValue.raw = true; + } + if (typeof min === 'number') { + btreeExTransitive.min += min; + btreeExTransitiveHasValue.min = true; + } + if (typeof gz === 'number') { + btreeExTransitive.gz += gz; + btreeExTransitiveHasValue.gz = true; + } + } +}); + +if (entryPoints.length > 1) { + const 
line = + pad('BTreeEx transitive', nameColumnWidth) + + pad(btreeExTransitiveHasValue.raw ? formatBytes(btreeExTransitive.raw) : 'n/a', 13) + + pad(btreeExTransitiveHasValue.min ? formatBytes(btreeExTransitive.min) : 'n/a', 13) + + (btreeExTransitiveHasValue.gz ? formatBytes(btreeExTransitive.gz) : 'n/a'); + console.log('-'.repeat(header.length)); + console.log(line); } if (process.exitCode) { diff --git a/b+tree.test.ts b/test/b+tree.test.ts similarity index 78% rename from b+tree.test.ts rename to test/b+tree.test.ts index d62163f..df88c26 100644 --- a/b+tree.test.ts +++ b/test/b+tree.test.ts @@ -1,21 +1,10 @@ -import BTree, {IMap, defaultComparator, simpleComparator} from './b+tree'; -import BTreeEx from './extended'; -import diffAgainst from './extended/diffAgainst'; -import SortedArray from './sorted-array'; -import MersenneTwister from 'mersenne-twister'; +import BTree, { IMap, defaultComparator, simpleComparator } from '../b+tree'; +import BTreeEx from '../extended'; +import SortedArray from '../sorted-array'; +import { addToBoth, expectTreeEqualTo, randInt } from './shared'; var test: (name:string,f:()=>void)=>void = it; -var rand: any = new MersenneTwister(1234); -function randInt(max: number) { return rand.random_int() % max; } -function expectTreeEqualTo(a: BTree, b: SortedArray) { - a.checkValid(); - expect(a.toArray()).toEqual(b.getArray()); -} -function addToBoth(a: IMap, b: IMap, k: K, v: V) { - expect(a.set(k,v)).toEqual(b.set(k,v)); -} - describe('defaultComparator', () => { const dateA = new Date(Date.UTC(96, 1, 2, 3, 4, 5)); @@ -193,6 +182,109 @@ describe('height calculation', () => }); }); +describe('cached sizes', () => +{ + function buildTestTree(entryCount: number, maxNodeSize: number) { + const tree = new BTree(undefined, undefined, maxNodeSize); + for (let i = 0; i < entryCount; i++) { + tree.set(i, i); + } + return tree; + } + + function expectSize(tree: BTree, size: number) { + expect(tree.size).toBe(size); + tree.checkValid(); + } + + 
[4, 6, 8, 16].forEach(nodeSize => { + describe(`fanout ${nodeSize}`, () => { + test('checkValid detects root size mismatch', () => { + const tree = buildTestTree(nodeSize * 8, nodeSize); + const root = (tree as any)._root; + expect(root.isLeaf).toBe(false); + (root as any).size = 0; + expect(() => tree.checkValid()).toThrow(); + }); + + test('checkValid detects mismatched child sizes', () => { + const tree = buildTestTree(nodeSize * nodeSize * 4, nodeSize); + const root = (tree as any)._root; + expect(root.isLeaf).toBe(false); + const internalChild = (root as any).children.find((child: any) => !child.isLeaf); + expect(internalChild).toBeDefined(); + (internalChild as any).size = 0; + expect(() => tree.checkValid()).toThrow(); + }); + + test('mutations preserve cached sizes', () => { + const tree = buildTestTree(nodeSize * 4, nodeSize); + const initialSize = tree.size; + const expectedKeys = new Set(); + for (let i = 0; i < initialSize; i++) + expectedKeys.add(i); + expectSize(tree, expectedKeys.size); + + // Insert sequential items + const itemsToAdd = nodeSize * 2; + for (let i = 0; i < itemsToAdd; i++) { + const key = initialSize + i; + tree.set(key, key); + expectedKeys.add(key); + } + expectSize(tree, expectedKeys.size); + + // Delete every third new item + let deleted = 0; + for (let i = 0; i < itemsToAdd; i += 3) { + const key = initialSize + i; + if (tree.delete(key)) { + deleted++; + expectedKeys.delete(key); + } + } + expectSize(tree, expectedKeys.size); + + // Bulk delete a middle range + const low = Math.floor(initialSize / 2); + const high = low + nodeSize; + const rangeDeleted = tree.deleteRange(low, high, true); + const toRemove = Array.from(expectedKeys).filter(k => k >= low && k <= high); + expect(rangeDeleted).toBe(toRemove.length); + toRemove.forEach(k => expectedKeys.delete(k)); + expectSize(tree, expectedKeys.size); + + // Mix insertions and overwrites + const extra = nodeSize * 5; + for (let i = 0; i < extra; i++) { + const insertKey = -i - 1; 
+ tree.set(insertKey, insertKey); + expectedKeys.add(insertKey); + const overwriteKey = i % (initialSize + 1); + tree.set(overwriteKey, 42); // overwrite existing keys + expectedKeys.add(overwriteKey); + } + expectSize(tree, expectedKeys.size); + + // Clone should preserve size and cached metadata + const toClone = tree.clone(); + expectSize(toClone, expectedKeys.size); + + // Edit range deletes some entries, patches others + tree.editRange(-extra, extra, false, (k, v, counter) => { + if (counter % 11 === 0) { + expectedKeys.delete(k); + return { delete: true }; + } + if (k % 5 === 0) + return { value: v + 1 }; + }); + expectSize(tree, expectedKeys.size); + }); + }); + }); +}); + describe('Simple tests on leaf nodes', () => { test('A few insertions (fanout 8)', insert8.bind(null, 8)); @@ -752,206 +844,6 @@ function testBTree(maxNodeSize: number) expectTreeEqualTo(t9, list); }); - describe("Diff computation", () => { - let onlyThis: Map; - let onlyOther: Map; - let different: Map; - function reset(): void { - onlyOther = new Map(); - onlyThis = new Map(); - different = new Map(); - } - - beforeEach(() => reset()); - - const OnlyThis = (k: number, v: number) => { onlyThis.set(k, v); } - const OnlyOther = (k: number, v: number) => { onlyOther.set(k, v); } - const Different = (k: number, vThis: number, vOther: number) => { different.set(k, `vThis: ${vThis}, vOther: ${vOther}`); } - const compare = (a: number, b: number) => a - b; - - function expectMapsEquals(mapA: Map, mapB: Map) { - const onlyA = []; - const onlyB = []; - const different = []; - mapA.forEach((valueA, keyA) => { - const valueB = mapB.get(keyA); - if (valueB === undefined) { - onlyA.push([keyA, valueA]); - } else if (!Object.is(valueB, valueB)) { - different.push([keyA, valueA, valueB]); - } - }); - mapB.forEach((valueB, keyB) => { - const valueA = mapA.get(keyB); - if (valueA === undefined) { - onlyA.push([keyB, valueB]); - } - }); - expect(onlyA.length).toEqual(0); - expect(onlyB.length).toEqual(0); 
- expect(different.length).toEqual(0); - } - - function expectDiffCorrect(treeThis: BTreeEx, treeOther: BTreeEx): void { - reset(); - treeThis.diffAgainst(treeOther, OnlyThis, OnlyOther, Different); - let onlyThisT: Map = new Map(); - let onlyOtherT: Map = new Map(); - let differentT: Map = new Map(); - treeThis.forEachPair((kThis, vThis) => { - if (!treeOther.has(kThis)) { - onlyThisT.set(kThis, vThis); - } else { - const vOther = treeOther.get(kThis); - if (!Object.is(vThis, vOther)) - differentT.set(kThis, `vThis: ${vThis}, vOther: ${vOther}`); - } - }); - treeOther.forEachPair((kOther, vOther) => { - if (!treeThis.has(kOther)) { - onlyOtherT.set(kOther, vOther); - } - }); - expectMapsEquals(onlyThis, onlyThisT); - expectMapsEquals(onlyOther, onlyOtherT); - expectMapsEquals(different, differentT); - } - - test(`Diff of trees with different comparators is an error`, () => { - const treeA = new BTreeEx([], compare); - const treeB = new BTreeEx([], (a, b) => b - a); - expect(() => treeA.diffAgainst(treeB, OnlyThis, OnlyOther, Different)).toThrow('comparators'); - }); - - test(`Standalone diffAgainst works with core trees`, () => { - const treeA = new BTree([[1, 1], [2, 2], [4, 4]], compare, maxNodeSize); - const treeB = new BTree([[1, 1], [2, 22], [3, 3]], compare, maxNodeSize); - const onlyThisKeys: number[] = []; - const onlyOtherKeys: number[] = []; - const differentKeys: number[] = []; - diffAgainst( - treeA, - treeB, - (k) => { onlyThisKeys.push(k); }, - (k) => { onlyOtherKeys.push(k); }, - (k) => { differentKeys.push(k); } - ); - expect(onlyThisKeys).toEqual([4]); - expect(onlyOtherKeys).toEqual([3]); - expect(differentKeys).toEqual([2]); - }); - - const entriesGroup: [number, number][][] = [[], [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]]; - entriesGroup.forEach(entries => { - test(`Diff of the same tree ${entries.length > 0 ? 
"(non-empty)" : "(empty)"}`, () => { - const tree = new BTreeEx(entries, compare, maxNodeSize); - expectDiffCorrect(tree, tree); - expect(onlyOther.size).toEqual(0); - expect(onlyThis.size).toEqual(0); - expect(different.size).toEqual(0); - }); - }); - - test(`Diff of identical trees`, () => { - const treeA = new BTreeEx(entriesGroup[1], compare, maxNodeSize); - const treeB = new BTreeEx(entriesGroup[1], compare, maxNodeSize); - expectDiffCorrect(treeA, treeB); - }); - - [entriesGroup, [...entriesGroup].reverse()].forEach(doubleEntries => { - test(`Diff of an ${doubleEntries[0].length === 0 ? 'empty' : 'non-empty'} tree and a ${doubleEntries[1].length === 0 ? 'empty' : 'non-empty'} one`, () => { - const treeA = new BTreeEx(doubleEntries[0], compare, maxNodeSize); - const treeB = new BTreeEx(doubleEntries[1], compare, maxNodeSize); - expectDiffCorrect(treeA, treeB); - }); - }); - - test(`Diff of different trees`, () => { - const treeA = new BTreeEx(entriesGroup[1], compare, maxNodeSize); - const treeB = new BTreeEx(entriesGroup[1], compare, maxNodeSize); - treeB.set(-1, -1); - treeB.delete(2); - treeB.set(3, 4); - treeB.set(10, 10); - expectDiffCorrect(treeA, treeB); - }); - - test(`Diff of odds and evens`, () => { - const treeA = new BTreeEx([[1, 1], [3, 3], [5, 5], [7, 7]], compare, maxNodeSize); - const treeB = new BTreeEx([[2, 2], [4, 4], [6, 6], [8, 8]], compare, maxNodeSize); - expectDiffCorrect(treeA, treeB); - expectDiffCorrect(treeB, treeA); - }); - - function applyChanges(treeA: BTreeEx, duplicate: (tree: BTreeEx) => BTreeEx): void { - const treeB = duplicate(treeA); - const maxKey: number = treeA.maxKey()!; - const onlyInA = -10; - treeA.set(onlyInA, onlyInA); - const onlyInBSmall = -1; - treeB.set(onlyInBSmall, onlyInBSmall); - const onlyInBLarge = maxKey + 1; - treeB.set(onlyInBLarge, onlyInBLarge); - const onlyInAFromDelete = 10 - treeB.delete(onlyInAFromDelete); - const differingValue = -100; - const modifiedInB1 = 3, modifiedInB2 = maxKey - 2; - 
treeB.set(modifiedInB1, differingValue); - treeB.set(modifiedInB2, differingValue) - treeA.diffAgainst(treeB, OnlyThis, OnlyOther, Different); - expectDiffCorrect(treeA, treeB); - } - - function makeLargeTree(size?: number): BTreeEx { - size = size ?? Math.pow(maxNodeSize, 3); - const tree = new BTreeEx([], compare, maxNodeSize); - for (let i = 0; i < size; i++) { - tree.set(i, i); - } - return tree; - } - - test(`Diff of large trees`, () => { - const tree = makeLargeTree(); - applyChanges(tree, tree => tree.greedyClone()); - }); - - test(`Diff of cloned trees`, () => { - const tree = makeLargeTree(); - applyChanges(tree, tree => tree.clone()); - }); - - test(`Diff can early exit`, () => { - const tree = makeLargeTree(100); - const tree2 = tree.clone(); - tree2.set(-1, -1); - tree2.delete(10); - tree2.set(20, -1); - tree2.set(110, -1); - const ReturnKey = (key: number) => { return { break: key }; }; - - let val = tree.diffAgainst(tree2, OnlyThis, OnlyOther, ReturnKey); - expect(onlyOther.size).toEqual(1); - expect(onlyThis.size).toEqual(0); - expect(val).toEqual(20); - reset(); - - val = tree.diffAgainst(tree2, OnlyThis, ReturnKey, Different); - expect(different.size).toEqual(0); - expect(onlyThis.size).toEqual(0); - expect(val).toEqual(110); - reset(); - - val = tree.diffAgainst(tree2, ReturnKey, OnlyOther, Different); - expect(different.size).toEqual(1); - expect(onlyOther.size).toEqual(1); - expect(val).toEqual(10); - reset(); - - expectDiffCorrect(tree, tree2); - }); - }); - test("Issue #2 reproduction", () => { const tree = new BTree([], (a, b) => a - b, maxNodeSize); for (let i = 0; i <= 1999; i++) { diff --git a/test/bulkLoad.test.ts b/test/bulkLoad.test.ts new file mode 100644 index 0000000..1bc017f --- /dev/null +++ b/test/bulkLoad.test.ts @@ -0,0 +1,221 @@ +import BTree, { BNode, BNodeInternal } from '../b+tree'; +import BTreeEx from '../extended'; +import { bulkLoad } from '../extended/bulkLoad'; +import MersenneTwister from 'mersenne-twister'; +import { 
makeArray, randomInt } from './shared'; + +type Pair = [number, number]; +const compareNumbers = (a: number, b: number) => a - b; +const branchingFactors = [4, 10, 32, 128]; + +function sequentialPairs(count: number, start = 0, step = 1): Pair[] { + const pairs: Pair[] = []; + let key = start; + for (let i = 0; i < count; i++) { + pairs.push([key, key * 2]); + key += step; + } + return pairs; +} + +function pairsFromKeys(keys: number[]): Pair[] { + return keys.map((key, index) => [key, index - key]); +} + +function toAlternating(pairs: Pair[]): number[] { + const alternating: number[] = []; + for (const [key, value] of pairs) + alternating.push(key, value); + return alternating; +} + +function buildTreeFromPairs(maxNodeSize: number, pairs: Pair[], loadFactor: number) { + const alternating = toAlternating(pairs); + const tree = bulkLoad(alternating, maxNodeSize, compareNumbers, loadFactor); + const root = tree['_root'] as BNode; + return { tree, root }; +} + +function expectTreeMatches(tree: BTree, expected: Pair[]) { + tree.checkValid(); + expect(tree.size).toBe(expected.length); + expect(tree.toArray()).toEqual(expected); +} + +function collectLeaves(node: BNode): BNode[] { + if (node.isLeaf) + return [node]; + const internal = node as unknown as BNodeInternal; + const leaves: BNode[] = []; + for (const child of internal.children) + leaves.push(...collectLeaves(child as BNode)); + return leaves; +} + +function assertInternalNodeFanout(node: BNode, maxNodeSize: number, isRoot = true) { + if (node.isLeaf) + return; + const internal = node as unknown as BNodeInternal; + if (isRoot) { + expect(internal.children.length).toBeGreaterThanOrEqual(2); + } else { + expect(internal.children.length).toBeGreaterThanOrEqual(Math.floor(maxNodeSize / 2)); + } + expect(internal.children.length).toBeLessThanOrEqual(maxNodeSize); + for (const child of internal.children) + assertInternalNodeFanout(child as BNode, maxNodeSize, false); +} + +describe.each(branchingFactors)('bulkLoad 
fanout %i', (maxNodeSize) => { + test('throws when keys are not strictly ascending', () => { + const alternating = [3, 30, 2, 20]; + expect(() => bulkLoad(alternating.slice(), maxNodeSize, compareNumbers)) + .toThrow('bulkLoad: entries must be sorted by key in strictly ascending order'); + }); + + test('empty input produces empty tree', () => { + const { tree, root } = buildTreeFromPairs(maxNodeSize, [], 1.0); + expect(root?.isLeaf).toBe(true); + expect(root?.keys.length ?? 0).toBe(0); + expectTreeMatches(tree, []); + }); + + test('single entry stays in one leaf', () => { + const pairs = sequentialPairs(1, 5); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 1.0); + expectTreeMatches(tree, pairs); + const root = tree['_root'] as BNode; + expect(root.isLeaf).toBe(true); + expect(root.keys).toEqual([5]); + }); + + test('fills a single leaf up to capacity', () => { + const pairs = sequentialPairs(maxNodeSize, 0, 2); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 1.0); + expectTreeMatches(tree, pairs); + const root = tree['_root'] as BNode; + expect(root.isLeaf).toBe(true); + expect(root.keys.length).toBe(maxNodeSize); + }); + + test('does not produce underfilled nodes if possible', () => { + const pairs = sequentialPairs(maxNodeSize, 0, 2); + // despite asking for only 60% load factor, we should still get a full node + // because splitting into > 1 leaf would cause underfilled nodes + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 0.6); + expectTreeMatches(tree, pairs); + const root = tree['_root'] as BNode; + expect(root.isLeaf).toBe(true); + expect(root.keys.length).toBe(maxNodeSize); + }); + + test('does not mutate the supplied entry list', () => { + const pairs = sequentialPairs(maxNodeSize, 0, 2); + buildTreeFromPairs(maxNodeSize, pairs, 0.6); + expect(pairs.length).toBe(maxNodeSize); + }); + + test('throws when load factor is too low or too high', () => { + const pairs = sequentialPairs(maxNodeSize, 0, 2); + const alternating = 
toAlternating(pairs); + expect(() => bulkLoad(alternating, maxNodeSize, compareNumbers, 0.3)).toThrow(); + expect(() => bulkLoad(alternating, maxNodeSize, compareNumbers, 1.1)).toThrow(); + }); + + test('distributes keys nearly evenly across leaves when not divisible by fanout', () => { + const inputSize = maxNodeSize * 3 + Math.floor(maxNodeSize / 2) + 1; + const pairs = sequentialPairs(inputSize, 10, 3); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 0.8); + expectTreeMatches(tree, pairs); + const leaves = collectLeaves(tree['_root'] as BNode); + const leafSizes = leaves.map((leaf) => leaf.keys.length); + const min = Math.min.apply(Math, leafSizes); + const max = Math.max.apply(Math, leafSizes); + expect(max - min).toBeLessThanOrEqual(1); + }); + + test('creates multiple internal layers when leaf count exceeds branching factor', () => { + const inputSize = maxNodeSize * maxNodeSize + Math.floor(maxNodeSize / 2) + 1; + const pairs = sequentialPairs(inputSize, 0, 1); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 0.8); + expectTreeMatches(tree, pairs); + const root = tree['_root'] as BNode; + expect(root.isLeaf).toBe(false); + assertInternalNodeFanout(root, maxNodeSize); + }); + + test('loads 10000 entries and preserves all data', () => { + const keys = makeArray(10000, false, 3); + const pairs = pairsFromKeys(keys); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 1.0); + expectTreeMatches(tree, pairs); + const leaves = collectLeaves(tree['_root'] as BNode); + expect(leaves.length).toBe(Math.ceil(pairs.length / maxNodeSize)); + assertInternalNodeFanout(tree['_root'] as BNode, maxNodeSize); + }); + + test('entries with 50% load factor, second layer with exactly half full nodes', () => { + // Create enough entries to require a second layer that has exactly two nodes when maxNodeSize is even. 
+ const entryCount = Math.ceil(maxNodeSize / 2) * maxNodeSize; + const keys = makeArray(entryCount, false, 3); + const pairs = pairsFromKeys(keys); + const { tree } = buildTreeFromPairs(maxNodeSize, pairs, 0.5); + expectTreeMatches(tree, pairs); + }); +}); + +describe('BTreeEx.bulkLoad', () => { + test.each(branchingFactors)('creates tree for fanout %i', (maxNodeSize) => { + const pairs = sequentialPairs(maxNodeSize * 2 + 3, 7, 1); + const alternating = toAlternating(pairs); + const tree = BTreeEx.bulkLoad(alternating, maxNodeSize, compareNumbers); + expect(tree).toBeInstanceOf(BTreeEx); + expectTreeMatches(tree, pairs); + }); +}); + +describe('bulkLoad fuzz tests', () => { + const FUZZ_SETTINGS = { + branchingFactors, + ooms: [0, 2, 3], + iterationsPerOOM: 3, + loadFactors: [0.5, 0.8, 1.0], + timeoutMs: 30_000, + } as const; + + jest.setTimeout(FUZZ_SETTINGS.timeoutMs); + + const rng = new MersenneTwister(0xB01C10AD); + + for (const maxNodeSize of FUZZ_SETTINGS.branchingFactors) { + describe(`fanout ${maxNodeSize}`, () => { + for (const oom of FUZZ_SETTINGS.ooms) { + const baseSize = 5 * Math.pow(10, oom); + for (let iteration = 0; iteration < FUZZ_SETTINGS.iterationsPerOOM; iteration++) { + for (const loadFactor of FUZZ_SETTINGS.loadFactors) { + const targetNodeSize = Math.ceil(maxNodeSize * loadFactor); + const sizeJitter = randomInt(rng, baseSize); + const size = baseSize + sizeJitter; + + test(`size ${size}, iteration ${iteration}`, () => { + const keys = makeArray(size, false, 0, rng); + const pairs = pairsFromKeys(keys).map(([key, value], index) => [key, value + index] as Pair); + const { tree, root } = buildTreeFromPairs(maxNodeSize, pairs, loadFactor); + expectTreeMatches(tree, pairs); + + const leaves = collectLeaves(root); + const leafSizes = leaves.map((leaf) => leaf.keys.length); + if (pairs.length >= maxNodeSize) { + const expectedLeafCount = Math.ceil(pairs.length / targetNodeSize); + expect(leaves.length).toBe(expectedLeafCount); + } + const minLeaf 
= Math.min(...leafSizes); + const maxLeaf = Math.max(...leafSizes); + expect(maxLeaf - minLeaf).toBeLessThanOrEqual(1); + }); + } + } + } + }); + } +}); diff --git a/test/diffAgainst.test.ts b/test/diffAgainst.test.ts new file mode 100644 index 0000000..d89fbd8 --- /dev/null +++ b/test/diffAgainst.test.ts @@ -0,0 +1,218 @@ +import BTree from '../b+tree'; +import BTreeEx from '../extended'; +import diffAgainst from '../extended/diffAgainst'; + +var test: (name: string, f: () => void) => void = it; + +const FANOUTS = [32, 10, 4] as const; + +for (const fanout of FANOUTS) { + describe(`BTree diffAgainst tests with fanout ${fanout}`, () => { + runDiffAgainstSuite(fanout); + }); +} + +function runDiffAgainstSuite(maxNodeSize: number): void { + describe('Diff computation', () => { + let onlyThis: Map; + let onlyOther: Map; + let different: Map; + function reset(): void { + onlyOther = new Map(); + onlyThis = new Map(); + different = new Map(); + } + + beforeEach(() => reset()); + + const OnlyThis = (k: number, v: number) => { onlyThis.set(k, v); }; + const OnlyOther = (k: number, v: number) => { onlyOther.set(k, v); }; + const Different = (k: number, vThis: number, vOther: number) => { + different.set(k, `vThis: ${vThis}, vOther: ${vOther}`); + }; + const compare = (a: number, b: number) => a - b; + + function expectMapsEquals(mapA: Map, mapB: Map) { + const onlyA = []; + const onlyB = []; + const different = []; + mapA.forEach((valueA, keyA) => { + const valueB = mapB.get(keyA); + if (valueB === undefined) { + onlyA.push([keyA, valueA]); + } else if (!Object.is(valueB, valueB)) { + different.push([keyA, valueA, valueB]); + } + }); + mapB.forEach((valueB, keyB) => { + const valueA = mapA.get(keyB); + if (valueA === undefined) { + onlyA.push([keyB, valueB]); + } + }); + expect(onlyA.length).toEqual(0); + expect(onlyB.length).toEqual(0); + expect(different.length).toEqual(0); + } + + function expectDiffCorrect(treeThis: BTreeEx, treeOther: BTreeEx): void { + reset(); + 
treeThis.diffAgainst(treeOther, OnlyThis, OnlyOther, Different); + const onlyThisT: Map = new Map(); + const onlyOtherT: Map = new Map(); + const differentT: Map = new Map(); + treeThis.forEachPair((kThis, vThis) => { + if (!treeOther.has(kThis)) { + onlyThisT.set(kThis, vThis); + } else { + const vOther = treeOther.get(kThis); + if (!Object.is(vThis, vOther)) + differentT.set(kThis, `vThis: ${vThis}, vOther: ${vOther}`); + } + }); + treeOther.forEachPair((kOther, vOther) => { + if (!treeThis.has(kOther)) { + onlyOtherT.set(kOther, vOther); + } + }); + expectMapsEquals(onlyThis, onlyThisT); + expectMapsEquals(onlyOther, onlyOtherT); + expectMapsEquals(different, differentT); + } + + test('Diff of trees with different comparators is an error', () => { + const treeA = new BTreeEx([], compare); + const treeB = new BTreeEx([], (a, b) => b - a); + expect(() => treeA.diffAgainst(treeB, OnlyThis, OnlyOther, Different)).toThrow('comparators'); + }); + + test('Standalone diffAgainst works with core trees', () => { + const treeA = new BTree([[1, 1], [2, 2], [4, 4]], compare, maxNodeSize); + const treeB = new BTree([[1, 1], [2, 22], [3, 3]], compare, maxNodeSize); + const onlyThisKeys: number[] = []; + const onlyOtherKeys: number[] = []; + const differentKeys: number[] = []; + diffAgainst( + treeA, + treeB, + (k) => { onlyThisKeys.push(k); }, + (k) => { onlyOtherKeys.push(k); }, + (k) => { differentKeys.push(k); } + ); + expect(onlyThisKeys).toEqual([4]); + expect(onlyOtherKeys).toEqual([3]); + expect(differentKeys).toEqual([2]); + }); + + const entriesGroup: [number, number][][] = [[], [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]]; + entriesGroup.forEach(entries => { + test(`Diff of the same tree ${entries.length > 0 ? 
'(non-empty)' : '(empty)'}`, () => { + const tree = new BTreeEx(entries, compare, maxNodeSize); + expectDiffCorrect(tree, tree); + expect(onlyOther.size).toEqual(0); + expect(onlyThis.size).toEqual(0); + expect(different.size).toEqual(0); + }); + }); + + test('Diff of identical trees', () => { + const treeA = new BTreeEx(entriesGroup[1], compare, maxNodeSize); + const treeB = new BTreeEx(entriesGroup[1], compare, maxNodeSize); + expectDiffCorrect(treeA, treeB); + }); + + [entriesGroup, [...entriesGroup].reverse()].forEach(doubleEntries => { + test(`Diff of an ${doubleEntries[0].length === 0 ? 'empty' : 'non-empty'} tree and a ${doubleEntries[1].length === 0 ? 'empty' : 'non-empty'} one`, () => { + const treeA = new BTreeEx(doubleEntries[0], compare, maxNodeSize); + const treeB = new BTreeEx(doubleEntries[1], compare, maxNodeSize); + expectDiffCorrect(treeA, treeB); + }); + }); + + test('Diff of different trees', () => { + const treeA = new BTreeEx(entriesGroup[1], compare, maxNodeSize); + const treeB = new BTreeEx(entriesGroup[1], compare, maxNodeSize); + treeB.set(-1, -1); + treeB.delete(2); + treeB.set(3, 4); + treeB.set(10, 10); + expectDiffCorrect(treeA, treeB); + }); + + test('Diff of odds and evens', () => { + const treeA = new BTreeEx([[1, 1], [3, 3], [5, 5], [7, 7]], compare, maxNodeSize); + const treeB = new BTreeEx([[2, 2], [4, 4], [6, 6], [8, 8]], compare, maxNodeSize); + expectDiffCorrect(treeA, treeB); + expectDiffCorrect(treeB, treeA); + }); + + function applyChanges(treeA: BTreeEx, duplicate: (tree: BTreeEx) => BTreeEx): void { + const treeB = duplicate(treeA); + const maxKey: number = treeA.maxKey()!; + const onlyInA = -10; + treeA.set(onlyInA, onlyInA); + const onlyInBSmall = -1; + treeB.set(onlyInBSmall, onlyInBSmall); + const onlyInBLarge = maxKey + 1; + treeB.set(onlyInBLarge, onlyInBLarge); + const onlyInAFromDelete = 10; + treeB.delete(onlyInAFromDelete); + const differingValue = -100; + const modifiedInB1 = 3; + const modifiedInB2 = maxKey - 
2; + treeB.set(modifiedInB1, differingValue); + treeB.set(modifiedInB2, differingValue); + treeA.diffAgainst(treeB, OnlyThis, OnlyOther, Different); + expectDiffCorrect(treeA, treeB); + } + + function makeLargeTree(size?: number): BTreeEx { + size = size ?? Math.pow(maxNodeSize, 3); + const tree = new BTreeEx([], compare, maxNodeSize); + for (let i = 0; i < size; i++) { + tree.set(i, i); + } + return tree; + } + + test('Diff of large trees', () => { + const tree = makeLargeTree(); + applyChanges(tree, tree => tree.greedyClone()); + }); + + test('Diff of cloned trees', () => { + const tree = makeLargeTree(); + applyChanges(tree, tree => tree.clone()); + }); + + test('Diff can early exit', () => { + const tree = makeLargeTree(100); + const tree2 = tree.clone(); + tree2.set(-1, -1); + tree2.delete(10); + tree2.set(20, -1); + tree2.set(110, -1); + const ReturnKey = (key: number) => { return { break: key }; }; + + let val = tree.diffAgainst(tree2, OnlyThis, OnlyOther, ReturnKey); + expect(onlyOther.size).toEqual(1); + expect(onlyThis.size).toEqual(0); + expect(val).toEqual(20); + reset(); + + val = tree.diffAgainst(tree2, OnlyThis, ReturnKey, Different); + expect(different.size).toEqual(0); + expect(onlyThis.size).toEqual(0); + expect(val).toEqual(110); + reset(); + + val = tree.diffAgainst(tree2, ReturnKey, OnlyOther, Different); + expect(different.size).toEqual(1); + expect(onlyOther.size).toEqual(1); + expect(val).toEqual(10); + reset(); + + expectDiffCorrect(tree, tree2); + }); + }); +} diff --git a/test/intersect.test.ts b/test/intersect.test.ts new file mode 100644 index 0000000..8014a74 --- /dev/null +++ b/test/intersect.test.ts @@ -0,0 +1,266 @@ +import BTreeEx from '../extended'; +import intersect from '../extended/intersect'; +import { comparatorErrorMsg } from '../extended/shared'; +import MersenneTwister from 'mersenne-twister'; +import { + expectTreeMatchesEntries, + forEachFuzzCase, + populateFuzzTrees, + SetOperationFuzzSettings, + compareNumbers +} from 
'./shared'; + +type SharedCall = { key: number, leftValue: number, rightValue: number }; + +const runForEachKeyInBothAndIntersect = ( + left: BTreeEx, + right: BTreeEx, + assertion: (calls: SharedCall[]) => void +) => { + const forEachCalls: SharedCall[] = []; + left.forEachKeyInBoth(right, (key, leftValue, rightValue) => { + forEachCalls.push({ key, leftValue, rightValue }); + }); + assertion(forEachCalls); + + const intersectionCalls: SharedCall[] = []; + const resultTree = intersect, number, number>(left, right, (key, leftValue, rightValue) => { + intersectionCalls.push({ key, leftValue, rightValue }); + return leftValue; + }); + const expectedEntries = intersectionCalls.map(({ key, leftValue }) => [key, leftValue] as [number, number]); + expect(resultTree.toArray()).toEqual(expectedEntries); + resultTree.checkValid(); + assertion(intersectionCalls); +}; + +const expectForEachKeyInBothAndIntersectCalls = ( + left: BTreeEx, + right: BTreeEx, + expected: Array<[number, number, number]> +) => { + const expectedRecords = tuplesToRecords(expected); + runForEachKeyInBothAndIntersect(left, right, (calls) => { + expect(calls).toEqual(expectedRecords); + }); +}; + +const tuplesToRecords = (entries: Array<[number, number, number]>): SharedCall[] => + entries.map(([key, leftValue, rightValue]) => ({ key, leftValue, rightValue })); + +const tuples = (...pairs: Array<[number, number]>) => pairs; +const triples = (...triplets: Array<[number, number, number]>) => triplets; +const buildTree = (entries: Array<[number, number]>, maxNodeSize: number) => + new BTreeEx(entries, compareNumbers, maxNodeSize); + +describe.each([32, 10, 4])('BTree forEachKeyInBoth/intersect tests with fanout %i', (maxNodeSize) => { + const buildTreeForFanout = (entries: Array<[number, number]>) => buildTree(entries, maxNodeSize); + + const BASIC_CASES: Array<{ + name: string; + left: Array<[number, number]>; + right: Array<[number, number]>; + expected: Array<[number, number, number]>; + alsoCheckSwap?: 
boolean; + }> = [ + { + name: 'forEachKeyInBoth/intersect two empty trees', + left: tuples(), + right: tuples(), + expected: triples(), + }, + { + name: 'forEachKeyInBoth/intersect empty tree with non-empty tree', + left: tuples(), + right: tuples([1, 10], [2, 20], [3, 30]), + expected: triples(), + alsoCheckSwap: true, + }, + { + name: 'forEachKeyInBoth/intersect with no overlapping keys', + left: tuples([1, 10], [3, 30], [5, 50]), + right: tuples([2, 20], [4, 40], [6, 60]), + expected: triples(), + }, + { + name: 'forEachKeyInBoth/intersect with single overlapping key', + left: tuples([1, 10], [2, 20], [3, 30]), + right: tuples([0, 100], [2, 200], [4, 400]), + expected: triples([2, 20, 200]), + }, + ]; + + BASIC_CASES.forEach(({ name, left, right, expected, alsoCheckSwap }) => { + it(name, () => { + const leftTree = buildTreeForFanout(left); + const rightTree = buildTreeForFanout(right); + expectForEachKeyInBothAndIntersectCalls(leftTree, rightTree, expected); + if (alsoCheckSwap) { + expectForEachKeyInBothAndIntersectCalls(rightTree, leftTree, expected); + } + }); + }); + + it('forEachKeyInBoth/intersect with multiple overlapping keys maintains tree contents', () => { + const leftEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; + const rightEntries: Array<[number, number]> = [[0, 100], [2, 200], [4, 400], [6, 600]]; + const tree1 = buildTreeForFanout(leftEntries); + const tree2 = buildTreeForFanout(rightEntries); + const leftBefore = tree1.toArray(); + const rightBefore = tree2.toArray(); + expectForEachKeyInBothAndIntersectCalls(tree1, tree2, triples([2, 20, 200], [4, 40, 400])); + expect(tree1.toArray()).toEqual(leftBefore); + expect(tree2.toArray()).toEqual(rightBefore); + tree1.checkValid(); + tree2.checkValid(); + }); + + it('forEachKeyInBoth/intersect with contiguous overlap yields sorted keys', () => { + const tree1 = buildTreeForFanout(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); + const tree2 = 
buildTreeForFanout(tuples([3, 30], [4, 40], [5, 50], [6, 60], [7, 70])); + runForEachKeyInBothAndIntersect(tree1, tree2, (calls) => { + expect(calls.map(c => c.key)).toEqual([3, 4, 5, 6]); + expect(calls.map(c => c.leftValue)).toEqual([3, 4, 5, 6]); + expect(calls.map(c => c.rightValue)).toEqual([30, 40, 50, 60]); + }); + }); + + it('forEachKeyInBoth/intersect large overlapping range counts each shared key once', () => { + const size = 1000; + const overlapStart = 500; + const leftEntries = Array.from({ length: size }, (_, i) => [i, i * 3] as [number, number]); + const rightEntries = Array.from({ length: size }, (_, i) => { + const key = i + overlapStart; + return [key, key * 7] as [number, number]; + }); + const tree1 = buildTreeForFanout(leftEntries); + const tree2 = buildTreeForFanout(rightEntries); + runForEachKeyInBothAndIntersect(tree1, tree2, (calls) => { + expect(calls.length).toBe(size - overlapStart); + expect(calls[0]).toEqual({ + key: overlapStart, + leftValue: overlapStart * 3, + rightValue: overlapStart * 7 + }); + const lastCall = calls[calls.length - 1]; + expect(lastCall.key).toBe(size - 1); + expect(lastCall.leftValue).toBe((size - 1) * 3); + expect(lastCall.rightValue).toBe((size - 1) * 7); + }); + }); + + it('forEachKeyInBoth/intersect tree with itself visits each key once', () => { + const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); + const tree = buildTreeForFanout(entries); + runForEachKeyInBothAndIntersect(tree, tree, (calls) => { + expect(calls.length).toBe(entries.length); + for (let i = 0; i < entries.length; i++) { + const [key, value] = entries[i]; + expect(calls[i]).toEqual({ key, leftValue: value, rightValue: value }); + } + }); + }); + + it('forEachKeyInBoth/intersect arguments determine left/right values', () => { + const tree1 = buildTreeForFanout(tuples([1, 100], [2, 200], [4, 400])); + const tree2 = buildTreeForFanout(tuples([2, 20], [3, 30], [4, 40])); + 
expectForEachKeyInBothAndIntersectCalls(tree1, tree2, triples([2, 200, 20], [4, 400, 40])); + expectForEachKeyInBothAndIntersectCalls(tree2, tree1, triples([2, 20, 200], [4, 40, 400])); + }); +}); + +describe('BTree forEachKeyInBoth early exiting', () => { + const buildTreeForEarlyExit = (entries: Array<[number, number]>) => + buildTree(entries, 4); + + it('forEachKeyInBoth returns undefined when callback returns void', () => { + const tree1 = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30])); + const tree2 = buildTreeForEarlyExit(tuples([0, 100], [2, 200], [3, 300], [4, 400])); + const visited: number[] = []; + const result = tree1.forEachKeyInBoth(tree2, key => { + visited.push(key); + }); + expect(result).toBeUndefined(); + expect(visited).toEqual([2, 3]); + }); + + it('forEachKeyInBoth ignores undefined break values and completes traversal', () => { + const tree1 = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30])); + const tree2 = buildTreeForEarlyExit(tuples([2, 200], [3, 300], [5, 500])); + const visited: number[] = []; + const result = tree1.forEachKeyInBoth(tree2, key => { + visited.push(key); + return { break: undefined }; + }); + expect(result).toBeUndefined(); + expect(visited).toEqual([2, 3]); + }); + + it('forEachKeyInBoth breaks early when callback returns a value', () => { + const tree1 = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const tree2 = buildTreeForEarlyExit(tuples([2, 200], [3, 300], [4, 400], [5, 500])); + const visited: number[] = []; + const breakResult = tree1.forEachKeyInBoth(tree2, (key, leftValue, rightValue) => { + visited.push(key); + if (key === 3) { + return { break: { key, sum: leftValue + rightValue } }; + } + }); + expect(breakResult).toEqual({ key: 3, sum: 330 }); + expect(visited).toEqual([2, 3]); + }); +}); + +describe('BTree forEachKeyInBoth and intersect input/output validation', () => { + it('forEachKeyInBoth throws error when comparators differ', () => { + const tree1 = new 
BTreeEx([[1, 10]], (a, b) => b + a); + const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); + expect(() => tree1.forEachKeyInBoth(tree2, () => { })).toThrow(comparatorErrorMsg); + expect(() => intersect, number, number>(tree1, tree2, () => 0)).toThrow(comparatorErrorMsg); + }); +}); + +describe('BTree forEachKeyInBoth/intersect fuzz tests', () => { + const FUZZ_SETTINGS: SetOperationFuzzSettings = { + branchingFactors: [4, 5, 32], + ooms: [2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], + removalChances: [0, 0.01, 0.1] + }; + + const FUZZ_TIMEOUT_MS = 30_000; + jest.setTimeout(FUZZ_TIMEOUT_MS); + + const rng = new MersenneTwister(0xC0FFEE); + + forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { + it(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compareNumbers, maxNodeSize); + const treeB = new BTreeEx([], compareNumbers, maxNodeSize); + const [treeAEntries, treeBEntries] = populateFuzzTrees( + [ + { tree: treeA, fraction: fractionA, removalChance }, + { tree: treeB, fraction: fractionB, removalChance } + ], + { rng, size, compare: compareNumbers, maxNodeSize, minAssignmentsPerKey: 1 } + ); + + const bMap = new Map(treeBEntries); + const expectedTuples: Array<[number, number, number]> = []; + for (const [key, leftValue] of treeAEntries) { + const rightValue = bMap.get(key); + if (rightValue !== undefined) + expectedTuples.push([key, leftValue, rightValue]); + } + + expectForEachKeyInBothAndIntersectCalls(treeA, treeB, expectedTuples); + const swappedExpected = expectedTuples.map(([key, leftValue, rightValue]) => [key, rightValue, leftValue] as [number, number, number]); + expectForEachKeyInBothAndIntersectCalls(treeB, treeA, swappedExpected); + + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); + treeA.checkValid(); + treeB.checkValid(); 
+ }); + }); +}); diff --git a/test/setOperationFuzz.test.ts b/test/setOperationFuzz.test.ts new file mode 100644 index 0000000..d11bad7 --- /dev/null +++ b/test/setOperationFuzz.test.ts @@ -0,0 +1,126 @@ +import BTreeEx from '../extended'; +import MersenneTwister from 'mersenne-twister'; +import { + expectTreeMatchesEntries, + forEachFuzzCase, + populateFuzzTrees, + SetOperationFuzzSettings +} from './shared'; + +const compare = (a: number, b: number) => a - b; + +describe('Set operation fuzz tests', () => { + const FUZZ_SETTINGS: SetOperationFuzzSettings = { + branchingFactors: [4, 5, 32], + ooms: [2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], + removalChances: [0.01, 0.1] + }; + + const FUZZ_TIMEOUT_MS = 30_000; + jest.setTimeout(FUZZ_TIMEOUT_MS); + + const rng = new MersenneTwister(0xC0FFEE); + + forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { + it(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compare, maxNodeSize); + const treeB = new BTreeEx([], compare, maxNodeSize); + const treeC = new BTreeEx([], compare, maxNodeSize); + const [treeAEntries, treeBEntries, treeCEntries] = populateFuzzTrees( + [ + { tree: treeA, fraction: fractionA, removalChance }, + { tree: treeB, fraction: fractionB, removalChance }, + { tree: treeC, fraction: 0.5 } + ], + { rng, size, compare, maxNodeSize } + ); + + const keepEither = (_k: number, left: number, _right: number) => left; + const dropValue = () => undefined; + const combineSum = (_k: number, left: number, right: number) => left + right; + + const unionDrop = treeA.union(treeB, dropValue); + const unionKeep = treeA.union(treeB, keepEither); + const intersection = treeA.intersect(treeB, keepEither); + const diffAB = treeA.subtract(treeB); + const diffBA = treeB.subtract(treeA); + + // 1. 
Partition of A: A = (A\B) ∪ (A∩B) and parts are disjoint. + const partition = diffAB.union(intersection, keepEither); + expect(partition.toArray()).toEqual(treeAEntries); + expect(diffAB.intersect(intersection, keepEither).size).toBe(0); + + // 2. Recover B from union and A\B: (A∪B)\(A\B) = B. + expect(unionKeep.subtract(diffAB).toArray()).toEqual(treeBEntries); + + // 3. Symmetric difference two ways. + const symFromDiffs = diffAB.union(diffBA, keepEither); + const symFromUnion = unionKeep.subtract(intersection); + expect(symFromDiffs.toArray()).toEqual(symFromUnion.toArray()); + + // 4. Intersection via difference: A∩B = A \ (A\B). + expect(intersection.toArray()).toEqual(treeA.subtract(diffAB).toArray()); + + // 5. Difference via intersection: A\B = A \ (A∩B). + expect(diffAB.toArray()).toEqual(treeA.subtract(intersection).toArray()); + + // 6. Idempotence. + expect(treeA.union(treeA, keepEither).toArray()).toEqual(treeAEntries); + expect(treeA.intersect(treeA, keepEither).toArray()).toEqual(treeAEntries); + expect(diffAB.subtract(treeB).toArray()).toEqual(diffAB.toArray()); + + // 7. Commutativity. + expect(intersection.toArray()).toEqual(treeB.intersect(treeA, keepEither).toArray()); + const commUT = treeA.union(treeB, combineSum); + const commTU = treeB.union(treeA, combineSum); + expect(commUT.toArray()).toEqual(commTU.toArray()); + + // 8. Associativity. + const assocLeft = treeA.intersect(treeB, keepEither).intersect(treeC, keepEither); + const assocRight = treeA.intersect(treeB.intersect(treeC, keepEither), keepEither); + expect(assocLeft.toArray()).toEqual(assocRight.toArray()); + const assocSumLeft = treeA.union(treeB, combineSum).union(treeC, combineSum); + const assocSumRight = treeA.union(treeB.union(treeC, combineSum), combineSum); + expect(assocSumLeft.toArray()).toEqual(assocSumRight.toArray()); + + // 9. Absorption. 
+ expect(treeA.intersect(treeA.union(treeB, keepEither), keepEither).toArray()).toEqual(treeAEntries); + expect(treeA.union(treeA.intersect(treeB, keepEither), keepEither).toArray()).toEqual(treeAEntries); + + // 10. Distributivity. + const distIntersect = treeA.intersect(treeB.union(treeC, keepEither), keepEither); + const distRight = treeA.intersect(treeB, keepEither).union(treeA.intersect(treeC, keepEither), keepEither); + expect(distIntersect.toArray()).toEqual(distRight.toArray()); + const distSubtract = treeA.subtract(treeB.union(treeC, keepEither)); + const distSubtractRight = treeA.subtract(treeB).subtract(treeC); + expect(distSubtract.toArray()).toEqual(distSubtractRight.toArray()); + const distIntersectDiff = treeA.intersect(treeB, keepEither).subtract(treeC); + const distDiffIntersect = treeA.subtract(treeC).intersect(treeB, keepEither); + expect(distIntersectDiff.toArray()).toEqual(distDiffIntersect.toArray()); + + // 11. Superset sanity. + expect(treeA.subtract(treeA.union(treeB, keepEither)).size).toBe(0); + expect(diffAB.intersect(treeB, keepEither).size).toBe(0); + + // 12. Cardinality relations. 
+ expect(unionKeep.size).toBe(treeA.size + treeB.size - intersection.size); + expect(diffAB.size).toBe(treeA.size - intersection.size); + expect(treeA.size).toBe(diffAB.size + intersection.size); + + partition.checkValid(); + unionDrop.checkValid(); + unionKeep.checkValid(); + intersection.checkValid(); + diffAB.checkValid(); + diffBA.checkValid(); + treeA.checkValid(); + treeB.checkValid(); + treeC.checkValid(); + + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); + expectTreeMatchesEntries(treeC, treeCEntries); + }); + }); +}); diff --git a/test/shared.d.ts b/test/shared.d.ts new file mode 100644 index 0000000..a38bc07 --- /dev/null +++ b/test/shared.d.ts @@ -0,0 +1,50 @@ +import BTree, { IMap } from '../b+tree'; +import SortedArray from '../sorted-array'; +import MersenneTwister from 'mersenne-twister'; +export declare const compareNumbers: (a: number, b: number) => number; +export declare type TreeNodeStats = { + total: number; + shared: number; + newUnderfilled: number; + averageLoadFactor: number; +}; +export declare type TreeEntries = Array<[number, number]>; +export declare type SetOperationFuzzSettings = { + branchingFactors: number[]; + ooms: number[]; + fractionsPerOOM: number[]; + removalChances: number[]; +}; +export declare type FuzzCase = { + maxNodeSize: number; + oom: number; + size: number; + fractionA: number; + fractionB: number; + removalChance: number; + removalLabel: string; +}; +export declare function countTreeNodeStats(tree: BTree): TreeNodeStats; +export declare function logTreeNodeStats(label: string, stats: TreeNodeStats): void; +export declare function randInt(max: number): number; +export declare function expectTreeEqualTo(tree: BTree, list: SortedArray): void; +export declare function addToBoth(a: IMap, b: IMap, k: K, v: V): void; +export declare function makeArray(size: number, randomOrder: boolean, spacing?: number, rng?: MersenneTwister): number[]; +export declare const randomInt: 
(rng: MersenneTwister, maxExclusive: number) => number; +export declare function buildEntriesFromMap(entriesMap: Map, compareFn?: (a: number, b: number) => number): TreeEntries; +export declare type FuzzTreeSpec = { + tree: BTree; + fraction: number; + removalChance?: number; +}; +export declare type PopulateFuzzTreesOptions = { + size: number; + rng: MersenneTwister; + compare: (a: number, b: number) => number; + maxNodeSize: number; + minAssignmentsPerKey?: number; +}; +export declare function populateFuzzTrees(specs: FuzzTreeSpec[], { size, rng, compare, maxNodeSize, minAssignmentsPerKey }: PopulateFuzzTreesOptions): TreeEntries[]; +export declare function applyRemovalRunsToTree(tree: BTree, entries: TreeEntries, removalChance: number, branchingFactor: number, rng: MersenneTwister): TreeEntries; +export declare function expectTreeMatchesEntries(tree: BTree, entries: TreeEntries): void; +export declare function forEachFuzzCase(settings: SetOperationFuzzSettings, callback: (testCase: FuzzCase) => void): void; diff --git a/test/shared.js b/test/shared.js new file mode 100644 index 0000000..06de3f5 --- /dev/null +++ b/test/shared.js @@ -0,0 +1,220 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? 
mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.forEachFuzzCase = exports.expectTreeMatchesEntries = exports.applyRemovalRunsToTree = exports.populateFuzzTrees = exports.buildEntriesFromMap = exports.randomInt = exports.makeArray = exports.addToBoth = exports.expectTreeEqualTo = exports.randInt = exports.logTreeNodeStats = exports.countTreeNodeStats = exports.compareNumbers = void 0; +var mersenne_twister_1 = __importDefault(require("mersenne-twister")); +var rand = new mersenne_twister_1.default(1234); +var compareNumbers = function (a, b) { return a - b; }; +exports.compareNumbers = compareNumbers; +function countTreeNodeStats(tree) { + var root = tree._root; + if (tree.size === 0 || !root) + return { total: 0, shared: 0, newUnderfilled: 0, averageLoadFactor: 0 }; + var maxNodeSize = tree.maxNodeSize; + var minNodeSize = Math.floor(maxNodeSize / 2); + var visit = function (node, ancestorShared, isRoot) { + if (!node) + return { total: 0, shared: 0, newUnderfilled: 0, loadFactorSum: 0 }; + var selfShared = node.isShared === true || ancestorShared; + var children = node.children; + var occupancy = children ? children.length : node.keys.length; + var isUnderfilled = !isRoot && occupancy < minNodeSize; + var loadFactor = occupancy / maxNodeSize; + var shared = selfShared ? 1 : 0; + var total = 1; + var newUnderfilled = !selfShared && isUnderfilled ? 1 : 0; + var loadFactorSum = loadFactor; + if (children) { + for (var _i = 0, children_1 = children; _i < children_1.length; _i++) { + var child = children_1[_i]; + var stats = visit(child, selfShared, false); + total += stats.total; + shared += stats.shared; + newUnderfilled += stats.newUnderfilled; + loadFactorSum += stats.loadFactorSum; + } + } + return { total: total, shared: shared, newUnderfilled: newUnderfilled, loadFactorSum: loadFactorSum }; + }; + var result = visit(root, false, true); + var averageLoadFactor = result.total === 0 ? 
0 : result.loadFactorSum / result.total; + return { + total: result.total, + shared: result.shared, + newUnderfilled: result.newUnderfilled, + averageLoadFactor: averageLoadFactor + }; +} +exports.countTreeNodeStats = countTreeNodeStats; +function logTreeNodeStats(label, stats) { + console.log("\tShared nodes (".concat(label, "): ").concat(stats.shared, "/").concat(stats.total)); + console.log("\tUnderfilled nodes (".concat(label, "): ").concat(stats.newUnderfilled, "/").concat(stats.total)); + var percent = (stats.averageLoadFactor * 100).toFixed(2); + console.log("\tAverage load factor (".concat(label, "): ").concat(percent, "%")); +} +exports.logTreeNodeStats = logTreeNodeStats; +function randInt(max) { + return rand.random_int() % max; +} +exports.randInt = randInt; +function expectTreeEqualTo(tree, list) { + tree.checkValid(); + expect(tree.toArray()).toEqual(list.getArray()); +} +exports.expectTreeEqualTo = expectTreeEqualTo; +function addToBoth(a, b, k, v) { + expect(a.set(k, v)).toEqual(b.set(k, v)); +} +exports.addToBoth = addToBoth; +function makeArray(size, randomOrder, spacing, rng) { + if (spacing === void 0) { spacing = 10; } + var randomizer = rng !== null && rng !== void 0 ? 
rng : rand; + var useGlobalRand = rng === undefined; + var randomFloat = function () { + if (typeof randomizer.random === 'function') + return randomizer.random(); + return Math.random(); + }; + var randomIntWithMax = function (max) { + if (max <= 0) + return 0; + if (useGlobalRand) + return randInt(max); + return Math.floor(randomFloat() * max); + }; + var keys = []; + var current = 0; + for (var i = 0; i < size; i++) { + current += 1 + randomIntWithMax(spacing); + keys[i] = current; + } + if (randomOrder) { + for (var i = 0; i < size; i++) + swap(keys, i, randomIntWithMax(size)); + } + return keys; +} +exports.makeArray = makeArray; +var randomInt = function (rng, maxExclusive) { + return Math.floor(rng.random() * maxExclusive); +}; +exports.randomInt = randomInt; +function swap(keys, i, j) { + var tmp = keys[i]; + keys[i] = keys[j]; + keys[j] = tmp; +} +function buildEntriesFromMap(entriesMap, compareFn) { + if (compareFn === void 0) { compareFn = function (a, b) { return a - b; }; } + var entries = Array.from(entriesMap.entries()); + entries.sort(function (a, b) { return compareFn(a[0], b[0]); }); + return entries; +} +exports.buildEntriesFromMap = buildEntriesFromMap; +function populateFuzzTrees(specs, _a) { + var size = _a.size, rng = _a.rng, compare = _a.compare, maxNodeSize = _a.maxNodeSize, _b = _a.minAssignmentsPerKey, minAssignmentsPerKey = _b === void 0 ? 
0 : _b; + if (specs.length === 0) + return []; + var keys = makeArray(size, true, 1, rng); + var entriesMaps = specs.map(function () { return new Map(); }); + var assignments = new Array(specs.length); + var requiredAssignments = Math.min(minAssignmentsPerKey, specs.length); + for (var _i = 0, keys_1 = keys; _i < keys_1.length; _i++) { + var value = keys_1[_i]; + var assignedCount = 0; + for (var i = 0; i < specs.length; i++) { + assignments[i] = rng.random() < specs[i].fraction; + if (assignments[i]) + assignedCount++; + } + while (assignedCount < requiredAssignments && specs.length > 0) { + var index = (0, exports.randomInt)(rng, specs.length); + if (!assignments[index]) { + assignments[index] = true; + assignedCount++; + } + } + for (var i = 0; i < specs.length; i++) { + if (assignments[i]) { + specs[i].tree.set(value, value); + entriesMaps[i].set(value, value); + } + } + } + return specs.map(function (spec, index) { + var _a; + var entries = buildEntriesFromMap(entriesMaps[index], compare); + var removalChance = (_a = spec.removalChance) !== null && _a !== void 0 ? 
_a : 0; + if (removalChance > 0) + entries = applyRemovalRunsToTree(spec.tree, entries, removalChance, maxNodeSize, rng); + return entries; + }); +} +exports.populateFuzzTrees = populateFuzzTrees; +function applyRemovalRunsToTree(tree, entries, removalChance, branchingFactor, rng) { + if (removalChance <= 0 || entries.length === 0) + return entries; + var remaining = []; + var index = 0; + while (index < entries.length) { + var _a = entries[index], key = _a[0], value = _a[1]; + if (rng.random() < removalChance) { + tree.delete(key); + index++; + while (index < entries.length) { + var candidateKey = entries[index][0]; + if (rng.random() < (1 / branchingFactor)) + break; + tree.delete(candidateKey); + index++; + } + } + else { + remaining.push([key, value]); + index++; + } + } + return remaining; +} +exports.applyRemovalRunsToTree = applyRemovalRunsToTree; +function expectTreeMatchesEntries(tree, entries) { + var index = 0; + tree.forEachPair(function (key, value) { + var expected = entries[index++]; + expect([key, value]).toEqual(expected); + }); + expect(index).toBe(entries.length); +} +exports.expectTreeMatchesEntries = expectTreeMatchesEntries; +function validateFuzzSettings(settings) { + settings.fractionsPerOOM.forEach(function (fraction) { + if (fraction < 0 || fraction > 1) + throw new Error('fractionsPerOOM values must be between 0 and 1'); + }); + settings.removalChances.forEach(function (chance) { + if (chance < 0 || chance > 1) + throw new Error('removalChances values must be between 0 and 1'); + }); +} +function forEachFuzzCase(settings, callback) { + validateFuzzSettings(settings); + for (var _i = 0, _a = settings.branchingFactors; _i < _a.length; _i++) { + var maxNodeSize = _a[_i]; + for (var _b = 0, _c = settings.removalChances; _b < _c.length; _b++) { + var removalChance = _c[_b]; + var removalLabel = removalChance.toFixed(3); + for (var _d = 0, _e = settings.ooms; _d < _e.length; _d++) { + var oom = _e[_d]; + var size = 5 * Math.pow(10, oom); + for 
(var _f = 0, _g = settings.fractionsPerOOM; _f < _g.length; _f++) { + var fractionA = _g[_f]; + var fractionB = 1 - fractionA; + callback({ maxNodeSize: maxNodeSize, oom: oom, size: size, fractionA: fractionA, fractionB: fractionB, removalChance: removalChance, removalLabel: removalLabel }); + } + } + } + } +} +exports.forEachFuzzCase = forEachFuzzCase; diff --git a/test/shared.ts b/test/shared.ts new file mode 100644 index 0000000..c4770f6 --- /dev/null +++ b/test/shared.ts @@ -0,0 +1,285 @@ +import BTree, { BNode, BNodeInternal, IMap } from '../b+tree'; +import SortedArray from '../sorted-array'; +import MersenneTwister from 'mersenne-twister'; +import type { BTreeWithInternals } from '../extended/shared'; + +const rand = new MersenneTwister(1234); +export const compareNumbers = (a: number, b: number) => a - b; + +export type TreeNodeStats = { + total: number; + shared: number; + newUnderfilled: number; + averageLoadFactor: number; +}; + +export type TreeEntries = Array<[number, number]>; + +export type SetOperationFuzzSettings = { + branchingFactors: number[]; + ooms: number[]; + fractionsPerOOM: number[]; + removalChances: number[]; +}; + +export type FuzzCase = { + maxNodeSize: number; + oom: number; + size: number; + fractionA: number; + fractionB: number; + removalChance: number; + removalLabel: string; +}; + +export function countTreeNodeStats(tree: BTree): TreeNodeStats { + const root = (tree as unknown as BTreeWithInternals)._root; + if (tree.size === 0 || !root) + return { total: 0, shared: 0, newUnderfilled: 0, averageLoadFactor: 0 }; + + const maxNodeSize = tree.maxNodeSize; + const minNodeSize = Math.floor(maxNodeSize / 2); + + type StatsAccumulator = { + total: number; + shared: number; + newUnderfilled: number; + loadFactorSum: number; + }; + + const visit = (node: BNode, ancestorShared: boolean, isRoot: boolean): StatsAccumulator => { + if (!node) + return { total: 0, shared: 0, newUnderfilled: 0, loadFactorSum: 0 }; + const selfShared = 
node.isShared === true || ancestorShared; + const children: BNode[] | undefined = (node as BNodeInternal).children; + const occupancy = children ? children.length : node.keys.length; + const isUnderfilled = !isRoot && occupancy < minNodeSize; + const loadFactor = occupancy / maxNodeSize; + let shared = selfShared ? 1 : 0; + let total = 1; + let newUnderfilled = !selfShared && isUnderfilled ? 1 : 0; + let loadFactorSum = loadFactor; + if (children) { + for (const child of children) { + const stats = visit(child, selfShared, false); + total += stats.total; + shared += stats.shared; + newUnderfilled += stats.newUnderfilled; + loadFactorSum += stats.loadFactorSum; + } + } + return { total, shared, newUnderfilled, loadFactorSum }; + }; + + const result = visit(root, false, true); + const averageLoadFactor = result.total === 0 ? 0 : result.loadFactorSum / result.total; + return { + total: result.total, + shared: result.shared, + newUnderfilled: result.newUnderfilled, + averageLoadFactor + }; +} + +export function logTreeNodeStats(label: string, stats: TreeNodeStats): void { + console.log(`\tShared nodes (${label}): ${stats.shared}/${stats.total}`); + console.log(`\tUnderfilled nodes (${label}): ${stats.newUnderfilled}/${stats.total}`); + const percent = (stats.averageLoadFactor * 100).toFixed(2); + console.log(`\tAverage load factor (${label}): ${percent}%`); +} + +export function randInt(max: number): number { + return rand.random_int() % max; +} + +export function expectTreeEqualTo(tree: BTree, list: SortedArray): void { + tree.checkValid(); + expect(tree.toArray()).toEqual(list.getArray()); +} + +export function addToBoth(a: IMap, b: IMap, k: K, v: V): void { + expect(a.set(k, v)).toEqual(b.set(k, v)); +} + +export function makeArray( + size: number, + randomOrder: boolean, + spacing = 10, + rng?: MersenneTwister +): number[] { + const randomizer = rng ?? 
rand; + const useGlobalRand = rng === undefined; + + const randomFloat = () => { + if (typeof randomizer.random === 'function') + return randomizer.random(); + return Math.random(); + }; + + const randomIntWithMax = (max: number) => { + if (max <= 0) + return 0; + if (useGlobalRand) + return randInt(max); + return Math.floor(randomFloat() * max); + }; + + const keys: number[] = []; + let current = 0; + for (let i = 0; i < size; i++) { + current += 1 + randomIntWithMax(spacing); + keys[i] = current; + } + if (randomOrder) { + for (let i = 0; i < size; i++) + swap(keys, i, randomIntWithMax(size)); + } + return keys; +} + +export const randomInt = (rng: MersenneTwister, maxExclusive: number) => + Math.floor(rng.random() * maxExclusive); + +function swap(keys: any[], i: number, j: number) { + const tmp = keys[i]; + keys[i] = keys[j]; + keys[j] = tmp; +} + +export function buildEntriesFromMap( + entriesMap: Map, + compareFn: (a: number, b: number) => number = (a, b) => a - b +): TreeEntries { + const entries = Array.from(entriesMap.entries()) as TreeEntries; + entries.sort((a, b) => compareFn(a[0], b[0])); + return entries; +} + +export type FuzzTreeSpec = { + tree: BTree; + fraction: number; + removalChance?: number; +}; + +export type PopulateFuzzTreesOptions = { + size: number; + rng: MersenneTwister; + compare: (a: number, b: number) => number; + maxNodeSize: number; + minAssignmentsPerKey?: number; +}; + +export function populateFuzzTrees( + specs: FuzzTreeSpec[], + { size, rng, compare, maxNodeSize, minAssignmentsPerKey = 0 }: PopulateFuzzTreesOptions +): TreeEntries[] { + if (specs.length === 0) + return []; + + const keys = makeArray(size, true, 1, rng); + const entriesMaps = specs.map(() => new Map()); + const assignments = new Array(specs.length); + const requiredAssignments = Math.min(minAssignmentsPerKey, specs.length); + + for (const value of keys) { + let assignedCount = 0; + for (let i = 0; i < specs.length; i++) { + assignments[i] = rng.random() < 
specs[i].fraction; + if (assignments[i]) + assignedCount++; + } + + while (assignedCount < requiredAssignments && specs.length > 0) { + const index = randomInt(rng, specs.length); + if (!assignments[index]) { + assignments[index] = true; + assignedCount++; + } + } + + for (let i = 0; i < specs.length; i++) { + if (assignments[i]) { + specs[i].tree.set(value, value); + entriesMaps[i].set(value, value); + } + } + } + + return specs.map((spec, index) => { + let entries = buildEntriesFromMap(entriesMaps[index], compare); + const removalChance = spec.removalChance ?? 0; + if (removalChance > 0) + entries = applyRemovalRunsToTree(spec.tree, entries, removalChance, maxNodeSize, rng); + return entries; + }); +} + +export function applyRemovalRunsToTree( + tree: BTree, + entries: TreeEntries, + removalChance: number, + branchingFactor: number, + rng: MersenneTwister +): TreeEntries { + if (removalChance <= 0 || entries.length === 0) + return entries; + const remaining: TreeEntries = []; + let index = 0; + while (index < entries.length) { + const [key, value] = entries[index]; + if (rng.random() < removalChance) { + tree.delete(key); + index++; + while (index < entries.length) { + const [candidateKey] = entries[index]; + if (rng.random() < (1 / branchingFactor)) + break; + tree.delete(candidateKey); + index++; + } + } else { + remaining.push([key, value]); + index++; + } + } + return remaining; +} + +export function expectTreeMatchesEntries(tree: BTree, entries: TreeEntries): void { + let index = 0; + tree.forEachPair((key, value) => { + const expected = entries[index++]!; + expect([key, value]).toEqual(expected); + }); + expect(index).toBe(entries.length); +} + +function validateFuzzSettings(settings: SetOperationFuzzSettings): void { + settings.fractionsPerOOM.forEach(fraction => { + if (fraction < 0 || fraction > 1) + throw new Error('fractionsPerOOM values must be between 0 and 1'); + }); + settings.removalChances.forEach(chance => { + if (chance < 0 || chance > 1) + 
throw new Error('removalChances values must be between 0 and 1'); + }); +} + +export function forEachFuzzCase( + settings: SetOperationFuzzSettings, + callback: (testCase: FuzzCase) => void +): void { + validateFuzzSettings(settings); + for (const maxNodeSize of settings.branchingFactors) { + for (const removalChance of settings.removalChances) { + const removalLabel = removalChance.toFixed(3); + for (const oom of settings.ooms) { + const size = 5 * Math.pow(10, oom); + for (const fractionA of settings.fractionsPerOOM) { + const fractionB = 1 - fractionA; + callback({ maxNodeSize, oom, size, fractionA, fractionB, removalChance, removalLabel }); + } + } + } + } +} diff --git a/test/subtract.test.ts b/test/subtract.test.ts new file mode 100644 index 0000000..e985570 --- /dev/null +++ b/test/subtract.test.ts @@ -0,0 +1,285 @@ +import BTreeEx from '../extended'; +import forEachKeyNotIn from '../extended/forEachKeyNotIn'; +import subtract from '../extended/subtract'; +import { comparatorErrorMsg, branchingFactorErrorMsg } from '../extended/shared'; +import MersenneTwister from 'mersenne-twister'; +import { + expectTreeMatchesEntries, + forEachFuzzCase, + populateFuzzTrees, + SetOperationFuzzSettings, + compareNumbers +} from './shared'; + +type NotInCall = { key: number, value: number }; + +const runForEachKeyNotInAndSubtract = ( + include: BTreeEx, + exclude: BTreeEx, + assertion: (calls: NotInCall[]) => void +) => { + const forEachCalls: NotInCall[] = []; + forEachKeyNotIn(include, exclude, (key, value) => { + forEachCalls.push({ key, value }); + }); + assertion(forEachCalls); + + const resultTree = subtract, number, number>(include, exclude); + const subtractCalls = resultTree.toArray().map(([key, value]) => ({ key, value })); + expect(subtractCalls).toEqual(forEachCalls); + resultTree.checkValid(); + assertion(subtractCalls); +}; + +const expectForEachKeyNotInAndSubtractCalls = ( + include: BTreeEx, + exclude: BTreeEx, + expected: Array<[number, number]> +) => { + 
const expectedRecords = tuplesToRecords(expected); + runForEachKeyNotInAndSubtract(include, exclude, (calls) => { + expect(calls).toEqual(expectedRecords); + }); +}; + +const tuplesToRecords = (entries: Array<[number, number]>): NotInCall[] => + entries.map(([key, value]) => ({ key, value })); + +const tuples = (...pairs: Array<[number, number]>) => pairs; +const buildTree = (entries: Array<[number, number]>, maxNodeSize: number) => + new BTreeEx(entries, compareNumbers, maxNodeSize); + +describe.each([32, 10, 4])('BTree forEachKeyNotIn/subtract tests with fanout %i', (maxNodeSize) => { + const buildTreeForFanout = (entries: Array<[number, number]>) => buildTree(entries, maxNodeSize); + + const BASIC_CASES: Array<{ + name: string; + include: Array<[number, number]>; + exclude: Array<[number, number]>; + expected: Array<[number, number]>; + }> = [ + { + name: 'forEachKeyNotIn/subtract two empty trees', + include: tuples(), + exclude: tuples(), + expected: [], + }, + { + name: 'forEachKeyNotIn/subtract include empty tree with non-empty tree', + include: tuples(), + exclude: tuples([1, 10], [2, 20], [3, 30]), + expected: [], + }, + { + name: 'forEachKeyNotIn/subtract exclude tree empty yields all include keys', + include: tuples([1, 10], [3, 30], [5, 50]), + exclude: tuples(), + expected: tuples([1, 10], [3, 30], [5, 50]), + }, + { + name: 'forEachKeyNotIn/subtract with no overlapping keys returns include tree contents', + include: tuples([1, 10], [3, 30], [5, 50]), + exclude: tuples([0, 100], [2, 200], [4, 400]), + expected: tuples([1, 10], [3, 30], [5, 50]), + }, + { + name: 'forEachKeyNotIn/subtract with overlapping keys excludes matches', + include: tuples([1, 10], [2, 20], [3, 30], [4, 40], [5, 50]), + exclude: tuples([0, 100], [2, 200], [4, 400], [6, 600]), + expected: tuples([1, 10], [3, 30], [5, 50]), + }, + { + name: 'forEachKeyNotIn/subtract excludes leading overlap then emits remaining keys', + include: tuples([1, 10], [2, 20], [3, 30], [4, 40]), + exclude: 
tuples([1, 100], [2, 200]), + expected: tuples([3, 30], [4, 40]), + }, + { + name: 'forEachKeyNotIn/subtract exclude superset yields empty result', + include: tuples([2, 200], [3, 300]), + exclude: tuples([1, 100], [2, 200], [3, 300], [4, 400]), + expected: [], + }, + ]; + + BASIC_CASES.forEach(({ name, include, exclude, expected }) => { + it(name, () => { + const includeTree = buildTreeForFanout(include); + const excludeTree = buildTreeForFanout(exclude); + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, expected); + }); + }); + + it('forEachKeyNotIn/subtract maintains tree contents', () => { + const includeEntries: Array<[number, number]> = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]]; + const excludeEntries: Array<[number, number]> = [[1, 100], [3, 300], [5, 500]]; + const includeTree = buildTreeForFanout(includeEntries); + const excludeTree = buildTreeForFanout(excludeEntries); + const includeBefore = includeTree.toArray(); + const excludeBefore = excludeTree.toArray(); + expectForEachKeyNotInAndSubtractCalls(includeTree, excludeTree, tuples([2, 20], [4, 40])); + expect(includeTree.toArray()).toEqual(includeBefore); + expect(excludeTree.toArray()).toEqual(excludeBefore); + includeTree.checkValid(); + excludeTree.checkValid(); + }); + + it('forEachKeyNotIn/subtract with contiguous overlap yields sorted survivors', () => { + const includeTree = buildTreeForFanout(tuples([1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6])); + const excludeTree = buildTreeForFanout(tuples([3, 30], [4, 40], [5, 50])); + runForEachKeyNotInAndSubtract(includeTree, excludeTree, (calls) => { + expect(calls.map(c => c.key)).toEqual([1, 2, 6]); + expect(calls.map(c => c.value)).toEqual([1, 2, 6]); + }); + }); + + it('forEachKeyNotIn/subtract large subtraction leaves prefix and suffix ranges', () => { + const size = 1000; + const excludeStart = 200; + const excludeSpan = 500; + const includeEntries = Array.from({ length: size }, (_, i) => [i, i * 2] as [number, number]); + 
const excludeEntries = Array.from({ length: excludeSpan }, (_, i) => { + const key = i + excludeStart; + return [key, key * 3] as [number, number]; + }); + const includeTree = buildTreeForFanout(includeEntries); + const excludeTree = buildTreeForFanout(excludeEntries); + runForEachKeyNotInAndSubtract(includeTree, excludeTree, (calls) => { + expect(calls.length).toBe(size - excludeSpan); + expect(calls[0]).toEqual({ key: 0, value: 0 }); + const lastCall = calls[calls.length - 1]; + expect(lastCall.key).toBe(size - 1); + expect(lastCall.value).toBe((size - 1) * 2); + expect(calls.filter(c => c.key >= excludeStart && c.key < excludeStart + excludeSpan)).toEqual([]); + }); + }); + + it('forEachKeyNotIn/subtract tree with itself visits no keys', () => { + const entries = Array.from({ length: 20 }, (_, i) => [i, i * 2] as [number, number]); + const tree = buildTreeForFanout(entries); + expectForEachKeyNotInAndSubtractCalls(tree, tree, []); + }); + + it('subtract returns a cloned tree when nothing is removed', () => { + const includeTree = buildTreeForFanout(tuples([1, 10], [2, 20])); + const excludeTree = buildTreeForFanout(tuples([3, 30])); + const result = subtract, number, number>(includeTree, excludeTree); + expect(result).not.toBe(includeTree); + expect(result.toArray()).toEqual(includeTree.toArray()); + expect(excludeTree.toArray()).toEqual(tuples([3, 30])); + includeTree.checkValid(); + result.checkValid(); + excludeTree.checkValid(); + }); + + it('forEachKeyNotIn/subtract arguments determine surviving keys', () => { + const tree1 = buildTreeForFanout(tuples([1, 100], [2, 200], [4, 400])); + const tree2 = buildTreeForFanout(tuples([2, 20], [3, 30], [4, 40])); + expectForEachKeyNotInAndSubtractCalls(tree1, tree2, tuples([1, 100])); + expectForEachKeyNotInAndSubtractCalls(tree2, tree1, tuples([3, 30])); + }); +}); + +describe('BTree forEachKeyNotIn early exiting', () => { + const buildTreeForEarlyExit = (entries: Array<[number, number]>) => + buildTree(entries, 4); 
+ + it('forEachKeyNotIn returns undefined when callback returns void', () => { + const includeTree = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30])); + const excludeTree = buildTreeForEarlyExit(tuples([2, 200])); + const visited: number[] = []; + const result = forEachKeyNotIn(includeTree, excludeTree, key => { + visited.push(key); + }); + expect(result).toBeUndefined(); + expect(visited).toEqual([1, 3]); + }); + + it('forEachKeyNotIn ignores undefined break values and completes traversal', () => { + const includeTree = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const excludeTree = buildTreeForEarlyExit(tuples([2, 200])); + const visited: number[] = []; + const result = forEachKeyNotIn(includeTree, excludeTree, key => { + visited.push(key); + return { break: undefined }; + }); + expect(result).toBeUndefined(); + expect(visited).toEqual([1, 3, 4]); + }); + + it('forEachKeyNotIn breaks early when callback returns a value', () => { + const includeTree = buildTreeForEarlyExit(tuples([1, 10], [2, 20], [3, 30], [4, 40])); + const excludeTree = buildTreeForEarlyExit(tuples([2, 200])); + const visited: number[] = []; + const breakResult = forEachKeyNotIn(includeTree, excludeTree, (key, value) => { + visited.push(key); + if (key === 3) { + return { break: { key, value } }; + } + }); + expect(breakResult).toEqual({ key: 3, value: 30 }); + expect(visited).toEqual([1, 3]); + }); +}); + +describe('BTree forEachKeyNotIn and subtract input/output validation', () => { + it('forEachKeyNotIn throws error when comparators differ', () => { + const includeTree = new BTreeEx([[1, 10]], (a, b) => b - a); + const excludeTree = new BTreeEx([[2, 20]], (a, b) => a + b); + expect(() => forEachKeyNotIn(includeTree, excludeTree, () => { })).toThrow(comparatorErrorMsg); + }); + + it('subtract throws error when comparators differ', () => { + const includeTree = new BTreeEx([[1, 10]], (a, b) => b - a); + const excludeTree = new BTreeEx([[2, 20]], (a, b) => a + 
b); + expect(() => subtract, number, number>(includeTree, excludeTree)).toThrow(comparatorErrorMsg); + }); + + it('subtract throws error when branching factors differ', () => { + const includeTree = new BTreeEx([[1, 10]], (a, b) => a - b, 4); + const excludeTree = new BTreeEx([[2, 20]], includeTree._compare, 8); + expect(() => subtract, number, number>(includeTree, excludeTree)).toThrow(branchingFactorErrorMsg); + }); +}); + +describe('BTree forEachKeyNotIn/subtract fuzz tests', () => { + const FUZZ_SETTINGS: SetOperationFuzzSettings = { + branchingFactors: [4, 5, 32], + ooms: [2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], + removalChances: [0, 0.01, 0.1] + }; + + const FUZZ_TIMEOUT_MS = 30_000; + jest.setTimeout(FUZZ_TIMEOUT_MS); + + const rng = new MersenneTwister(0xBAD_C0DE); + + forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { + it(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compareNumbers, maxNodeSize); + const treeB = new BTreeEx([], compareNumbers, maxNodeSize); + const [treeAEntries, treeBEntries] = populateFuzzTrees( + [ + { tree: treeA, fraction: fractionA, removalChance }, + { tree: treeB, fraction: fractionB, removalChance } + ], + { rng, size, compare: compareNumbers, maxNodeSize, minAssignmentsPerKey: 1 } + ); + + const bMap = new Map(treeBEntries); + const aMap = new Map(treeAEntries); + + const expectedA = treeAEntries.filter(([key]) => !bMap.has(key)); + const expectedB = treeBEntries.filter(([key]) => !aMap.has(key)); + + expectForEachKeyNotInAndSubtractCalls(treeA, treeB, expectedA); + expectForEachKeyNotInAndSubtractCalls(treeB, treeA, expectedB); + + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); + treeA.checkValid(); + treeB.checkValid(); + }); + }); +}); diff --git a/test/union.test.ts b/test/union.test.ts 
new file mode 100644 index 0000000..ca1924e --- /dev/null +++ b/test/union.test.ts @@ -0,0 +1,755 @@ +import BTree from '../b+tree'; +import BTreeEx from '../extended'; +import union from '../extended/union'; +import { branchingFactorErrorMsg, comparatorErrorMsg } from '../extended/shared'; +import MersenneTwister from 'mersenne-twister'; +import { + expectTreeMatchesEntries, + forEachFuzzCase, + makeArray, + populateFuzzTrees, + randomInt, + SetOperationFuzzSettings, + compareNumbers +} from './shared'; + +type UnionFn = (key: number, leftValue: number, rightValue: number) => number | undefined; + +describe.each([32, 10, 4])('BTree union tests with fanout %i', (maxNodeSize) => { + const sharesNode = (root: any, targetNode: any): boolean => { + if (root === targetNode) + return true; + if (root.isLeaf) + return false; + const children = (root as any).children as any[]; + for (let i = 0; i < children.length; i++) { + if (sharesNode(children[i], targetNode)) + return true; + } + return false; + }; + + const buildTree = (keys: number[], valueScale = 1, valueOffset = 0) => { + const tree = new BTreeEx([], compareNumbers, maxNodeSize); + for (const key of keys) { + tree.set(key, key * valueScale + valueOffset); + } + return tree; + }; + + const expectRootLeafState = (tree: BTreeEx, expectedIsLeaf: boolean) => { + const root = tree['_root'] as any; + expect(root.isLeaf).toBe(expectedIsLeaf); + }; + + const range = (start: number, endExclusive: number, step = 1): number[] => { + const result: number[] = []; + for (let i = start; i < endExclusive; i += step) + result.push(i); + return result; + }; + + type UnionExpectationOptions = { + after?: (ctx: { result: BTreeEx, expected: BTreeEx }) => void; + expectedUnionFn?: UnionFn; + }; + + const sumUnion: UnionFn = (_key, leftValue, rightValue) => leftValue + rightValue; + const preferLeft: UnionFn = (_key, leftValue) => leftValue; + const preferRight: UnionFn = (_key, _leftValue, rightValue) => rightValue; + const failUnion = 
(message: string): UnionFn => () => { + throw new Error(message); + }; + + const naiveUnion = ( + left: BTreeEx, + right: BTreeEx, + unionFn: UnionFn + ) => { + const expected = left.clone(); + right.forEachPair((key, rightValue) => { + if (expected.has(key)) { + const leftValue = expected.get(key)!; + const unionedValue = unionFn(key, leftValue, rightValue); + if (unionedValue === undefined) { + expected.delete(key); + } else { + expected.set(key, unionedValue); + } + } else { + expected.set(key, rightValue); + } + }); + return expected; + }; + + const expectUnionMatchesBaseline = ( + left: BTreeEx, + right: BTreeEx, + unionFn: UnionFn, + options: UnionExpectationOptions = {} + ) => { + const { expectedUnionFn = unionFn, after } = options; + const expected = naiveUnion(left, right, expectedUnionFn); + const result = left.union(right, unionFn); + expect(result.toArray()).toEqual(expected.toArray()); + result.checkValid(); + expected.checkValid(); + after?.({ result, expected }); + return { result, expected }; + }; + + it('Union disjoint roots reuses roots', () => { + // ensure the roots are not underfilled, as union will try to merge underfilled roots + const size = maxNodeSize * maxNodeSize; + const tree1 = buildTree(range(0, size), 1, 0); + const offset = size * 5; + const tree2 = buildTree(range(offset, offset + size), 2, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + expectUnionMatchesBaseline(tree1, tree2, failUnion('Union callback should not run for disjoint roots'), { + after: ({ result }) => { + const resultRoot = result['_root'] as any; + expect(sharesNode(resultRoot, tree1['_root'] as any)).toBe(true); + expect(sharesNode(resultRoot, tree2['_root'] as any)).toBe(true); + } + }); + }); + + it('Union leaf roots with intersecting keys uses union callback', () => { + const tree1 = buildTree([1, 2, 4], 10, 0); + const tree2 = buildTree([2, 3, 5], 100, 0); + + expectRootLeafState(tree1, true); + expectRootLeafState(tree2, 
true); + + const calls: Array<{ key: number, leftValue: number, rightValue: number }> = []; + + expectUnionMatchesBaseline( + tree1, + tree2, + (key, leftValue, rightValue) => { + calls.push({ key, leftValue, rightValue }); + return leftValue + rightValue; + }, + { expectedUnionFn: sumUnion } + ); + expect(calls).toEqual([{ key: 2, leftValue: 20, rightValue: 200 }]); + }); + + it('Union leaf roots with disjoint keys', () => { + const tree1 = buildTree([1, 3, 5], 1, 0); + const tree2 = buildTree([2, 4, 6], 1, 1000); + + expectRootLeafState(tree1, true); + expectRootLeafState(tree2, true); + + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + failUnion('Union callback should not run for disjoint leaf roots') + ); + expect(result.toArray()).toEqual([ + [1, 1], + [2, 1002], + [3, 3], + [4, 1004], + [5, 5], + [6, 1006] + ]); + }); + + it('Union trees disjoint except for shared maximum key', () => { + const size = maxNodeSize * 2; + const tree1 = buildTree(range(0, size), 1, 0); + const tree2 = buildTree(range(size - 1, size - 1 + size), 3, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let unionCalls = 0; + + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + (_key, leftValue, rightValue) => { + unionCalls++; + return sumUnion(_key, leftValue, rightValue); + }, + { expectedUnionFn: sumUnion } + ); + expect(unionCalls).toBe(1); + expect(result.get(size - 1)).toBe((size - 1) + (size - 1) * 3); + expect(result.size).toBe(tree1.size + tree2.size - 1); + }); + + it('Union trees where all leaves are disjoint and one tree straddles the other', () => { + const straddleLength = 3 * 2 * maxNodeSize; // creates multiple leaves on both trees + const tree1 = buildTree( + range(0, straddleLength / 3).concat(range((straddleLength / 3) * 2, straddleLength)), + 1 + ); + const tree2 = buildTree(range(straddleLength / 3, (straddleLength / 3) * 2), 3); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, 
false); + + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + failUnion('Union callback should not run when all leaves are disjoint') + ); + expect(result.size).toBe(tree1.size + tree2.size); + }); + + it('Union where two-leaf tree intersects leaf-root tree across both leaves', () => { + const size = maxNodeSize + Math.max(3, Math.floor(maxNodeSize / 2)); + const tree1 = buildTree(range(0, size), 2, 0); + const tree2 = buildTree([1, Math.floor(size / 2), size - 1], 5, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, true); + + const seenKeys: number[] = []; + + expectUnionMatchesBaseline( + tree1, + tree2, + (key, _leftValue, rightValue) => { + seenKeys.push(key); + return rightValue; + }, + { expectedUnionFn: preferRight } + ); + expect(seenKeys.sort((a, b) => a - b)).toEqual([1, Math.floor(size / 2), size - 1]); + }); + + it('Union where max key equals min key of other tree', () => { + const size = maxNodeSize * 2; + const tree1 = buildTree(range(0, size), 1, 0); + const tree2 = buildTree(range(size - 1, size - 1 + size), 10, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + let unionCalls = 0; + + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + (_key, _leftValue, rightValue) => { + unionCalls++; + return rightValue; + }, + { expectedUnionFn: preferRight } + ); + expect(unionCalls).toBe(1); + expect(result.get(size - 1)).toBe((size - 1) * 10); + expect(result.size).toBe(tree1.size + tree2.size - 1); + }); + + it('Union odd and even keyed trees', () => { + const limit = maxNodeSize * 3; + const treeOdd = buildTree(range(1, limit * 2, 2), 1, 0); + const treeEven = buildTree(range(0, limit * 2, 2), 1, 100); + + expectRootLeafState(treeOdd, false); + expectRootLeafState(treeEven, false); + + const { result } = expectUnionMatchesBaseline( + treeOdd, + treeEven, + failUnion('Union callback should not be invoked for disjoint parity sets') + ); + 
expect(result.size).toBe(treeOdd.size + treeEven.size); + }); + + it('Union merges disjoint leaf roots into a single leaf', () => { + const perTree = Math.max(1, Math.floor(maxNodeSize / 2) - 1); + const keysA = range(1, perTree).map(i => i); + const keysB = keysA.map(k => k * 1000); + const tree1 = buildTree(keysA); + const tree2 = buildTree(keysB); + + expectRootLeafState(tree1, true); + expectRootLeafState(tree2, true); + + const unioned = tree1.union(tree2, failUnion('Should not be called for disjoint keys')); + const resultRoot = unioned['_root'] as any; + const expectedKeys = keysA.concat(keysB).sort(compareNumbers); + expect(resultRoot.isLeaf).toBe(true); + expect(resultRoot.keys).toEqual(expectedKeys); + }); + + it('Union combines underfilled non-leaf roots into a filled root', () => { + const minChildren = Math.floor(maxNodeSize / 2); + const targetLeavesPerTree = minChildren - 1; + if (targetLeavesPerTree === 1) { + return; // cannot test this case with only one leaf per tree + } + const entriesPerLeaf = maxNodeSize; + const buildUnderfilledTree = (startKey: number) => { + const keys: number[] = []; + for (let leaf = 0; leaf < targetLeavesPerTree; leaf++) { + for (let i = 0; i < entriesPerLeaf; i++) + keys.push(startKey + leaf * entriesPerLeaf + i); + } + const tree = buildTree(keys); + const root = tree['_root'] as any; + expect(root.isLeaf).toBe(false); + expect(root.children.length).toBeLessThan(minChildren); + return { tree, nextKey: startKey + keys.length, childCount: root.children.length }; + }; + + const first = buildUnderfilledTree(0); + const second = buildUnderfilledTree(first.nextKey + maxNodeSize * 10); + + const unioned = first.tree.union(second.tree, failUnion('Should not be called for disjoint keys')); + const resultRoot = unioned['_root'] as any; + expect(resultRoot.isLeaf).toBe(false); + expect(resultRoot.children.length).toBeGreaterThanOrEqual(minChildren); + expect(resultRoot.children.length).toBe(first.childCount + second.childCount); 
+ }); + + it('Union overlapping prefix equal to branching factor', () => { + const shared = maxNodeSize; + const tree1Keys = [ + ...range(0, shared), + ...range(shared, shared + maxNodeSize) + ]; + const tree2Keys = [ + ...range(0, shared), + ...range(shared + maxNodeSize, shared + maxNodeSize * 2) + ]; + + const tree1 = buildTree(tree1Keys, 1, 0); + const tree2 = buildTree(tree2Keys, 2, 0); + + expectRootLeafState(tree1, false); + expectRootLeafState(tree2, false); + + const unionedKeys: number[] = []; + + expectUnionMatchesBaseline( + tree1, + tree2, + (key, leftValue, rightValue) => { + unionedKeys.push(key); + return leftValue + rightValue; + }, + { expectedUnionFn: sumUnion } + ); + expect(unionedKeys.sort((a, b) => a - b)).toEqual(range(0, shared)); + }); + + it('Union two empty trees', () => { + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([], compareNumbers, maxNodeSize); + + const { result } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); + expect(result.size).toBe(0); + }); + + it('Union empty tree with non-empty tree', () => { + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); + + const { result: leftUnion } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); + expect(leftUnion.toArray()).toEqual(tree2.toArray()); + + const { result: rightUnion } = expectUnionMatchesBaseline(tree2, tree1, sumUnion); + expect(rightUnion.toArray()).toEqual(tree2.toArray()); + expect(tree1.toArray()).toEqual([]); + expect(tree2.toArray()).toEqual([[1, 10], [2, 20], [3, 30]]); + tree1.checkValid(); + tree2.checkValid(); + }); + + it('Union with no overlapping keys', () => { + const tree1 = new BTreeEx([[1, 10], [3, 30], [5, 50]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[2, 20], [4, 40], [6, 60]], compareNumbers, maxNodeSize); + + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + 
failUnion('Should not be called for non-overlapping keys') + ); + + expect(result.size).toBe(6); + expect(result.toArray()).toEqual([[1, 10], [2, 20], [3, 30], [4, 40], [5, 50], [6, 60]]); + }); + + it('Union with completely overlapping keys - sum values', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[1, 5], [2, 15], [3, 25]], compareNumbers, maxNodeSize); + + const { result } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); + expect(result.size).toBe(tree1.size); + }); + + it('Union with completely overlapping keys - prefer left', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compareNumbers, maxNodeSize); + + const { result } = expectUnionMatchesBaseline(tree1, tree2, preferLeft); + expect(result.toArray()).toEqual(tree1.toArray()); + }); + + it('Union with completely overlapping keys - prefer right', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[1, 100], [2, 200], [3, 300]], compareNumbers, maxNodeSize); + + const { result } = expectUnionMatchesBaseline(tree1, tree2, (_k, _v1, v2) => v2); + expect(result.toArray()).toEqual(tree2.toArray()); + }); + + it('Union with partially overlapping keys', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[3, 300], [4, 400], [5, 500], [6, 600]], compareNumbers, maxNodeSize); + + const unionedKeys: number[] = []; + + expectUnionMatchesBaseline( + tree1, + tree2, + (key, v1, v2) => { + unionedKeys.push(key); + return v1 + v2; + }, + { expectedUnionFn: sumUnion } + ); + expect(unionedKeys.sort((a, b) => a - b)).toEqual([3, 4]); + }); + + it('Union with overlapping keys can delete entries', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30], [4, 40]], compareNumbers, 
maxNodeSize); + const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400], [5, 500]], compareNumbers, maxNodeSize); + const { result } = expectUnionMatchesBaseline(tree1, tree2, (k, v1, v2) => { + if (k === 3) return undefined; + return v1 + v2; + }); + expect(result.has(3)).toBe(false); + }); + + it('Union is called even when values are equal', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[2, 20], [3, 30]], compareNumbers, maxNodeSize); + + const unionCallLog: Array<{ k: number, v1: number, v2: number }> = []; + + expectUnionMatchesBaseline( + tree1, + tree2, + (k, v1, v2) => { + unionCallLog.push({ k, v1, v2 }); + return v1; + }, + { expectedUnionFn: preferLeft } + ); + expect(unionCallLog).toEqual([{ k: 2, v1: 20, v2: 20 }]); + }); + + it('Union does not mutate input trees', () => { + const entries1: [number, number][] = [[1, 10], [2, 20], [3, 30]]; + const entries2: [number, number][] = [[2, 200], [3, 300], [4, 400]]; + const tree1 = new BTreeEx(entries1, compareNumbers, maxNodeSize); + const tree2 = new BTreeEx(entries2, compareNumbers, maxNodeSize); + + const snapshot1 = tree1.toArray(); + const snapshot2 = tree2.toArray(); + + expectUnionMatchesBaseline(tree1, tree2, sumUnion); + + expect(tree1.toArray()).toEqual(snapshot1); + expect(tree2.toArray()).toEqual(snapshot2); + tree1.checkValid(); + tree2.checkValid(); + }); + + it('Union large trees with some overlaps', () => { + const entries1: [number, number][] = range(0, 1000).map(i => [i, i]); + const entries2: [number, number][] = range(500, 1500).map(i => [i, i * 10]); + + const tree1 = new BTreeEx(entries1, compareNumbers, maxNodeSize); + const tree2 = new BTreeEx(entries2, compareNumbers, maxNodeSize); + + let unionCount = 0; + expectUnionMatchesBaseline( + tree1, + tree2, + (k, v1, v2) => { + unionCount++; + return v1 + v2; + }, + { expectedUnionFn: sumUnion } + ); + expect(unionCount).toBe(500); + }); + + it('Union with overlaps at 
boundaries', () => { + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([], compareNumbers, maxNodeSize); + + for (let i = 0; i < 100; i++) { + tree1.set(i * 2, i * 2); + } + + for (let i = 50; i < 150; i++) { + tree2.set(i, i * 10); + } + + const unionedKeys: number[] = []; + + expectUnionMatchesBaseline( + tree1, + tree2, + (key, v1, v2) => { + unionedKeys.push(key); + return v1 + v2; + }, + { expectedUnionFn: sumUnion } + ); + + const expectedUnionedKeys = range(50, 150).filter(k => k % 2 === 0); + expect(unionedKeys.sort((a, b) => a - b)).toEqual(expectedUnionedKeys); + }); + + it('Union result can be modified without affecting inputs', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[3, 30], [4, 40]], compareNumbers, maxNodeSize); + + const { result } = expectUnionMatchesBaseline(tree1, tree2, sumUnion); + + result.set(1, 100); + result.set(5, 50); + result.delete(2); + + expect(tree1.get(1)).toBe(10); + expect(tree1.get(2)).toBe(20); + expect(tree1.has(5)).toBe(false); + expect(tree2.get(3)).toBe(30); + expect(tree2.get(4)).toBe(40); + tree1.checkValid(); + tree2.checkValid(); + result.checkValid(); + }); + + it('Union tree with itself returns a clone without invoking combineFn', () => { + const size = maxNodeSize * 2 + 5; + const tree = buildTree(range(0, size), 3, 1); + let unionCalls = 0; + + const original = tree.toArray(); + const result = tree.union(tree, (key, leftValue, rightValue) => { + unionCalls++; + return sumUnion(key, leftValue, rightValue); + }); + expect(unionCalls).toBe(0); + expect(result).not.toBe(tree); + expect(result.toArray()).toEqual(original); + expect(tree.toArray()).toEqual(original); + }); + + it('Standalone union short-circuits when given the same tree twice', () => { + const size = maxNodeSize * 2 + 1; + const tree = buildTree(range(0, size), 1, 0); + let unionCalls = 0; + const original = tree.toArray(); + const result = 
union(tree, tree, (_key: number, _leftValue: number, _rightValue: number) => { + unionCalls++; + return undefined; + }); + expect(unionCalls).toBe(0); + expect(result).not.toBe(tree); + expect(result.toArray()).toEqual(original); + expect(tree.toArray()).toEqual(original); + }); + + it('Union with disjoint ranges', () => { + const entries1: [number, number][] = []; + for (let i = 1; i <= 100; i++) entries1.push([i, i]); + for (let i = 201; i <= 300; i++) entries1.push([i, i]); + + const entries2: [number, number][] = []; + for (let i = 101; i <= 200; i++) entries2.push([i, i]); + + const tree1 = new BTreeEx(entries1, compareNumbers, maxNodeSize); + const tree2 = new BTreeEx(entries2, compareNumbers, maxNodeSize); + + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + failUnion('Should not be called - no overlaps') + ); + + expect(result.size).toBe(300); + expect(result.get(1)).toBe(1); + expect(result.get(100)).toBe(100); + expect(result.get(101)).toBe(101); + expect(result.get(200)).toBe(200); + expect(result.get(201)).toBe(201); + expect(result.get(300)).toBe(300); + }); + + it('Union with single element trees', () => { + const tree1 = new BTreeEx([[5, 50]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[5, 500]], compareNumbers, maxNodeSize); + + const { result } = expectUnionMatchesBaseline(tree1, tree2, (_k, v1, v2) => Math.max(v1, v2)); + expect(result.toArray()).toEqual([[5, 500]]); + }); + + it('Union excluding all overlapping keys', () => { + const tree1 = new BTreeEx([[1, 10], [2, 20], [3, 30]], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([[2, 200], [3, 300], [4, 400]], compareNumbers, maxNodeSize); + + const { result } = expectUnionMatchesBaseline(tree1, tree2, () => undefined); + expect(result.toArray()).toEqual([[1, 10], [4, 400]]); + }); + + it('Union with large disjoint ranges', () => { + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([], compareNumbers, maxNodeSize); 
+ + for (let i = 0; i <= 10000; i++) + tree1.set(i, i); + for (let i = 10001; i <= 20000; i++) + tree2.set(i, i); + + const { result } = expectUnionMatchesBaseline( + tree1, + tree2, + failUnion('Union callback should not run for disjoint ranges') + ); + expect(result.size).toBe(tree1.size + tree2.size); + expect(result.get(0)).toBe(0); + expect(result.get(20000)).toBe(20000); + }); + + it('Union trees with random overlap', () => { + const size = 10000; + const keys1 = makeArray(size, true); + const keys2 = makeArray(size, true); + + const tree1 = new BTreeEx(); + const tree2 = new BTreeEx(); + + for (let k of keys1) + tree1.set(k, k); + for (let k of keys2) + tree2.set(k, k * 10); + + expectUnionMatchesBaseline(tree1, tree2, preferLeft); + }); + + it('Union trees with ~10% overlap', () => { + const size = 200; + const offset = Math.floor(size * 0.9); + const overlap = size - offset; + + const tree1 = new BTreeEx([], compareNumbers, maxNodeSize); + const tree2 = new BTreeEx([], compareNumbers, maxNodeSize); + + for (let i = 0; i < size; i++) + tree1.set(i, i); + + for (let i = 0; i < size; i++) { + const key = offset + i; + tree2.set(key, key * 10); + } + + const { result } = expectUnionMatchesBaseline(tree1, tree2, preferLeft); + + expect(result.size).toBe(size + size - overlap); + for (let i = 0; i < offset; i++) + expect(result.get(i)).toBe(i); + for (let i = offset; i < size; i++) + expect(result.get(i)).toBe(i); + const upperBound = offset + size; + for (let i = size; i < upperBound; i++) + expect(result.get(i)).toBe(i * 10); + }); +}); + +describe('BTree union input/output validation', () => { + test('Union throws error when comparators differ', () => { + const tree1 = new BTreeEx([[1, 10]], (a, b) => b + a); + const tree2 = new BTreeEx([[2, 20]], (a, b) => b - a); + + expect(() => tree1.union(tree2, (_k, v1, v2) => v1 + v2)).toThrow(comparatorErrorMsg); + }); + + test('Union throws error when max node sizes differ', () => { + const tree1 = new BTreeEx([[1, 
10]], compareNumbers, 32); + const tree2 = new BTreeEx([[2, 20]], compareNumbers, 33); + + expect(() => tree1.union(tree2, (_k, v1, v2) => v1 + v2)).toThrow(branchingFactorErrorMsg); + }); + + test('Union returns a tree of the same class', () => { + expect(union(new BTreeEx(), new BTreeEx(), (_k, v1, v2) => v1)).toBeInstanceOf(BTreeEx); + expect(union(new BTree(), new BTree(), (_k, v1, v2) => v1)).toBeInstanceOf(BTree); + expect(union(new BTree(), new BTree(), (_k, v1, v2) => v1) instanceof BTreeEx).toBeFalsy(); + }); +}); + +describe('BTree union fuzz tests', () => { + const unionFn = (_k: number, left: number, _right: number) => left; + const FUZZ_SETTINGS: SetOperationFuzzSettings = { + branchingFactors: [4, 5, 32], + ooms: [0, 1, 2], // [0, 1, 2, 3], + fractionsPerOOM: [0.1, 0.25, 0.5], // [0.0001, 0.01, 0.1, 0.25, 0.5], + removalChances: [0, 0.01, 0.1] + }; + const RANDOM_EDITS_PER_TEST = 20; + const TIMEOUT_MS = 30_000; + + jest.setTimeout(TIMEOUT_MS); + + const rng = new MersenneTwister(0xBEEFCAFE); + + forEachFuzzCase(FUZZ_SETTINGS, ({ maxNodeSize, size, fractionA, fractionB, removalChance, removalLabel }) => { + test(`branch ${maxNodeSize}, size ${size}, fractionA ${fractionA.toFixed(2)}, fractionB ${fractionB.toFixed(2)}, removal ${removalLabel}`, () => { + const treeA = new BTreeEx([], compareNumbers, maxNodeSize); + const treeB = new BTreeEx([], compareNumbers, maxNodeSize); + const [treeAEntries, treeBEntries] = populateFuzzTrees( + [ + { tree: treeA, fraction: fractionA, removalChance }, + { tree: treeB, fraction: fractionB, removalChance } + ], + { rng, size, compare: compareNumbers, maxNodeSize, minAssignmentsPerKey: 1 } + ); + + const unioned = treeA.union(treeB, unionFn); + unioned.checkValid(); + + const combinedKeys = new Set(); + treeAEntries.forEach(([key]) => combinedKeys.add(key)); + treeBEntries.forEach(([key]) => combinedKeys.add(key)); + const expected = Array.from(combinedKeys).sort(compareNumbers).map(key => [key, key]); + 
expect(unioned.toArray()).toEqual(expected); + + // Union should not have mutated inputs + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); + + for (let edit = 0; edit < RANDOM_EDITS_PER_TEST; edit++) { + const key = 1 + randomInt(rng, size); + const action = rng.random(); + if (action < 0.33) { + unioned.set(key, key); + } else if (action < 0.66) { + unioned.set(key, -key); + } else { + unioned.delete(key); + } + } + + // Check for shared mutability issues + expectTreeMatchesEntries(treeA, treeAEntries); + expectTreeMatchesEntries(treeB, treeBEntries); + }); + }); +}); diff --git a/tsconfig.json b/tsconfig.json index 09f7275..e3758ba 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -17,5 +17,5 @@ "stripInternal": true }, "include": ["**/*.ts"], - "exclude": ["node_modules", "tests", "b+tree.test.ts"], + "exclude": ["node_modules", "tests", "test"], }