Skip to content

Commit bee92ee

Browse files
committed
fix: compare Uint8Arrays by content for proper binary ID equality
Fixes the `eq` function and hash indexing to compare Uint8Arrays/Buffers by content instead of by reference, enabling proper ULID comparisons in WHERE clauses.

Changes:
- Hash small Uint8Arrays (≤128 bytes) by content in db-ivm for better indexing
- Compare Uint8Arrays by content in the `eq` operator via the `areValuesEqual()` function
- Add comprehensive tests for Uint8Array equality comparison
1 parent 5078c4a commit bee92ee

File tree

6 files changed

+210
-16
lines changed

6 files changed

+210
-16
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"@tanstack/db": patch
3+
"@tanstack/db-ivm": patch
4+
---
5+
6+
Fix Uint8Array/Buffer comparison to work by content instead of reference. This enables proper equality checks for binary IDs like ULIDs in WHERE clauses using the `eq` function.

packages/db-ivm/src/hashing/hash.ts

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ const OBJECT_MARKER = randomHash()
1717
const ARRAY_MARKER = randomHash()
1818
const MAP_MARKER = randomHash()
1919
const SET_MARKER = randomHash()
20+
const UINT8ARRAY_MARKER = randomHash()
21+
22+
// Maximum byte length for Uint8Arrays to hash by content instead of reference
23+
// Arrays at or below this size will be hashed by content, allowing proper equality comparisons
24+
// for small arrays like ULIDs (16 bytes) while still avoiding performance costs for large arrays
25+
const UINT8ARRAY_CONTENT_HASH_THRESHOLD = 128
2026

2127
const hashCache = new WeakMap<object, number>()
2228

@@ -35,6 +41,24 @@ function hashObject(input: object): number {
3541
let valueHash: number | undefined
3642
if (input instanceof Date) {
3743
valueHash = hashDate(input)
44+
} else if (
45+
// Check if input is a Uint8Array or Buffer
46+
(typeof Buffer !== `undefined` && input instanceof Buffer) ||
47+
input instanceof Uint8Array
48+
) {
49+
// For small Uint8Arrays/Buffers (e.g., ULIDs, UUIDs), hash by content
50+
// to enable proper equality comparisons. For large arrays, hash by reference
51+
// to avoid performance costs.
52+
if (input.byteLength <= UINT8ARRAY_CONTENT_HASH_THRESHOLD) {
53+
valueHash = hashUint8Array(input)
54+
} else {
55+
// Deeply hashing large arrays would be too costly
56+
// so we track them by reference and cache them in a weak map
57+
return cachedReferenceHash(input)
58+
}
59+
} else if (input instanceof File) {
60+
// Files are always hashed by reference due to their potentially large size
61+
return cachedReferenceHash(input)
3862
} else {
3963
let plainObjectInput = input
4064
let marker = OBJECT_MARKER
@@ -53,17 +77,6 @@ function hashObject(input: object): number {
5377
plainObjectInput = [...input.entries()]
5478
}
5579

56-
if (
57-
(typeof Buffer !== `undefined` && input instanceof Buffer) ||
58-
input instanceof Uint8Array ||
59-
input instanceof File
60-
) {
61-
// Deeply hashing these objects would be too costly
62-
// but we also don't want to ignore them
63-
// so we track them by reference and cache them in a weak map
64-
return cachedReferenceHash(input)
65-
}
66-
6780
valueHash = hashPlainObject(plainObjectInput, marker)
6881
}
6982

@@ -78,6 +91,20 @@ function hashDate(input: Date): number {
7891
return hasher.digest()
7992
}
8093

94+
function hashUint8Array(input: Uint8Array): number {
95+
const hasher = new MurmurHashStream()
96+
hasher.update(UINT8ARRAY_MARKER)
97+
// Hash the byte length first to differentiate arrays of different sizes
98+
hasher.update(input.byteLength)
99+
// Hash each byte in the array
100+
for (let i = 0; i < input.byteLength; i++) {
101+
// Use _writeByte to hash the actual byte values
102+
// @ts-expect-error - _writeByte is private but we need to use it here
103+
hasher._writeByte(input[i]!)
104+
}
105+
return hasher.digest()
106+
}
107+
81108
function hashPlainObject(input: object, marker: number): number {
82109
const hasher = new MurmurHashStream()
83110

packages/db-ivm/tests/utils.test.ts

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,8 @@ describe(`hash`, () => {
299299
expect(hash4).not.toBe(hash6) // Different Symbol content should have different hash
300300
})
301301

302-
it(`should hash Buffers, Uint8Arrays and File objects by reference`, () => {
302+
it(`should hash small Buffers and Uint8Arrays by content`, () => {
303+
// Small buffers (≤128 bytes) are hashed by content for proper equality comparisons
303304
const buffer1 = Buffer.from([1, 2, 3])
304305
const buffer2 = Buffer.from([1, 2, 3])
305306
const buffer3 = Buffer.from([1, 2, 3, 4])
@@ -309,7 +310,7 @@ describe(`hash`, () => {
309310
const hash3 = hash(buffer3)
310311

311312
expect(typeof hash1).toBe(hashType)
312-
expect(hash1).not.toBe(hash2) // Same content but different buffer instances have a different hash because it would be too costly to deeply hash buffers
313+
expect(hash1).toBe(hash2) // Same content = same hash for small buffers
313314
expect(hash1).not.toBe(hash3) // Different Buffer content should have different hash
314315
expect(hash1).toBe(hash(buffer1)) // Hashing same buffer should return same hash
315316

@@ -322,10 +323,46 @@ describe(`hash`, () => {
322323
const hash6 = hash(uint8Array3)
323324

324325
expect(typeof hash4).toBe(hashType)
325-
expect(hash4).not.toBe(hash5) // Same content but different uint8Array instances have a different hash because it would be too costly to deeply hash uint8Arrays
326+
expect(hash4).toBe(hash5) // Same content = same hash for small Uint8Arrays
326327
expect(hash4).not.toBe(hash6) // Different uint8Array content should have different hash
327328
expect(hash4).toBe(hash(uint8Array1)) // Hashing same uint8Array should return same hash
329+
})
330+
331+
it(`should hash large Buffers, Uint8Arrays and File objects by reference`, () => {
332+
// Large buffers (>128 bytes) are hashed by reference to avoid performance costs
333+
const largeBuffer1 = Buffer.alloc(300)
334+
const largeBuffer2 = Buffer.alloc(300)
335+
336+
// Fill with same content
337+
for (let i = 0; i < 300; i++) {
338+
largeBuffer1[i] = i % 256
339+
largeBuffer2[i] = i % 256
340+
}
341+
342+
const hash1 = hash(largeBuffer1)
343+
const hash2 = hash(largeBuffer2)
344+
345+
expect(typeof hash1).toBe(hashType)
346+
expect(hash1).not.toBe(hash2) // Same content but different instances = different hash for large buffers
347+
expect(hash1).toBe(hash(largeBuffer1)) // Hashing same buffer should return same hash
348+
349+
const largeUint8Array1 = new Uint8Array(300)
350+
const largeUint8Array2 = new Uint8Array(300)
351+
352+
// Fill with same content
353+
for (let i = 0; i < 300; i++) {
354+
largeUint8Array1[i] = i % 256
355+
largeUint8Array2[i] = i % 256
356+
}
357+
358+
const hash3 = hash(largeUint8Array1)
359+
const hash4 = hash(largeUint8Array2)
360+
361+
expect(typeof hash3).toBe(hashType)
362+
expect(hash3).not.toBe(hash4) // Same content but different instances = different hash for large Uint8Arrays
363+
expect(hash3).toBe(hash(largeUint8Array1)) // Hashing same uint8Array should return same hash
328364

365+
// Files are always hashed by reference regardless of size
329366
const file1 = new File([`Hello, world!`], `test.txt`)
330367
const file2 = new File([`Hello, world!`], `test.txt`)
331368
const file3 = new File([`Hello, world!`], `test.txt`)

packages/db/src/query/compiler/evaluators.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import {
33
UnknownExpressionTypeError,
44
UnknownFunctionError,
55
} from "../../errors.js"
6-
import { normalizeValue } from "../../utils/comparison.js"
6+
import { areValuesEqual, normalizeValue } from "../../utils/comparison.js"
77
import type { BasicExpression, Func, PropRef } from "../ir.js"
88
import type { NamespacedRow } from "../../types.js"
99

@@ -172,7 +172,8 @@ function compileFunction(func: Func, isSingleRow: boolean): (data: any) => any {
172172
if (isUnknown(a) || isUnknown(b)) {
173173
return null
174174
}
175-
return a === b
175+
// Use areValuesEqual for proper Uint8Array/Buffer comparison
176+
return areValuesEqual(a, b)
176177
}
177178
}
178179
case `gt`: {

packages/db/src/utils/comparison.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,21 @@ export const defaultComparator = makeComparator({
111111
stringSort: `locale`,
112112
})
113113

114+
/**
115+
* Compare two Uint8Arrays for content equality
116+
*/
117+
function areUint8ArraysEqual(a: Uint8Array, b: Uint8Array): boolean {
118+
if (a.byteLength !== b.byteLength) {
119+
return false
120+
}
121+
for (let i = 0; i < a.byteLength; i++) {
122+
if (a[i] !== b[i]) {
123+
return false
124+
}
125+
}
126+
return true
127+
}
128+
114129
/**
115130
* Normalize a value for comparison
116131
*/
@@ -120,3 +135,29 @@ export function normalizeValue(value: any): any {
120135
}
121136
return value
122137
}
138+
139+
/**
140+
* Compare two values for equality, with special handling for Uint8Arrays and Buffers
141+
*/
142+
export function areValuesEqual(a: any, b: any): boolean {
143+
// Fast path for reference equality
144+
if (a === b) {
145+
return true
146+
}
147+
148+
// Check for Uint8Array/Buffer comparison
149+
const aIsUint8Array =
150+
(typeof Buffer !== `undefined` && a instanceof Buffer) ||
151+
a instanceof Uint8Array
152+
const bIsUint8Array =
153+
(typeof Buffer !== `undefined` && b instanceof Buffer) ||
154+
b instanceof Uint8Array
155+
156+
// If both are Uint8Arrays, compare by content
157+
if (aIsUint8Array && bIsUint8Array) {
158+
return areUint8ArraysEqual(a, b)
159+
}
160+
161+
// Different types or not Uint8Arrays
162+
return false
163+
}

packages/db/tests/query/compiler/evaluators.test.ts

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,88 @@ describe(`evaluators`, () => {
423423

424424
expect(compiled({})).toBe(false)
425425
})
426+
427+
it(`handles eq with matching Uint8Arrays (content equality)`, () => {
428+
const array1 = new Uint8Array([1, 2, 3, 4, 5])
429+
const array2 = new Uint8Array([1, 2, 3, 4, 5])
430+
const func = new Func(`eq`, [new Value(array1), new Value(array2)])
431+
const compiled = compileExpression(func)
432+
433+
// Should return true because content is the same
434+
expect(compiled({})).toBe(true)
435+
})
436+
437+
it(`handles eq with non-matching Uint8Arrays (different content)`, () => {
438+
const array1 = new Uint8Array([1, 2, 3, 4, 5])
439+
const array2 = new Uint8Array([1, 2, 3, 4, 6])
440+
const func = new Func(`eq`, [new Value(array1), new Value(array2)])
441+
const compiled = compileExpression(func)
442+
443+
// Should return false because content is different
444+
expect(compiled({})).toBe(false)
445+
})
446+
447+
it(`handles eq with Uint8Arrays of different lengths`, () => {
448+
const array1 = new Uint8Array([1, 2, 3, 4])
449+
const array2 = new Uint8Array([1, 2, 3, 4, 5])
450+
const func = new Func(`eq`, [new Value(array1), new Value(array2)])
451+
const compiled = compileExpression(func)
452+
453+
// Should return false because lengths are different
454+
expect(compiled({})).toBe(false)
455+
})
456+
457+
it(`handles eq with same Uint8Array reference`, () => {
458+
const array = new Uint8Array([1, 2, 3, 4, 5])
459+
const func = new Func(`eq`, [new Value(array), new Value(array)])
460+
const compiled = compileExpression(func)
461+
462+
// Should return true (fast path for reference equality)
463+
expect(compiled({})).toBe(true)
464+
})
465+
466+
it(`handles eq with Uint8Array and non-Uint8Array`, () => {
467+
const array = new Uint8Array([1, 2, 3])
468+
const value = [1, 2, 3]
469+
const func = new Func(`eq`, [new Value(array), new Value(value)])
470+
const compiled = compileExpression(func)
471+
472+
// Should return false because types are different
473+
expect(compiled({})).toBe(false)
474+
})
475+
476+
it(`handles eq with ULIDs (16-byte Uint8Arrays)`, () => {
477+
// Simulate ULID comparison - 16 bytes
478+
const ulid1 = new Uint8Array(16)
479+
const ulid2 = new Uint8Array(16)
480+
481+
// Fill with same values
482+
for (let i = 0; i < 16; i++) {
483+
ulid1[i] = i
484+
ulid2[i] = i
485+
}
486+
487+
const func = new Func(`eq`, [new Value(ulid1), new Value(ulid2)])
488+
const compiled = compileExpression(func)
489+
490+
// Should return true because content is identical
491+
expect(compiled({})).toBe(true)
492+
})
493+
494+
it(`handles eq with Buffers (if available)`, () => {
495+
if (typeof Buffer !== `undefined`) {
496+
const buffer1 = Buffer.from([1, 2, 3, 4, 5])
497+
const buffer2 = Buffer.from([1, 2, 3, 4, 5])
498+
const func = new Func(`eq`, [
499+
new Value(buffer1),
500+
new Value(buffer2),
501+
])
502+
const compiled = compileExpression(func)
503+
504+
// Should return true because content is the same
505+
expect(compiled({})).toBe(true)
506+
}
507+
})
426508
})
427509

428510
describe(`gt (greater than)`, () => {

0 commit comments

Comments
 (0)