TanStack · KyleAMathews · Nov 10, 2025 · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025
diff --git a/.changeset/fix-uint8array-comparison.md b/.changeset/fix-uint8array-comparison.md
@@ -0,0 +1,6 @@
+---
+"@tanstack/db": patch
+"@tanstack/db-ivm": patch
+---
+
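+Fix Uint8Array/Buffer comparison to work by content instead of reference. This enables proper equality checks for binary IDs like ULIDs in WHERE clauses using the `eq` function. Hashing and key normalization are content-based for arrays of up to 128 bytes; larger arrays are still hashed and keyed by reference.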
diff --git a/packages/db-ivm/src/hashing/hash.ts b/packages/db-ivm/src/hashing/hash.ts
@@ -17,6 +17,12 @@ const OBJECT_MARKER = randomHash()
 const ARRAY_MARKER = randomHash()
 const MAP_MARKER = randomHash()
 const SET_MARKER = randomHash()
+const UINT8ARRAY_MARKER = randomHash()
+
+// Maximum byte length for Uint8Arrays to hash by content instead of reference
+// Arrays at or below this size are hashed by content, allowing proper equality comparisons
+// for small arrays like ULIDs (16 bytes) while still avoiding performance costs for large arrays
+const UINT8ARRAY_CONTENT_HASH_THRESHOLD = 128
 
 const hashCache = new WeakMap<object, number>()
 
@@ -35,6 +41,24 @@ function hashObject(input: object): number {
   let valueHash: number | undefined
   if (input instanceof Date) {
     valueHash = hashDate(input)
+  } else if (
+    // Check if input is a Uint8Array or Buffer
+    (typeof Buffer !== `undefined` && input instanceof Buffer) ||
+    input instanceof Uint8Array
+  ) {
+    // For small Uint8Arrays/Buffers (e.g., ULIDs, UUIDs), hash by content
+    // to enable proper equality comparisons. For large arrays, hash by reference
+    // to avoid performance costs.
+    if (input.byteLength <= UINT8ARRAY_CONTENT_HASH_THRESHOLD) {
+      valueHash = hashUint8Array(input)
+    } else {
+      // Deeply hashing large arrays would be too costly
+      // so we track them by reference and cache them in a weak map
+      return cachedReferenceHash(input)
+    }
+  } else if (input instanceof File) {
+    // Files are always hashed by reference due to their potentially large size
+    return cachedReferenceHash(input)
   } else {
     let plainObjectInput = input
     let marker = OBJECT_MARKER
@@ -53,17 +77,6 @@ function hashObject(input: object): number {
       plainObjectInput = [...input.entries()]
     }
 
-    if (
-      (typeof Buffer !== `undefined` && input instanceof Buffer) ||
-      input instanceof Uint8Array ||
-      input instanceof File
-    ) {
-      // Deeply hashing these objects would be too costly
-      // but we also don't want to ignore them
-      // so we track them by reference and cache them in a weak map
-      return cachedReferenceHash(input)
-    }
-
     valueHash = hashPlainObject(plainObjectInput, marker)
   }
 
@@ -78,6 +91,18 @@ function hashDate(input: Date): number {
   return hasher.digest()
 }
 
+function hashUint8Array(input: Uint8Array): number { // content hash: marker, then byte length, then every byte
+  const hasher = new MurmurHashStream()
+  hasher.update(UINT8ARRAY_MARKER) // per-type random marker (UINT8ARRAY_MARKER = randomHash())
+  // Mix in the byte length before the contents to differentiate arrays of different sizes
+  hasher.update(input.byteLength)
+  // Feed bytes one at a time via writeByte; update() on a number would write its 8-byte float64 encoding instead
+  for (let i = 0; i < input.byteLength; i++) {
+    hasher.writeByte(input[i]!) // non-null assertion is safe: i < byteLength keeps the index in range
+  }
+  return hasher.digest()
+}
+
 function hashPlainObject(input: object, marker: number): number {
   const hasher = new MurmurHashStream()
 

diff --git a/packages/db-ivm/src/hashing/murmur.ts b/packages/db-ivm/src/hashing/murmur.ts
@@ -51,7 +51,7 @@ export class MurmurHashStream implements Hasher {
     this.hash = Math.imul(this.hash, 5) + 0xe6546b64
   }
 
-  private _writeByte(byte: number): void {
+  writeByte(byte: number): void {
     this.carry |= (byte & 0xff) << (8 * this.carryBytes)
     this.carryBytes++
     this.length++
@@ -74,29 +74,29 @@ export class MurmurHashStream implements Hasher {
 
         for (let i = 0; i < description.length; i++) {
           const code = description.charCodeAt(i)
-          this._writeByte(code & 0xff)
-          this._writeByte((code >>> 8) & 0xff)
+          this.writeByte(code & 0xff)
+          this.writeByte((code >>> 8) & 0xff)
         }
         return
       }
       case `string`:
         this.update(STRING_MARKER)
         for (let i = 0; i < chunk.length; i++) {
           const code = chunk.charCodeAt(i)
-          this._writeByte(code & 0xff)
-          this._writeByte((code >>> 8) & 0xff)
+          this.writeByte(code & 0xff)
+          this.writeByte((code >>> 8) & 0xff)
         }
         return
       case `number`:
         dv.setFloat64(0, chunk, true) // fixed little-endian
-        this._writeByte(u8[0]!)
-        this._writeByte(u8[1]!)
-        this._writeByte(u8[2]!)
-        this._writeByte(u8[3]!)
-        this._writeByte(u8[4]!)
-        this._writeByte(u8[5]!)
-        this._writeByte(u8[6]!)
-        this._writeByte(u8[7]!)
+        this.writeByte(u8[0]!)
+        this.writeByte(u8[1]!)
+        this.writeByte(u8[2]!)
+        this.writeByte(u8[3]!)
+        this.writeByte(u8[4]!)
+        this.writeByte(u8[5]!)
+        this.writeByte(u8[6]!)
+        this.writeByte(u8[7]!)
         return
       case `bigint`: {
         let value = chunk
@@ -107,10 +107,10 @@ export class MurmurHashStream implements Hasher {
           this.update(BIG_INT_MARKER)
         }
         while (value > 0n) {
-          this._writeByte(Number(value & 0xffn))
+          this.writeByte(Number(value & 0xffn))
           value >>= 8n
         }
-        if (chunk === 0n) this._writeByte(0)
+        if (chunk === 0n) this.writeByte(0)
         return
       }
       default:

diff --git a/packages/db-ivm/tests/utils.test.ts b/packages/db-ivm/tests/utils.test.ts
@@ -299,7 +299,8 @@ describe(`hash`, () => {
       expect(hash4).not.toBe(hash6) // Different Symbol content should have different hash
     })
 
-    it(`should hash Buffers, Uint8Arrays and File objects by reference`, () => {
+    it(`should hash small Buffers and Uint8Arrays by content`, () => {
+      // Small buffers (≤128 bytes) are hashed by content for proper equality comparisons
       const buffer1 = Buffer.from([1, 2, 3])
       const buffer2 = Buffer.from([1, 2, 3])
       const buffer3 = Buffer.from([1, 2, 3, 4])
@@ -309,7 +310,7 @@ describe(`hash`, () => {
       const hash3 = hash(buffer3)
 
       expect(typeof hash1).toBe(hashType)
-      expect(hash1).not.toBe(hash2) // Same content but different buffer instances have a different hash because it would be too costly to deeply hash buffers
+      expect(hash1).toBe(hash2) // Same content = same hash for small buffers
       expect(hash1).not.toBe(hash3) // Different Buffer content should have different hash
       expect(hash1).toBe(hash(buffer1)) // Hashing same buffer should return same hash
 
@@ -322,10 +323,46 @@ describe(`hash`, () => {
       const hash6 = hash(uint8Array3)
 
       expect(typeof hash4).toBe(hashType)
-      expect(hash4).not.toBe(hash5) // Same content but different uint8Array instances have a different hash because it would be too costly to deeply hash uint8Arrays
+      expect(hash4).toBe(hash5) // Same content = same hash for small Uint8Arrays
       expect(hash4).not.toBe(hash6) // Different uint8Array content should have different hash
       expect(hash4).toBe(hash(uint8Array1)) // Hashing same uint8Array should return same hash
+    })
+
+    it(`should hash large Buffers, Uint8Arrays and File objects by reference`, () => {
+      // Large buffers (>128 bytes) are hashed by reference to avoid performance costs
+      const largeBuffer1 = Buffer.alloc(300)
+      const largeBuffer2 = Buffer.alloc(300)
+
+      // Fill with same content
+      for (let i = 0; i < 300; i++) {
+        largeBuffer1[i] = i % 256
+        largeBuffer2[i] = i % 256
+      }
+
+      const hash1 = hash(largeBuffer1)
+      const hash2 = hash(largeBuffer2)
+
+      expect(typeof hash1).toBe(hashType)
+      expect(hash1).not.toBe(hash2) // Same content but different instances = different hash for large buffers
+      expect(hash1).toBe(hash(largeBuffer1)) // Hashing same buffer should return same hash
+
+      const largeUint8Array1 = new Uint8Array(300)
+      const largeUint8Array2 = new Uint8Array(300)
+
+      // Fill with same content
+      for (let i = 0; i < 300; i++) {
+        largeUint8Array1[i] = i % 256
+        largeUint8Array2[i] = i % 256
+      }
+
+      const hash3 = hash(largeUint8Array1)
+      const hash4 = hash(largeUint8Array2)
+
+      expect(typeof hash3).toBe(hashType)
+      expect(hash3).not.toBe(hash4) // Same content but different instances = different hash for large Uint8Arrays
+      expect(hash3).toBe(hash(largeUint8Array1)) // Hashing same uint8Array should return same hash
 
+      // Files are always hashed by reference regardless of size
       const file1 = new File([`Hello, world!`], `test.txt`)
       const file2 = new File([`Hello, world!`], `test.txt`)
       const file3 = new File([`Hello, world!`], `test.txt`)

diff --git a/packages/db/src/query/compiler/evaluators.ts b/packages/db/src/query/compiler/evaluators.ts
@@ -3,7 +3,7 @@ import {
   UnknownExpressionTypeError,
   UnknownFunctionError,
 } from "../../errors.js"
-import { normalizeValue } from "../../utils/comparison.js"
+import { areValuesEqual, normalizeValue } from "../../utils/comparison.js"
 import type { BasicExpression, Func, PropRef } from "../ir.js"
 import type { NamespacedRow } from "../../types.js"
 
@@ -172,7 +172,8 @@ function compileFunction(func: Func, isSingleRow: boolean): (data: any) => any {
         if (isUnknown(a) || isUnknown(b)) {
           return null
         }
-        return a === b
+        // Use areValuesEqual for proper Uint8Array/Buffer comparison
+        return areValuesEqual(a, b)
       }
     }
     case `gt`: {

diff --git a/packages/db/src/utils/comparison.ts b/packages/db/src/utils/comparison.ts
@@ -112,11 +112,80 @@ export const defaultComparator = makeComparator({
 })
 
 /**
- * Normalize a value for comparison
+ * Compare two Uint8Arrays byte by byte: true only when both have the same length and identical contents
+ */
+function areUint8ArraysEqual(a: Uint8Array, b: Uint8Array): boolean {
+  if (a.byteLength !== b.byteLength) { // different sizes can never be equal; also bounds the loop below
+    return false
+  }
+  for (let i = 0; i < a.byteLength; i++) {
+    if (a[i] !== b[i]) { // first mismatching byte ends the comparison early
+      return false
+    }
+  }
+  return true // same length and every byte matched (two empty arrays compare equal)
+}
+
+/**
+ * Threshold for normalizing Uint8Arrays to string representations.
+ * Arrays larger than this are returned unchanged (so as Map keys they match only by reference) to avoid memory overhead.
+ * 128 bytes is enough for common ID formats (ULIDs are 16 bytes, UUIDs are 16 bytes)
+ * while avoiding excessive string allocation for large binary data.
+ */
+const UINT8ARRAY_NORMALIZE_THRESHOLD = 128
+
+/**
+ * Normalize a value for comparison and Map key usage
+ * Converts values that can't be directly compared or used as Map keys
+ * into comparable primitive representations
  */
 export function normalizeValue(value: any): any {
   if (value instanceof Date) {
     return value.getTime()
   }
+
+  // Normalize Uint8Arrays/Buffers to a string so that equal byte content yields the same Map key
+  // This enables content-based equality for binary data like ULIDs
+  const isUint8Array =
+    (typeof Buffer !== `undefined` && value instanceof Buffer) || // guard: `Buffer` may not exist as a global
+    value instanceof Uint8Array
+
+  if (isUint8Array) {
+    // Only normalize arrays of at most UINT8ARRAY_NORMALIZE_THRESHOLD bytes (inclusive) to bound the string size
+    if (value.byteLength <= UINT8ARRAY_NORMALIZE_THRESHOLD) {
+      // Builds a primitive string usable as a Map key, e.g. bytes [1, 2, 3] become "__u8__1,2,3"
+      // The "__u8__" prefix makes collisions with user strings unlikely, though not impossible
+      return `__u8__${Array.from(value).join(`,`)}`
+    }
+    // Larger arrays fall through and are returned unchanged, so as Map keys they match by reference
+    // Users working with large binary data should use a derived key if needed
+  }
+
   return value
 }
+
+/**
+ * Strict (`===`) equality, except that two Uint8Arrays/Buffers are equal when their bytes match (any size)
+ */
+export function areValuesEqual(a: any, b: any): boolean {
+  // Fast path: identical primitives or the very same object reference
+  if (a === b) {
+    return true
+  }
+
+  // Detect byte arrays; the typeof guard covers runtimes without a global Buffer
+  const aIsUint8Array =
+    (typeof Buffer !== `undefined` && a instanceof Buffer) ||
+    a instanceof Uint8Array
+  const bIsUint8Array =
+    (typeof Buffer !== `undefined` && b instanceof Buffer) ||
+    b instanceof Uint8Array
+
+  // Both are byte arrays: compare by content, with no size threshold (unlike normalizeValue)
+  if (aIsUint8Array && bIsUint8Array) {
+    return areUint8ArraysEqual(a, b)
+  }
+
+  // Not strictly equal and not a pair of byte arrays: unequal (other objects are not deep-compared)
+  return false
+}