diff --git a/common/changes/@visactor/vdataset/fix-bin-fill_2025-11-24-10-17.json b/common/changes/@visactor/vdataset/fix-bin-fill_2025-11-24-10-17.json new file mode 100644 index 0000000..740df2c --- /dev/null +++ b/common/changes/@visactor/vdataset/fix-bin-fill_2025-11-24-10-17.json @@ -0,0 +1,11 @@ +{ + "changes": [ + { + "comment": "fix: bin result should fill bins\n\n", + "type": "none", + "packageName": "@visactor/vdataset" + } + ], + "packageName": "@visactor/vdataset", + "email": "dingling112@gmail.com" +} \ No newline at end of file diff --git a/packages/vdataset/__tests__/bin.test.ts b/packages/vdataset/__tests__/bin.test.ts index b588f26..e0fcd64 100644 --- a/packages/vdataset/__tests__/bin.test.ts +++ b/packages/vdataset/__tests__/bin.test.ts @@ -17,6 +17,182 @@ describe('bin transform', () => { } }); + test('bins by count should fill bins', () => { + const data = [ + { + color: 'red', + shape: 'circle', + v: 1 + }, + { + color: 'red', + shape: 'circle', + v: 1 + }, + { + color: 'red', + shape: 'circle', + v: 1 + }, + { + color: 'red', + shape: 'circle', + v: 2 + }, + { + color: 'red', + shape: 'circle', + v: 5 + }, + { + color: 'red', + shape: 'circle', + v: 7 + }, + { + color: 'red', + shape: 'circle', + v: 8 + }, + { + color: 'red', + shape: 'circle', + v: 9 + }, + { + color: 'red', + shape: 'circle', + v: 10 + }, + { + color: 'blue', + shape: 'circle', + v: 1 + }, + { + color: 'blue', + shape: 'circle', + v: 1 + }, + { + color: 'blue', + shape: 'circle', + v: 1 + }, + { + color: 'blue', + shape: 'circle', + v: 2 + }, + { + color: 'blue', + shape: 'circle', + v: 5 + }, + { + color: 'blue', + shape: 'circle', + v: 7 + }, + { + color: 'blue', + shape: 'circle', + v: 8 + }, + { + color: 'blue', + shape: 'circle', + v: 9 + }, + { + color: 'red', + shape: 'triangle', + v: 1 + }, + { + color: 'red', + shape: 'triangle', + v: 1 + }, + { + color: 'red', + shape: 'triangle', + v: 1 + }, + { + color: 'red', + shape: 'triangle', + v: 2 + }, + { + color: 'red', + shape: 'triangle', + v: 5 + }, + { + color: 'red', + shape: 'triangle', + v: 7 + }, + { + color: 'red', + shape: 'triangle', + v: 8 + }, + { + color: 'red', + shape: 'triangle', + v: 9 + }, + { + color: 'blue', + shape: 'triangle', + v: 1 + }, + { + color: 'blue', + shape: 'triangle', + v: 1 + }, + { + color: 'blue', + shape: 'triangle', + v: 1 + }, + { + color: 'blue', + shape: 'triangle', + v: 2 + }, + { + color: 'blue', + shape: 'triangle', + v: 5 + }, + { + color: 'blue', + shape: 'triangle', + v: 7 + }, + { + color: 'blue', + shape: 'triangle', + v: 8 + }, + { + color: 'blue', + shape: 'triangle', + v: 9 + } + ]; + + const bins = bin(data, { + field: 'v', + bins: 5, + facetField: ['color', 'shape'] + }); + expect(bins.length).toBe(2 * 2 * 5); // color * shape * bins + }); test('bins in max value and threshold', () => { const data = [1, 1, 1, 2, 5, 7, 8, 9, 10].map(v => ({ v })); const bins = bin(data, { field: 'v', bins: 10 }); @@ -142,13 +318,17 @@ describe('bin transform', () => { ]; // thresholds split at 5 -> two bins const out: any = bin(data, { field: 'v', thresholds: [0, 5, 10], countField: 'w', groupField: 'g' }); - expect(out.length).toBe(3); - expect(out[0]).toMatchObject({ g: 'A', count: 5, x0: 0, x1: 5 }); - expect(out[1]).toMatchObject({ g: 'B', count: 1, x0: 0, x1: 5 }); - expect(out[2]).toMatchObject({ g: 'A', count: 4, x0: 5, x1: 10 }); - expect(out[0].percentage).toBeCloseTo(0.5, 12); - expect(out[1].percentage).toBeCloseTo(0.1, 12); - expect(out[2].percentage).toBeCloseTo(0.4, 12); + expect(out.length).toBe(4); + const byKey = (g: string, x0: number) => + out.find((item: any) => item.g === g && item.x0 === x0 && item.x1 === x0 + 5); + expect(byKey('A', 0)).toMatchObject({ count: 5 }); + expect(byKey('B', 0)).toMatchObject({ count: 1 }); + expect(byKey('A', 5)).toMatchObject({ count: 4 }); + expect(byKey('B', 5)).toMatchObject({ count: 0 }); + expect(byKey('A', 0)?.percentage).toBeCloseTo(0.5, 12); + expect(byKey('B', 0)?.percentage).toBeCloseTo(0.1, 12); + expect(byKey('A', 5)?.percentage).toBeCloseTo(0.4, 12); + expect(byKey('B', 5)?.percentage).toBeCloseTo(0, 12); }); test('groupField (multi) aggregates by composite key and preserves includeValues', () => { @@ -203,16 +383,38 @@ describe('bin transform', () => { { v: 6, type: 'B' } ]; const out: any = bin(data, { field: 'v', bins: 3, facetField: 'type' }); - expect(out.length).toBe(4); + expect(out.length).toBe(6); expect(out[0].x0).toBeCloseTo(1, 12); + expect(out[0].x1).toBeCloseTo(3, 12); expect(out[0].type).toBe('A'); - expect(out[3].x0).toBeCloseTo(5, 12); - expect(out[3].x1).toBeCloseTo(7, 12); - expect(out[3].type).toBe('B'); - expect(out[3].percentage).toBeCloseTo(2 / 3, 12); expect(out[0].percentage).toBeCloseTo(2 / 3, 12); + + expect(out[1].x0).toBeCloseTo(3, 12); + expect(out[1].x1).toBeCloseTo(5, 12); + expect(out[1].type).toBe('A'); + expect(out[1].percentage).toBeCloseTo(1 / 3, 12); + + expect(out[2].x0).toBeCloseTo(5, 12); + expect(out[2].x1).toBeCloseTo(7, 12); + expect(out[2].type).toBe('A'); + expect(out[2].percentage).toBeCloseTo(0, 12); + + expect(out[3].x0).toBeCloseTo(1, 12); + expect(out[3].x1).toBeCloseTo(3, 12); + expect(out[3].type).toBe('B'); + expect(out[3].percentage).toBeCloseTo(0, 12); + + expect(out[4].x0).toBeCloseTo(3, 12); + expect(out[4].x1).toBeCloseTo(5, 12); + expect(out[4].type).toBe('B'); + expect(out[4].percentage).toBeCloseTo(1 / 3, 12); + + expect(out[5].x0).toBeCloseTo(5, 12); + expect(out[5].x1).toBeCloseTo(7, 12); + expect(out[5].type).toBe('B'); + expect(out[5].percentage).toBeCloseTo(2 / 3, 12); }); - test('subView without groupField', () => { + test('subView with groupField keeps full bins per combination', () => { const data = [ { v: 1, type: 'A', group: 'china' }, { v: 2, type: 'A', group: 'china' }, @@ -228,20 +430,26 @@ describe('bin transform', () => { { v: 6, type: 'B', group: 'usa' } ]; const out: any = bin(data, { field: 'v', bins: 3, facetField: 'type', groupField: 'group' }); - expect(out.length).toBe(8); - expect(out[0].x0).toBeCloseTo(1, 12); - expect(out[0].x1).toBeCloseTo(3, 12); - expect(out[0].percentage).toBeCloseTo(1 / 3, 12); - expect(out[0].type).toBe('A'); - expect(out[0].group).toBe('china'); - expect(out[1].type).toBe('A'); - expect(out[1].group).toBe('usa'); - expect(out[0].percentage).toBeCloseTo(1 / 3, 12); - expect(out[7].x0).toBeCloseTo(5, 12); - expect(out[7].x1).toBeCloseTo(7, 12); - expect(out[7].type).toBe('B'); - expect(out[7].group).toBe('usa'); - expect(out[7].percentage).toBeCloseTo(1 / 3, 12); - expect(out[6].percentage).toBeCloseTo(1 / 3, 12); + // 2 types * 2 groups * 3 bins + expect(out.length).toBe(12); + const grouped: Record = {}; + for (const item of out) { + const key = `${item.type}-${item.group}`; + grouped[key] = grouped[key] || []; + grouped[key].push(item); + } + const expectedBins = [ + { x0: 1, x1: 3 }, + { x0: 3, x1: 5 }, + { x0: 5, x1: 7 } + ]; + for (const binsForCombo of Object.values(grouped)) { + expect(binsForCombo.length).toBe(3); + binsForCombo.sort((a, b) => a.x0 - b.x0); + binsForCombo.forEach((binItem, idx) => { + expect(binItem.x0).toBeCloseTo(expectedBins[idx].x0, 12); + expect(binItem.x1).toBeCloseTo(expectedBins[idx].x1, 12); + }); + } }); }); diff --git a/packages/vdataset/src/transform/bin.ts b/packages/vdataset/src/transform/bin.ts index e28a385..46c25f9 100644 --- a/packages/vdataset/src/transform/bin.ts +++ b/packages/vdataset/src/transform/bin.ts @@ -67,14 +67,15 @@ const subBin: Transform = (data: Array, options: ISubBinOptions) => { out.push(rec); } } - const groupField = options.groupField; - const usingGroup = !!groupField; + const usingGroup = Array.isArray(groupField) ? groupField.length > 0 : !!groupField; // when grouping, keep per-bin maps from groupKey -> aggregated weight, values and representative group object const binGroupCounts: Array> = usingGroup ? new Array(numBins).fill(0).map(() => new Map()) : []; const binGroupValues: Array> = usingGroup ? new Array(numBins).fill(0).map(() => new Map()) : []; - const binGroupRepr: Array> = usingGroup ? new Array(numBins).fill(0).map(() => new Map()) : []; + const groupKeyOrder: string[] = []; + const groupKeySet = new Set(); + const groupRepr = new Map(); for (let i = 0; i < n; i++) { const v: any = (data[i] as any)[field]; @@ -104,13 +105,13 @@ const subBin: Transform = (data: Array, options: ISubBinOptions) => { const m = binGroupCounts[j]; const prev = m.get(gk) ?? 0; m.set(gk, prev + datumCount); - // store representative group value/object - const repMap = binGroupRepr[j]; - if (!repMap.has(gk)) { + if (!groupKeySet.has(gk)) { + groupKeySet.add(gk); + groupKeyOrder.push(gk); if (isArray(groupField)) { - repMap.set(gk, Object.fromEntries((groupField as string[]).map(f => [f, (data[i] as any)[f]]))); + groupRepr.set(gk, Object.fromEntries((groupField as string[]).map(f => [f, (data[i] as any)[f]]))); } else { - repMap.set(gk, (data[i] as any)[groupField as string]); + groupRepr.set(gk, (data[i] as any)[groupField as string]); } } // collect values per group if needed @@ -140,28 +141,27 @@ const subBin: Transform = (data: Array, options: ISubBinOptions) => { const finalOut: any[] = []; if (usingGroup) { for (let j = 0; j < numBins; j++) { - const m = binGroupCounts[j]; - for (const [gk, sum] of m) { - totalCount += sum; + for (const gk of groupKeyOrder) { + const sum = binGroupCounts[j].get(gk) ?? 0; const rec: any = { [x0Name]: thresholds[j], [x1Name]: thresholds[j + 1], [countName]: sum }; - // attach group fields - const repr = binGroupRepr[j].get(gk); + const repr = groupRepr.get(gk) ?? {}; if (isArray(groupField)) { for (const f of groupField as string[]) { rec[f] = repr[f]; } - } else { + } else if (groupField) { rec[groupField as string] = repr; } if (options && options.includeValues) { rec[valuesName] = binGroupValues[j].get(gk) || []; } finalOut.push(rec); + totalCount += sum; } } - // compute percentages + const denominator = totalCount; for (const r of finalOut) { - r[percentageName] = totalCount > 0 ? r[countName] / totalCount : 0; + r[percentageName] = denominator > 0 ? r[countName] / denominator : 0; } } else { for (let i = 0, len = out.length; i < len; i++) {