Skip to content

Commit 432f482

Browse files
committed
fix: bin result should fill bins
1 parent ef2290e commit 432f482

File tree

2 files changed

+252
-44
lines changed

2 files changed

+252
-44
lines changed

packages/vdataset/__tests__/bin.test.ts

Lines changed: 236 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,182 @@ describe('bin transform', () => {
1717
}
1818
});
1919

20+
test('bins by count should fill bins', () => {
21+
const data = [
22+
{
23+
color: 'red',
24+
shape: 'circle',
25+
v: 1
26+
},
27+
{
28+
color: 'red',
29+
shape: 'circle',
30+
v: 1
31+
},
32+
{
33+
color: 'red',
34+
shape: 'circle',
35+
v: 1
36+
},
37+
{
38+
color: 'red',
39+
shape: 'circle',
40+
v: 2
41+
},
42+
{
43+
color: 'red',
44+
shape: 'circle',
45+
v: 5
46+
},
47+
{
48+
color: 'red',
49+
shape: 'circle',
50+
v: 7
51+
},
52+
{
53+
color: 'red',
54+
shape: 'circle',
55+
v: 8
56+
},
57+
{
58+
color: 'red',
59+
shape: 'circle',
60+
v: 9
61+
},
62+
{
63+
color: 'red',
64+
shape: 'circle',
65+
v: 10
66+
},
67+
{
68+
color: 'blue',
69+
shape: 'circle',
70+
v: 1
71+
},
72+
{
73+
color: 'blue',
74+
shape: 'circle',
75+
v: 1
76+
},
77+
{
78+
color: 'blue',
79+
shape: 'circle',
80+
v: 1
81+
},
82+
{
83+
color: 'blue',
84+
shape: 'circle',
85+
v: 2
86+
},
87+
{
88+
color: 'blue',
89+
shape: 'circle',
90+
v: 5
91+
},
92+
{
93+
color: 'blue',
94+
shape: 'circle',
95+
v: 7
96+
},
97+
{
98+
color: 'blue',
99+
shape: 'circle',
100+
v: 8
101+
},
102+
{
103+
color: 'blue',
104+
shape: 'circle',
105+
v: 9
106+
},
107+
{
108+
color: 'red',
109+
shape: 'triangle',
110+
v: 1
111+
},
112+
{
113+
color: 'red',
114+
shape: 'triangle',
115+
v: 1
116+
},
117+
{
118+
color: 'red',
119+
shape: 'triangle',
120+
v: 1
121+
},
122+
{
123+
color: 'red',
124+
shape: 'triangle',
125+
v: 2
126+
},
127+
{
128+
color: 'red',
129+
shape: 'triangle',
130+
v: 5
131+
},
132+
{
133+
color: 'red',
134+
shape: 'triangle',
135+
v: 7
136+
},
137+
{
138+
color: 'red',
139+
shape: 'triangle',
140+
v: 8
141+
},
142+
{
143+
color: 'red',
144+
shape: 'triangle',
145+
v: 9
146+
},
147+
{
148+
color: 'blue',
149+
shape: 'triangle',
150+
v: 1
151+
},
152+
{
153+
color: 'blue',
154+
shape: 'triangle',
155+
v: 1
156+
},
157+
{
158+
color: 'blue',
159+
shape: 'triangle',
160+
v: 1
161+
},
162+
{
163+
color: 'blue',
164+
shape: 'triangle',
165+
v: 2
166+
},
167+
{
168+
color: 'blue',
169+
shape: 'triangle',
170+
v: 5
171+
},
172+
{
173+
color: 'blue',
174+
shape: 'triangle',
175+
v: 7
176+
},
177+
{
178+
color: 'blue',
179+
shape: 'triangle',
180+
v: 8
181+
},
182+
{
183+
color: 'blue',
184+
shape: 'triangle',
185+
v: 9
186+
}
187+
];
188+
189+
const bins = bin(data, {
190+
field: 'v',
191+
bins: 5,
192+
facetField: ['color', 'shape']
193+
});
194+
expect(bins.length).toBe(2 * 2 * 5); // color * shape * bins
195+
});
20196
test('bins in max value and threshold', () => {
21197
const data = [1, 1, 1, 2, 5, 7, 8, 9, 10].map(v => ({ v }));
22198
const bins = bin(data, { field: 'v', bins: 10 });
@@ -142,13 +318,17 @@ describe('bin transform', () => {
142318
];
143319
// thresholds split at 5 -> two bins
144320
const out: any = bin(data, { field: 'v', thresholds: [0, 5, 10], countField: 'w', groupField: 'g' });
145-
expect(out.length).toBe(3);
146-
expect(out[0]).toMatchObject({ g: 'A', count: 5, x0: 0, x1: 5 });
147-
expect(out[1]).toMatchObject({ g: 'B', count: 1, x0: 0, x1: 5 });
148-
expect(out[2]).toMatchObject({ g: 'A', count: 4, x0: 5, x1: 10 });
149-
expect(out[0].percentage).toBeCloseTo(0.5, 12);
150-
expect(out[1].percentage).toBeCloseTo(0.1, 12);
151-
expect(out[2].percentage).toBeCloseTo(0.4, 12);
321+
expect(out.length).toBe(4);
322+
const byKey = (g: string, x0: number) =>
323+
out.find((item: any) => item.g === g && item.x0 === x0 && item.x1 === x0 + 5);
324+
expect(byKey('A', 0)).toMatchObject({ count: 5 });
325+
expect(byKey('B', 0)).toMatchObject({ count: 1 });
326+
expect(byKey('A', 5)).toMatchObject({ count: 4 });
327+
expect(byKey('B', 5)).toMatchObject({ count: 0 });
328+
expect(byKey('A', 0)?.percentage).toBeCloseTo(0.5, 12);
329+
expect(byKey('B', 0)?.percentage).toBeCloseTo(0.1, 12);
330+
expect(byKey('A', 5)?.percentage).toBeCloseTo(0.4, 12);
331+
expect(byKey('B', 5)?.percentage).toBeCloseTo(0, 12);
152332
});
153333

154334
test('groupField (multi) aggregates by composite key and preserves includeValues', () => {
@@ -203,16 +383,38 @@ describe('bin transform', () => {
203383
{ v: 6, type: 'B' }
204384
];
205385
const out: any = bin(data, { field: 'v', bins: 3, facetField: 'type' });
206-
expect(out.length).toBe(4);
386+
expect(out.length).toBe(6);
207387
expect(out[0].x0).toBeCloseTo(1, 12);
388+
expect(out[0].x1).toBeCloseTo(3, 12);
208389
expect(out[0].type).toBe('A');
209-
expect(out[3].x0).toBeCloseTo(5, 12);
210-
expect(out[3].x1).toBeCloseTo(7, 12);
211-
expect(out[3].type).toBe('B');
212-
expect(out[3].percentage).toBeCloseTo(2 / 3, 12);
213390
expect(out[0].percentage).toBeCloseTo(2 / 3, 12);
391+
392+
expect(out[1].x0).toBeCloseTo(3, 12);
393+
expect(out[1].x1).toBeCloseTo(5, 12);
394+
expect(out[1].type).toBe('A');
395+
expect(out[1].percentage).toBeCloseTo(1 / 3, 12);
396+
397+
expect(out[2].x0).toBeCloseTo(5, 12);
398+
expect(out[2].x1).toBeCloseTo(7, 12);
399+
expect(out[2].type).toBe('A');
400+
expect(out[2].percentage).toBeCloseTo(0, 12);
401+
402+
expect(out[3].x0).toBeCloseTo(1, 12);
403+
expect(out[3].x1).toBeCloseTo(3, 12);
404+
expect(out[3].type).toBe('B');
405+
expect(out[3].percentage).toBeCloseTo(0, 12);
406+
407+
expect(out[4].x0).toBeCloseTo(3, 12);
408+
expect(out[4].x1).toBeCloseTo(5, 12);
409+
expect(out[4].type).toBe('B');
410+
expect(out[4].percentage).toBeCloseTo(1 / 3, 12);
411+
412+
expect(out[5].x0).toBeCloseTo(5, 12);
413+
expect(out[5].x1).toBeCloseTo(7, 12);
414+
expect(out[5].type).toBe('B');
415+
expect(out[5].percentage).toBeCloseTo(2 / 3, 12);
214416
});
215-
test('subView without groupField', () => {
417+
test('subView with groupField keeps full bins per combination', () => {
216418
const data = [
217419
{ v: 1, type: 'A', group: 'china' },
218420
{ v: 2, type: 'A', group: 'china' },
@@ -228,20 +430,26 @@ describe('bin transform', () => {
228430
{ v: 6, type: 'B', group: 'usa' }
229431
];
230432
const out: any = bin(data, { field: 'v', bins: 3, facetField: 'type', groupField: 'group' });
231-
expect(out.length).toBe(8);
232-
expect(out[0].x0).toBeCloseTo(1, 12);
233-
expect(out[0].x1).toBeCloseTo(3, 12);
234-
expect(out[0].percentage).toBeCloseTo(1 / 3, 12);
235-
expect(out[0].type).toBe('A');
236-
expect(out[0].group).toBe('china');
237-
expect(out[1].type).toBe('A');
238-
expect(out[1].group).toBe('usa');
239-
expect(out[0].percentage).toBeCloseTo(1 / 3, 12);
240-
expect(out[7].x0).toBeCloseTo(5, 12);
241-
expect(out[7].x1).toBeCloseTo(7, 12);
242-
expect(out[7].type).toBe('B');
243-
expect(out[7].group).toBe('usa');
244-
expect(out[7].percentage).toBeCloseTo(1 / 3, 12);
245-
expect(out[6].percentage).toBeCloseTo(1 / 3, 12);
433+
// 2 types * 2 groups * 3 bins
434+
expect(out.length).toBe(12);
435+
const grouped: Record<string, any[]> = {};
436+
for (const item of out) {
437+
const key = `${item.type}-${item.group}`;
438+
grouped[key] = grouped[key] || [];
439+
grouped[key].push(item);
440+
}
441+
const expectedBins = [
442+
{ x0: 1, x1: 3 },
443+
{ x0: 3, x1: 5 },
444+
{ x0: 5, x1: 7 }
445+
];
446+
for (const binsForCombo of Object.values(grouped)) {
447+
expect(binsForCombo.length).toBe(3);
448+
binsForCombo.sort((a, b) => a.x0 - b.x0);
449+
binsForCombo.forEach((binItem, idx) => {
450+
expect(binItem.x0).toBeCloseTo(expectedBins[idx].x0, 12);
451+
expect(binItem.x1).toBeCloseTo(expectedBins[idx].x1, 12);
452+
});
453+
}
246454
});
247455
});

packages/vdataset/src/transform/bin.ts

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,15 @@ const subBin: Transform = (data: Array<object>, options: ISubBinOptions) => {
6767
out.push(rec);
6868
}
6969
}
70-
7170
const groupField = options.groupField;
72-
const usingGroup = !!groupField;
71+
const usingGroup = Array.isArray(groupField) ? groupField.length > 0 : !!groupField;
7372

7473
// when grouping, keep per-bin maps from groupKey -> aggregated weight, values and representative group object
7574
const binGroupCounts: Array<Map<string, number>> = usingGroup ? new Array(numBins).fill(0).map(() => new Map()) : [];
7675
const binGroupValues: Array<Map<string, any[]>> = usingGroup ? new Array(numBins).fill(0).map(() => new Map()) : [];
77-
const binGroupRepr: Array<Map<string, any>> = usingGroup ? new Array(numBins).fill(0).map(() => new Map()) : [];
76+
const groupKeyOrder: string[] = [];
77+
const groupKeySet = new Set<string>();
78+
const groupRepr = new Map<string, any>();
7879

7980
for (let i = 0; i < n; i++) {
8081
const v: any = (data[i] as any)[field];
@@ -104,13 +105,13 @@ const subBin: Transform = (data: Array<object>, options: ISubBinOptions) => {
104105
const m = binGroupCounts[j];
105106
const prev = m.get(gk) ?? 0;
106107
m.set(gk, prev + datumCount);
107-
// store representative group value/object
108-
const repMap = binGroupRepr[j];
109-
if (!repMap.has(gk)) {
108+
if (!groupKeySet.has(gk)) {
109+
groupKeySet.add(gk);
110+
groupKeyOrder.push(gk);
110111
if (isArray(groupField)) {
111-
repMap.set(gk, Object.fromEntries((groupField as string[]).map(f => [f, (data[i] as any)[f]])));
112+
groupRepr.set(gk, Object.fromEntries((groupField as string[]).map(f => [f, (data[i] as any)[f]])));
112113
} else {
113-
repMap.set(gk, (data[i] as any)[groupField as string]);
114+
groupRepr.set(gk, (data[i] as any)[groupField as string]);
114115
}
115116
}
116117
// collect values per group if needed
@@ -140,28 +141,27 @@ const subBin: Transform = (data: Array<object>, options: ISubBinOptions) => {
140141
const finalOut: any[] = [];
141142
if (usingGroup) {
142143
for (let j = 0; j < numBins; j++) {
143-
const m = binGroupCounts[j];
144-
for (const [gk, sum] of m) {
145-
totalCount += sum;
144+
for (const gk of groupKeyOrder) {
145+
const sum = binGroupCounts[j].get(gk) ?? 0;
146146
const rec: any = { [x0Name]: thresholds[j], [x1Name]: thresholds[j + 1], [countName]: sum };
147-
// attach group fields
148-
const repr = binGroupRepr[j].get(gk);
147+
const repr = groupRepr.get(gk) ?? {};
149148
if (isArray(groupField)) {
150149
for (const f of groupField as string[]) {
151150
rec[f] = repr[f];
152151
}
153-
} else {
152+
} else if (groupField) {
154153
rec[groupField as string] = repr;
155154
}
156155
if (options && options.includeValues) {
157156
rec[valuesName] = binGroupValues[j].get(gk) || [];
158157
}
159158
finalOut.push(rec);
159+
totalCount += sum;
160160
}
161161
}
162-
// compute percentages
162+
const denominator = totalCount;
163163
for (const r of finalOut) {
164-
r[percentageName] = totalCount > 0 ? r[countName] / totalCount : 0;
164+
r[percentageName] = denominator > 0 ? r[countName] / denominator : 0;
165165
}
166166
} else {
167167
for (let i = 0, len = out.length; i < len; i++) {

0 commit comments

Comments
 (0)