Skip to content

Commit 65138b3

Browse files
authored
Merge pull request #260 from VisActor/feat/bin-count
feat: support `countField` option of `bin` and export `percentage`
2 parents ec1cd89 + ceee88f commit 65138b3

File tree

3 files changed

+67
-8
lines changed

3 files changed

+67
-8
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"changes": [
3+
{
4+
"comment": "feat: support `countField` option of `bin` and export `percentage`\n\n",
5+
"type": "none",
6+
"packageName": "@visactor/vdataset"
7+
}
8+
],
9+
"packageName": "@visactor/vdataset",
10+
"email": "[email protected]"
11+
}

packages/vdataset/__tests__/bin.test.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,40 @@ describe('bin transform', () => {
8080
const total = out.reduce((s: number, b: any) => s + b.cnt, 0);
8181
expect(total).toBe(3);
8282
});
83+
84+
test('countField is used as weights and percentage calculated correctly', () => {
85+
const data = [
86+
{ v: 1, w: 2 }, // goes to first bin
87+
{ v: 2, w: 3 }, // goes to first bin
88+
{ v: 8, w: 5 } // goes to second bin
89+
];
90+
// thresholds split at 5 -> two bins [0,5) and [5,10]
91+
const out: any = bin(data, { field: 'v', thresholds: [0, 5, 10], countField: 'w' });
92+
expect(out.length).toBe(2);
93+
// first bin should have count 5 (2+3), second bin 5
94+
expect(out[0].count).toBe(5);
95+
expect(out[1].count).toBe(5);
96+
// percentage should be 0.5 for both
97+
expect(out[0].percentage).toBeCloseTo(0.5, 12);
98+
expect(out[1].percentage).toBeCloseTo(0.5, 12);
99+
});
100+
101+
test('renamed percentage field via outputNames is present and correct', () => {
102+
const data = [
103+
{ v: 1, w: 1 },
104+
{ v: 2, w: 1 },
105+
{ v: 9, w: 2 }
106+
];
107+
const out: any = bin(data, {
108+
field: 'v',
109+
thresholds: [0, 5, 10],
110+
countField: 'w',
111+
outputNames: { percentage: 'pct' }
112+
});
113+
expect(out.length).toBe(2);
114+
// counts: first bin 2, second bin 2 -> percentages 0.5 each
115+
expect(out[0].cnt === undefined).toBeTruthy(); // ensure default countName not renamed here
116+
expect(out[0].pct).toBeCloseTo(0.5, 12);
117+
expect(out[1].pct).toBeCloseTo(0.5, 12);
118+
});
83119
});

packages/vdataset/src/transform/bin.ts

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ export interface IBinOptions {
66
* numeric field to bin
77
*/
88
field: string;
9+
/**
10+
* optional field whose value is added to the bin count in place of 1 (a datum missing this field counts as 1)
11+
*/
12+
countField?: string;
913
/**
1014
* number of bins (default 10)
1115
*/
@@ -29,7 +33,7 @@ export interface IBinOptions {
2933
/**
3034
* custom field names for the output data (x0, x1, count, values, percentage)
3135
*/
32-
outputNames?: { x0?: string; x1?: string; count?: string; values?: string };
36+
outputNames?: { x0?: string; x1?: string; count?: string; values?: string; percentage?: string };
3337
}
3438

3539
/**
@@ -41,13 +45,13 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
4145
if (!field) {
4246
return [];
4347
}
44-
48+
const countField = options.countField;
4549
const n = data.length;
4650
// compute data-driven extent
4751
let min = Infinity;
4852
let max = -Infinity;
4953

50-
if (options?.extent) {
54+
if (options.extent) {
5155
min = options.extent[0];
5256
max = options.extent[1];
5357
} else {
@@ -74,11 +78,11 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
7478

7579
// build thresholds
7680
let thresholds: number[] | undefined;
77-
if (options && options.thresholds && options.thresholds.length) {
81+
if (options.thresholds && options.thresholds.length) {
7882
// explicit thresholds provided by user
7983
thresholds = options.thresholds.slice();
8084
thresholds.sort((a, b) => a - b);
81-
} else if (options && typeof options.step === 'number' && options.step > 0) {
85+
} else if (typeof options.step === 'number' && options.step > 0) {
8286
// fixed bin width (step) provided: compute number of bins to cover [min, max]
8387
const stepSize = options.step;
8488
let startMin = min;
@@ -94,7 +98,7 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
9498
}
9599
} else {
96100
// fallback to bins count (default 10)
97-
const bins = options?.bins && options.bins > 0 ? Math.floor(options.bins) : 10;
101+
const bins = options.bins && options.bins > 0 ? Math.floor(options.bins) : 10;
98102
const stepSize = (max - min) / bins;
99103
thresholds = new Array(bins + 1);
100104
for (let i = 0; i <= bins; i++) {
@@ -111,14 +115,16 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
111115
const x1Name = options.outputNames?.x1 ?? 'x1';
112116
const countName = options.outputNames?.count ?? 'count';
113117
const valuesName = options.outputNames?.values ?? 'values';
118+
const percentageName = options.outputNames?.percentage ?? 'percentage';
114119
const out: any[] = new Array(numBins);
115120
for (let i = 0; i < numBins; i++) {
116121
out[i] = { [x0Name]: thresholds[i], [x1Name]: thresholds[i + 1], [countName]: 0 };
117-
if (options?.includeValues) {
122+
if (options.includeValues) {
118123
out[i][valuesName] = [] as object[];
119124
}
120125
}
121126

127+
let totalCount = 0;
122128
// assign each datum to a bin (left-inclusive, right-exclusive except last bin includes max)
123129
for (let i = 0; i < n; i++) {
124130
const v: any = (data[i] as any)[field];
@@ -136,7 +142,9 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
136142
const right = out[j][x1Name];
137143
const isLast = j === numBins - 1;
138144
if ((num >= left && num < right) || (isLast && num <= right)) {
139-
out[j][countName]++;
145+
const count = (data[i] as any)[countField] ?? 1;
146+
out[j][countName] += count;
147+
totalCount += count;
140148
if (options && options.includeValues) {
141149
out[j][valuesName].push(data[i]);
142150
}
@@ -145,6 +153,10 @@ export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
145153
}
146154
}
147155

156+
for (let i = 0, len = out.length; i < len; i++) {
157+
out[i][percentageName] = totalCount > 0 ? out[i][countName] / totalCount : 0;
158+
}
159+
148160
return out;
149161
};
150162

0 commit comments

Comments
 (0)