Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions packages/vdataset/__tests__/bin.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import bin from '../src/transform/bin';

describe('bin transform', () => {
  // Builds [{ v: first }, ..., { v: last }] in integer steps, both ends inclusive.
  const makeSeries = (first: number, last: number): any[] => {
    const rows: any[] = [];
    for (let value = first; value <= last; value++) {
      rows.push({ v: value });
    }
    return rows;
  };

  test('basic bins by count', () => {
    const samples = makeSeries(0, 99);
    const result = bin(samples, { field: 'v', bins: 10 });
    expect(result.length).toBe(10);

    // every datum must land in exactly one bin
    const counted = result.reduce((sum: number, b: any) => sum + b.count, 0);
    expect(counted).toBe(100);

    // a uniform 0..99 series should spread to roughly 10 items per bin
    result.forEach((b: any) => {
      expect(b.count).toBeGreaterThanOrEqual(8);
      expect(b.count).toBeLessThanOrEqual(12);
    });
  });

  test('step produces correct bin widths', () => {
    const samples = makeSeries(0, 20);
    const result = bin(samples, { field: 'v', step: 5 });
    // step=5 over 0..20 gives edges 0,5,10,15,20, i.e. four bins; the last bin is
    // closed on the right, so it also holds the value 20
    expect(result.length).toBeGreaterThanOrEqual(4);
    // every bin except the last must be exactly one step wide
    result.slice(0, -1).forEach((b: any) => {
      const width = b.x1 - b.x0;
      expect(width).toBeCloseTo(5, 12);
    });
  });

  test('explicit thresholds control bin edges', () => {
    const thresholds = [0, 5, 10, 20];
    const samples = [1, 3, 7, 12].map(v => ({ v }));
    const result = bin(samples, { field: 'v', thresholds });
    expect(result.length).toBe(3);
    // [0,5) holds 1 and 3; [5,10) holds 7; [10,20] holds 12
    [2, 1, 1].forEach((expected, idx) => {
      expect(result[idx].count).toBe(expected);
    });
  });

  test('extent overrides and includeValues', () => {
    const samples = [
      { v: 2, id: 'a' },
      { v: 8, id: 'b' }
    ];
    const result = bin(samples, { field: 'v', step: 5, extent: [0, 10], includeValues: true });
    expect(result.length).toBeGreaterThan(0);

    // collect the ids of the original items retained across all bins
    const ids: string[] = [];
    result.forEach((b: any) => {
      if (b.values) {
        b.values.forEach((item: any) => ids.push(item.id));
      }
    });
    expect(ids).toContain('a');
    expect(ids).toContain('b');
  });

  test('empty data returns empty array', () => {
    const result = bin([], { field: 'x', bins: 5 });
    expect(Array.isArray(result)).toBeTruthy();
    expect(result.length).toBe(0);
  });
});
2 changes: 2 additions & 0 deletions packages/vdataset/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ export type { IMapOptions } from './transform/map';
export { fold } from './transform/fold';
export type { IFoldOptions } from './transform/fold';
export { fields } from './transform/fields';
export type { IBinOptions } from './transform/bin';
export { bin } from './transform/bin';
export type { IFieldsOptions } from './transform/fields';
// transformType
export * from './transform/index';
Expand Down
125 changes: 125 additions & 0 deletions packages/vdataset/src/transform/bin.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import { isNil } from '@visactor/vutils';
import type { Transform } from '.';

/**
 * Options accepted by the `bin` transform.
 *
 * Bin edges come from `thresholds` when it is non-empty, otherwise from `step`
 * when it is a positive number, otherwise from `bins`.
 */
export interface IBinOptions {
/** Name of the property read from each datum; values are coerced to numbers, and nullish or non-finite values are ignored. */
field: string;
/** Number of equal-width bins, used when neither `thresholds` nor `step` applies (default 10). */
bins?: number;
/** Explicit bin edges; a sorted copy is used, so N edges produce N - 1 bins. */
thresholds?: number[];
/** Optional fixed bin width (interval step). If provided and positive, overrides `bins`. */
step?: number;
/** Optional [min, max] to use instead of scanning the data for its extent. */
extent?: [number, number];
/** Whether each bin also keeps the original items that fell into it (as a `values` array). */
includeValues?: boolean;
}

/**
 * Bin transform: groups the numeric values of `options.field` into histogram bins.
 *
 * Bin edges are chosen by the first rule that applies:
 *  1. `options.thresholds` (non-empty): a sorted copy is used as the edges.
 *  2. `options.step` (finite and > 0): edges are spaced `step` apart, starting at
 *     `extent[0]` when an extent is given, otherwise at the data minimum rounded
 *     down to a multiple of `step`. At least one bin is always produced, even
 *     when min equals max.
 *  3. Otherwise `options.bins` equal-width bins over [min, max]; 10 is used when
 *     `bins` is missing or does not floor to a finite integer >= 1. If min equals
 *     max all edges coincide and every value is counted in the last bin.
 *
 * Bins are left-inclusive and right-exclusive, except the last bin, which also
 * includes its right edge. Values that are nullish, not finite after numeric
 * coercion, or outside every bin are not counted.
 *
 * @param data - items to bin
 * @param options - binning options; `field` is required
 * @returns an array of `{ x0, x1, count }` objects (plus `values` when
 *   `includeValues` is set), or `[]` when `field` is missing, `data` is empty,
 *   the extent is not finite, or fewer than two edges are available
 */
export const bin: Transform = (data: Array<object>, options?: IBinOptions) => {
  if (!options || !options.field) {
    return [];
  }
  const field = options.field;

  const n = data.length;
  if (n === 0) {
    return [];
  }

  // resolve the value range: explicit extent wins, otherwise scan the data
  let min = Infinity;
  let max = -Infinity;
  if (options.extent) {
    min = options.extent[0];
    max = options.extent[1];
  } else {
    for (let i = 0; i < n; i++) {
      const v: any = (data[i] as any)[field];
      if (isNil(v)) {
        continue;
      }
      const num = +v;
      if (Number.isFinite(num)) {
        if (num < min) {
          min = num;
        }
        if (num > max) {
          max = num;
        }
      }
    }
  }

  if (!Number.isFinite(min) || !Number.isFinite(max)) {
    return [];
  }

  // build thresholds
  let thresholds: number[];
  if (options.thresholds && options.thresholds.length) {
    // explicit thresholds provided by user; sort a copy so the caller's array is untouched
    thresholds = options.thresholds.slice();
    thresholds.sort((a, b) => a - b);
  } else if (typeof options.step === 'number' && Number.isFinite(options.step) && options.step > 0) {
    // fixed bin width: without an explicit extent, snap the first edge down to a multiple of the step
    const stepSize = options.step;
    const start = options.extent ? min : Math.floor(min / stepSize) * stepSize;
    thresholds = [start];

    // Each edge is derived from the start by multiplication rather than by repeated
    // addition, so rounding error does not accumulate and add a spurious trailing bin.
    // The first edge is pushed unconditionally so a zero-width range still gets a bin.
    for (let k = 1; ; k++) {
      const edge = start + k * stepSize;
      thresholds.push(edge);
      // written as a negation so a NaN edge also terminates the loop
      if (!(edge < max)) {
        break;
      }
    }
  } else {
    // fallback to bins count (default 10); reject counts that floor below 1 or are not finite
    const requested = Math.floor(Number(options.bins));
    const binCount = Number.isFinite(requested) && requested >= 1 ? requested : 10;
    const width = (max - min) / binCount;
    thresholds = [];
    for (let i = 0; i < binCount; i++) {
      thresholds.push(min + width * i);
    }
    // use max itself for the final edge so rounding cannot exclude the largest value
    thresholds.push(max);
  }

  const numBins = Math.max(0, thresholds.length - 1);
  if (numBins === 0) {
    return [];
  }

  const keepValues = !!options.includeValues;
  const out: any[] = new Array(numBins);
  for (let i = 0; i < numBins; i++) {
    out[i] = { x0: thresholds[i], x1: thresholds[i + 1], count: 0 };
    if (keepValues) {
      out[i].values = [] as object[];
    }
  }

  // assign each datum to a bin (left-inclusive, right-exclusive except last bin includes max)
  const lastIndex = numBins - 1;
  for (let i = 0; i < n; i++) {
    const v: any = (data[i] as any)[field];
    if (v == null) {
      continue;
    }
    const num = +v;
    if (!Number.isFinite(num)) {
      continue;
    }

    // find bin index (linear scan is fine for moderate bin counts)
    for (let j = 0; j < numBins; j++) {
      const target = out[j];
      const inside = num >= target.x0 && num < target.x1;
      if (inside || (j === lastIndex && num <= target.x1)) {
        target.count++;
        if (keepValues) {
          target.values.push(data[i]);
        }
        break;
      }
    }
  }

  return out;
};

export default bin;
2 changes: 1 addition & 1 deletion packages/vutils/__tests__/common/regression-linear.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ test('regressionLinear()', function () {
}
];
const res = regressionLinear(arr);
expect(res.coef).toEqual([0, 2]);
expect(res.coef).toEqual({ a: 0, b: 2 });
expect(res.predict(1)).toBeCloseTo(2);
});
36 changes: 36 additions & 0 deletions packages/vutils/__tests__/ecdf.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import ecdf from '../src/common/ecdf';

describe('ecdf', () => {
  test('evaluate single and array and grid', () => {
    const samples = [1, 2, 2, 3];
    const model = ecdf(samples);
    const threeQuarters = 3 / 4;

    expect(model.n).toBe(4);
    // three of the four samples are <= 2
    expect(model.evaluate(2)).toBeCloseTo(threeQuarters);
    expect(model.evaluate([0, 2, 4])).toEqual([0, threeQuarters, 1]);

    const grid = model.evaluateGrid(5);
    expect(grid.length).toBe(5);
    // a cumulative distribution never decreases from one grid point to the next
    for (let prev = 0; prev + 1 < grid.length; prev++) {
      expect(grid[prev + 1].y).toBeGreaterThanOrEqual(grid[prev].y);
    }
  });

  test('empty and constant data', () => {
    // no samples: n is 0 and the function evaluates to 0
    const emptyModel = ecdf([]);
    expect(emptyModel.n).toBe(0);
    expect(emptyModel.evaluate(1)).toBe(0);

    // constant samples: every grid point sits on that value with probability 1
    const constantModel = ecdf([5, 5, 5]);
    const grid = constantModel.evaluateGrid(3);
    expect(grid.every(pt => pt.x === 5)).toBeTruthy();
    expect(grid.every(pt => pt.y === 1)).toBeTruthy();
  });

  test('evaluate array and single consistent', () => {
    const model = ecdf([3, 1, 4, 1, 5]);
    const query = 2;
    const scalarResult = model.evaluate(query) as number;
    const [arrayResult] = model.evaluate([query]) as number[];
    expect(scalarResult).toBeCloseTo(arrayResult, 12);
  });
});
111 changes: 111 additions & 0 deletions packages/vutils/__tests__/kde.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import { epanechnikov, gaussian, kde, scott, silverman } from '../src/common/kde';

describe('kde', () => {
  test('evaluate and grid', () => {
    const estimator = kde([1, 2, 3, 4, 5], { bandwidthMethod: 'scott' });
    expect(typeof estimator.bandwidth).toBe('number');

    // a scalar query yields a scalar, an array query yields an array
    const single = estimator.evaluate(3);
    expect(typeof single).toBe('number');
    const many = estimator.evaluate([1, 2, 3]);
    expect(Array.isArray(many)).toBeTruthy();

    const grid = estimator.evaluateGrid(10);
    expect(Array.isArray(grid)).toBeTruthy();
    expect(grid.length).toBe(10);
  });

  test('constant data returns zeros density when bandwidth is zero', () => {
    const estimator = kde([5, 5, 5], { bandwidth: 0 });
    const grid = estimator.evaluateGrid(3);
    expect(grid.length).toBe(3);
    expect(grid.every(pt => pt.y === 0)).toBeTruthy();
  });
});

describe('kde', () => {
  test('basic gaussian kde returns higher density near samples (evaluator API)', () => {
    const samples = [0, 0, 1, 2, 3];
    const queries = [-1, 0, 0.5, 1, 2, 4];
    const estimator = kde(samples, { kernel: gaussian, bandwidth: 0.5 });
    const densities = estimator.evaluate(queries) as number[];

    // the peak must sit at a query point inside the sampled range, not at -1
    const peak = Math.max(...densities);
    const peakIndex = densities.indexOf(peak);
    expect(queries[peakIndex]).toBeGreaterThanOrEqual(0);
    expect(densities.length).toBe(queries.length);
  });

  test('epanechnikov kernel gives finite densities and respects bandwidth selectors (evaluator)', () => {
    const samples = [1, 2, 3, 4, 5];
    const queries = [0, 1, 2, 3, 4, 5, 6];
    const scottModel = kde(samples, { kernel: epanechnikov, bandwidthMethod: 'scott' });
    const silvermanModel = kde(samples, { kernel: epanechnikov, bandwidthMethod: 'silverman' });
    const scottDensities = scottModel.evaluate(queries) as number[];
    const silvermanDensities = silvermanModel.evaluate(queries) as number[];

    expect(scottDensities.length).toBe(queries.length);
    expect(silvermanDensities.length).toBe(queries.length);
    scottDensities.forEach((density, idx) => {
      expect(isFinite(density)).toBe(true);
      expect(isFinite(silvermanDensities[idx])).toBe(true);
    });
  });

  test('bandwidth helpers roughly scale with n and std', () => {
    const std = 2;
    const scottWidth = scott(100, std);
    const silvermanWidth = silverman(100, std);
    expect(scottWidth).toBeGreaterThan(0);
    expect(silvermanWidth).toBeGreaterThan(0);
  });

  test('evaluateGrid returns N points and densities', () => {
    const estimator = kde([0, 1, 2], { kernel: gaussian, bandwidth: 0.5 });
    const grid = estimator.evaluateGrid(5);
    expect(grid.length).toBe(5);
    // grid x positions must be non-decreasing
    for (let prev = 0; prev + 1 < grid.length; prev++) {
      expect(grid[prev + 1].x).toBeGreaterThanOrEqual(grid[prev].x);
    }
  });

  test('evaluate(single) equals evaluate([single]) and is finite', () => {
    const estimator = kde([0, 1, 2, 3], { kernel: gaussian });
    const query = 1.3;
    const scalarResult = estimator.evaluate(query) as number;
    const [arrayResult] = estimator.evaluate([query]) as number[];
    expect(typeof scalarResult).toBe('number');
    expect(Number.isFinite(scalarResult)).toBe(true);
    expect(scalarResult).toBeCloseTo(arrayResult, 12);
  });

  test('constant data evaluateGrid returns repeated point and same densities', () => {
    const estimator = kde([5, 5, 5], { kernel: gaussian });
    const grid = estimator.evaluateGrid(4);
    expect(grid.length).toBe(4);
    // every grid point sits on the constant value and shares the first point's density
    for (const point of grid) {
      expect(point.x).toBe(5);
      expect(point.y).toBeCloseTo(grid[0].y, 12);
    }
  });

  test('kernels produce non-negative finite densities and bandwidth present', () => {
    const samples = [0, 2, 4, 6];
    const gaussianModel = kde(samples, { kernel: gaussian });
    const epanechnikovModel = kde(samples, { kernel: epanechnikov });
    const queries = [0, 1, 2, 3, 4];
    const gaussianDensities = gaussianModel.evaluate(queries) as number[];
    const epanechnikovDensities = epanechnikovModel.evaluate(queries) as number[];

    queries.forEach((_, idx) => {
      expect(Number.isFinite(gaussianDensities[idx])).toBe(true);
      expect(gaussianDensities[idx]).toBeGreaterThanOrEqual(0);
      expect(Number.isFinite(epanechnikovDensities[idx])).toBe(true);
      expect(epanechnikovDensities[idx]).toBeGreaterThanOrEqual(0);
    });
    expect(gaussianModel.bandwidth).toBeGreaterThan(0);
    expect(epanechnikovModel.bandwidth).toBeGreaterThan(0);
  });
});
Loading
Loading