
Commit f59bd6f

feat(metrics): add a classification metrics module (#176)
1 parent 7d2d021 commit f59bd6f

File tree

3 files changed, +183 −1 lines changed

ibis_ml/metrics.py

Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
import ibis.expr.datatypes as dt


def accuracy_score(y_true: dt.Integer, y_pred: dt.Integer) -> float:
    """Calculate the accuracy score of predicted values against true values.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    float
        The accuracy score, representing the fraction of correct predictions.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import accuracy_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> accuracy_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.583333 │
    └──────────┘
    """
    return (y_true == y_pred).mean()  # .to_pyarrow().as_py()


def precision_score(y_true: dt.Integer, y_pred: dt.Integer) -> float:
    """Calculate the precision score of predicted values against true values.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    float
        The precision score: the fraction of predicted positives that are
        true positives.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import precision_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> precision_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.666667 │
    └──────────┘
    """
    true_positive = (y_true & y_pred).sum()
    predicted_positive = y_pred.sum()
    return true_positive / predicted_positive


def recall_score(y_true: dt.Integer, y_pred: dt.Integer) -> float:
    """Calculate the recall score of predicted values against true values.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    float
        The recall score: the fraction of actual positives that are
        correctly predicted.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import recall_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> recall_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.571429 │
    └──────────┘
    """
    true_positive = (y_true & y_pred).sum()
    actual_positive = y_true.sum()
    return true_positive / actual_positive


def f1_score(y_true: dt.Integer, y_pred: dt.Integer) -> float:
    """Calculate the F1 score of predicted values against true values.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    float
        The F1 score, representing the harmonic mean of precision and recall.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import f1_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> f1_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.615385 │
    └──────────┘
    """
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    return (2 * precision * recall) / (precision + recall)
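
Note: as the commented-out .to_pyarrow().as_py() call and the tests below suggest, each function returns a deferred ibis scalar expression rather than an eager Python float; interactive mode simply renders it as the boxed value shown in the docstrings. A minimal usage sketch (not part of this commit) for materializing the metrics outside interactive mode, reusing the example table from the docstrings:

import ibis

from ibis_ml.metrics import accuracy_score, f1_score

t = ibis.memtable(
    {
        "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
        "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    }
)

# Each call builds a scalar expression; .to_pyarrow().as_py() executes it
# on the default backend and converts the result to a plain Python value.
acc = accuracy_score(t.actual, t.prediction).to_pyarrow().as_py()
f1 = f1_score(t.actual, t.prediction).to_pyarrow().as_py()
print(acc, f1)  # ~0.583333 and ~0.615385 for this example table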

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ doc = [
     "itables",
     "jupyter",
     "quartodoc",
-    "scikit-learn",
+    "scikit-learn<1.6.0",
     "skorch",
     "torch",
     "xgboost",

tests/test_metrics.py

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
import ibis
import pytest
import sklearn.metrics

import ibis_ml.metrics


@pytest.fixture
def results_table():
    return ibis.memtable(
        {
            "id": range(1, 13),
            "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
            "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
        }
    )


@pytest.mark.parametrize(
    "metric_name",
    [
        pytest.param("accuracy_score", id="accuracy_score"),
        pytest.param("precision_score", id="precision_score"),
        pytest.param("recall_score", id="recall_score"),
        pytest.param("f1_score", id="f1_score"),
    ],
)
def test_classification_metrics(results_table, metric_name):
    ibis_ml_func = getattr(ibis_ml.metrics, metric_name)
    sklearn_func = getattr(sklearn.metrics, metric_name)
    t = results_table
    df = t.to_pandas()
    result = ibis_ml_func(t.actual, t.prediction).to_pyarrow().as_py()
    expected = sklearn_func(df["actual"], df["prediction"])
    assert result == pytest.approx(expected, abs=1e-4)
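
The tests compare each ibis-ml metric against its scikit-learn counterpart on the same table. For reference, the docstring values can also be verified by hand from that table's confusion-matrix counts (a quick sanity check, not part of the commit):

# actual vs. prediction over the 12 example rows:
#   true positives  TP = 4, true negatives  TN = 3,
#   false positives FP = 2, false negatives FN = 3
tp, tn, fp, fn = 4, 3, 2, 3
accuracy = (tp + tn) / (tp + tn + fp + fn)          # 7/12 ≈ 0.583333
precision = tp / (tp + fp)                          # 4/6  ≈ 0.666667
recall = tp / (tp + fn)                             # 4/7  ≈ 0.571429
f1 = 2 * precision * recall / (precision + recall)  # 8/13 ≈ 0.615385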
