
Commit f59bd6f

feat(metrics): add a classification metrics module (#176)
1 parent 7d2d021 commit f59bd6f

File tree

3 files changed, +183 −1 lines changed

ibis_ml/metrics.py

Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
import ibis.expr.datatypes as dt


def accuracy_score(y_true: dt.Integer, y_pred: dt.Integer) -> float:
    """Calculate the accuracy score of predicted values against true values.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    float
        The accuracy score, representing the fraction of correct predictions.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import accuracy_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> accuracy_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.583333 │
    └──────────┘
    """
    return (y_true == y_pred).mean()  # .to_pyarrow().as_py()


def precision_score(y_true: dt.Integer, y_pred: dt.Integer) -> float:
    """Calculate the precision score of predicted values against true values.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    float
        The precision score: the fraction of predicted positives that are
        true positives.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import precision_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> precision_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.666667 │
    └──────────┘
    """
    true_positive = (y_true & y_pred).sum()
    predicted_positive = y_pred.sum()
    return true_positive / predicted_positive


def recall_score(y_true: dt.Integer, y_pred: dt.Integer) -> float:
    """Calculate the recall score of predicted values against true values.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    float
        The recall score: the fraction of actual positives that are
        correctly predicted.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import recall_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> recall_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.571429 │
    └──────────┘
    """
    true_positive = (y_true & y_pred).sum()
    actual_positive = y_true.sum()
    return true_positive / actual_positive


def f1_score(y_true: dt.Integer, y_pred: dt.Integer) -> float:
    """Calculate the F1 score of predicted values against true values.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    float
        The F1 score, representing the harmonic mean of precision and recall.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import f1_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> f1_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.615385 │
    └──────────┘
    """
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    return (2 * precision * recall) / (precision + recall)
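
Note: as the commented-out .to_pyarrow().as_py() call and the tests below suggest, each function returns a deferred ibis scalar expression rather than an eager Python float; interactive mode simply renders it as the boxed value shown in the docstrings. A minimal usage sketch (not part of this commit) for materializing the metrics outside interactive mode, reusing the example table from the docstrings:

import ibis

from ibis_ml.metrics import accuracy_score, f1_score

t = ibis.memtable(
    {
        "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
        "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    }
)

# Each call builds a scalar expression; .to_pyarrow().as_py() executes it
# on the default backend and converts the result to a plain Python value.
acc = accuracy_score(t.actual, t.prediction).to_pyarrow().as_py()
f1 = f1_score(t.actual, t.prediction).to_pyarrow().as_py()
print(acc, f1)  # ~0.583333 and ~0.615385 for this example table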

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ doc = [
     "itables",
     "jupyter",
     "quartodoc",
-    "scikit-learn",
+    "scikit-learn<1.6.0",
     "skorch",
     "torch",
     "xgboost",

tests/test_metrics.py

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
import ibis
import pytest
import sklearn.metrics

import ibis_ml.metrics


@pytest.fixture
def results_table():
    return ibis.memtable(
        {
            "id": range(1, 13),
            "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
            "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
        }
    )


@pytest.mark.parametrize(
    "metric_name",
    [
        pytest.param("accuracy_score", id="accuracy_score"),
        pytest.param("precision_score", id="precision_score"),
        pytest.param("recall_score", id="recall_score"),
        pytest.param("f1_score", id="f1_score"),
    ],
)
def test_classification_metrics(results_table, metric_name):
    ibis_ml_func = getattr(ibis_ml.metrics, metric_name)
    sklearn_func = getattr(sklearn.metrics, metric_name)
    t = results_table
    df = t.to_pandas()
    result = ibis_ml_func(t.actual, t.prediction).to_pyarrow().as_py()
    expected = sklearn_func(df["actual"], df["prediction"])
    assert result == pytest.approx(expected, abs=1e-4)
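
The tests compare each ibis-ml metric against its scikit-learn counterpart on the same table. For reference, the docstring values can also be verified by hand from that table's confusion-matrix counts (a quick sanity check, not part of the commit):

# actual vs. prediction over the 12 example rows:
#   true positives  TP = 4, true negatives  TN = 3,
#   false positives FP = 2, false negatives FN = 3
tp, tn, fp, fn = 4, 3, 2, 3
accuracy = (tp + tn) / (tp + tn + fp + fn)          # 7/12 ≈ 0.583333
precision = tp / (tp + fp)                          # 4/6  ≈ 0.666667
recall = tp / (tp + fn)                             # 4/7  ≈ 0.571429
f1 = 2 * precision * recall / (precision + recall)  # 8/13 ≈ 0.615385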
