Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aif360/algorithms/preprocessing/optim_preproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def transform(self, dataset, sep='=', transform_Y=True):

if transform_Y:
# randomized mapping when Y is requested to be transformed
dfP_withY = self.OpT.dfP.applymap(lambda x: 0 if x < 1e-8 else x)
dfP_withY = self.OpT.dfP.map(lambda x: 0 if x < 1e-8 else x)
dfP_withY = dfP_withY.divide(dfP_withY.sum(axis=1), axis=0)

df_transformed = _apply_randomized_mapping(df, dfP_withY,
Expand Down
3 changes: 2 additions & 1 deletion aif360/datasets/regression_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,14 @@ def __init__(self, df, dep_var_name, protected_attribute_names,
unprivileged_values = [0.]
if callable(vals):
df[attr] = df[attr].apply(vals)
elif np.issubdtype(df[attr].dtype, np.number):
elif pd.api.types.is_numeric_dtype(df[attr]):
# this attribute is numeric; no remapping needed
privileged_values = vals
unprivileged_values = list(set(df[attr]).difference(vals))
else:
# find all instances which match any of the attribute values
priv = np.logical_or.reduce(np.equal.outer(vals, df[attr].to_numpy()))
df[attr] = df[attr].astype(object)
df.loc[priv, attr] = privileged_values[0]
df.loc[~priv, attr] = unprivileged_values[0]

Expand Down
8 changes: 5 additions & 3 deletions aif360/datasets/standard_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,14 @@ def __init__(self, df, label_name, favorable_classes,
unprivileged_values = [0.]
if callable(vals):
df[attr] = df[attr].apply(vals)
elif np.issubdtype(df[attr].dtype, np.number):
elif pd.api.types.is_numeric_dtype(df[attr]):
# this attribute is numeric; no remapping needed
privileged_values = vals
unprivileged_values = list(set(df[attr]).difference(vals))
else:
# find all instances which match any of the attribute values
priv = np.logical_or.reduce(np.equal.outer(vals, df[attr].to_numpy()))
df[attr] = df[attr].astype(object)
df.loc[priv, attr] = privileged_values[0]
df.loc[~priv, attr] = unprivileged_values[0]

Expand All @@ -132,14 +133,15 @@ def __init__(self, df, label_name, favorable_classes,
unfavorable_label = 0.
if callable(favorable_classes):
df[label_name] = df[label_name].apply(favorable_classes)
elif np.issubdtype(df[label_name], np.number) and len(set(df[label_name])) == 2:
elif pd.api.types.is_numeric_dtype(df[label_name]) and len(set(df[label_name])) == 2:
# labels are already binary; don't change them
favorable_label = favorable_classes[0]
unfavorable_label = set(df[label_name]).difference(favorable_classes).pop()
else:
# find all instances which match any of the favorable classes
pos = np.logical_or.reduce(np.equal.outer(favorable_classes,
pos = np.logical_or.reduce(np.equal.outer(favorable_classes,
df[label_name].to_numpy()))
df[label_name] = df[label_name].astype(object)
df.loc[pos, label_name] = favorable_label
df.loc[~pos, label_name] = unfavorable_label

Expand Down
2 changes: 1 addition & 1 deletion aif360/metrics/binary_label_dataset_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def consistency(self, n_neighbors=5):
consistency += np.abs(y[i] - np.mean(y[indices[i]]))
consistency = 1.0 - consistency/num_samples

return consistency
return np.asarray(consistency).item()

def _smoothed_base_rates(self, labels, concentration=1.0):
"""Dirichlet-smoothed base rates for each intersecting group in the
Expand Down
7 changes: 3 additions & 4 deletions aif360/sklearn/detectors/facts/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def strip_str(x):
return x.strip()
else:
return x
X = X.applymap(strip_str)
X = X.map(strip_str)
X["relationship"] = X["relationship"].replace(["Husband", "Wife"], "Married")
X["hours-per-week"] = pd.cut(
x=X["hours-per-week"],
Expand Down Expand Up @@ -81,9 +81,8 @@ def clean_compas(X: DataFrame) -> DataFrame:
X = X.reset_index(drop=True)
X = X.drop(columns=["age", "c_charge_desc"])
X["priors_count"] = pd.cut(X["priors_count"], [-0.1, 1, 5, 10, 15, 38])
X.target.replace("Recidivated", 0, inplace=True)
X.target.replace("Survived", 1, inplace=True)
X["age_cat"].replace("Less than 25", "10-25", inplace=True)
X["target"] = X["target"].replace("Recidivated", 0).replace("Survived", 1)
X["age_cat"] = X["age_cat"].replace("Less than 25", "10-25")

return X

Expand Down
23 changes: 18 additions & 5 deletions aif360/sklearn/postprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,19 @@
from aif360.sklearn.postprocessing.reject_option_classification import RejectOptionClassifier, RejectOptionClassifierCV


def _get_requires_proba(postprocessor):
"""Get requires_proba tag compatible with sklearn < 1.6 and >= 1.6.

sklearn 1.6 removed _get_tags() from BaseEstimator; fall back to
_more_tags() which is still present.
"""
if hasattr(postprocessor, '_get_tags'):
return postprocessor._get_tags().get('requires_proba', False)
if hasattr(postprocessor, '_more_tags'):
return postprocessor._more_tags().get('requires_proba', False)
return False


class PostProcessingMeta(BaseEstimator, MetaEstimatorMixin):
"""A meta-estimator which wraps a given estimator with a post-processing
step.
Expand Down Expand Up @@ -85,7 +98,7 @@ def fit(self, X, y, sample_weight=None, **fit_params):
self.estimator_ = self.estimator if self.prefit else clone(self.estimator)

try:
use_proba = self.postprocessor._get_tags()['requires_proba']
use_proba = _get_requires_proba(self.postprocessor)
except KeyError:
raise TypeError("`postprocessor` (type: {}) does not have a "
"'requires_proba' tag.".format(type(self.estimator)))
Expand Down Expand Up @@ -145,7 +158,7 @@ def predict(self, X):
Returns:
numpy.ndarray: Predicted class label per sample.
"""
use_proba = self.postprocessor_._get_tags()['requires_proba']
use_proba = _get_requires_proba(self.postprocessor_)
y_score = (self.estimator_.predict_proba(X) if use_proba else
self.estimator_.predict(X))
y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns')
Expand All @@ -169,7 +182,7 @@ def predict_proba(self, X):
in the model, where classes are ordered as they are in
``self.classes_``.
"""
use_proba = self.postprocessor_._get_tags()['requires_proba']
use_proba = _get_requires_proba(self.postprocessor_)
y_score = (self.estimator_.predict_proba(X) if use_proba else
self.estimator_.predict(X))
y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns')
Expand All @@ -193,7 +206,7 @@ def predict_log_proba(self, X):
the model, where classes are ordered as they are in
``self.classes_``.
"""
use_proba = self.postprocessor_._get_tags()['requires_proba']
use_proba = _get_requires_proba(self.postprocessor_)
y_score = (self.estimator_.predict_proba(X) if use_proba else
self.estimator_.predict(X))
y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns')
Expand All @@ -216,7 +229,7 @@ def score(self, X, y, sample_weight=None):
Returns:
float: Score value.
"""
use_proba = self.postprocessor_._get_tags()['requires_proba']
use_proba = _get_requires_proba(self.postprocessor_)
y_score = (self.estimator_.predict_proba(X) if use_proba else
self.estimator_.predict(X))
y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns')
Expand Down