From 2d112cda72754b30848fbe3f700b1844bc88cb50 Mon Sep 17 00:00:00 2001 From: Jeong-Yoon Lee Date: Tue, 15 Jul 2025 15:26:03 -0700 Subject: [PATCH 1/5] Add BaseDRClassifier for binary classification with probabilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements BaseDRClassifier class to address issue #819 by providing a DR-learner that outputs probabilities for binary classification problems, similar to other S/T/X learner classifiers. Key features: - Uses predict_proba() for outcome models to return probabilities - Maintains doubly robust estimation framework - Supports both single learner and separate outcome/effect learners - Includes comprehensive tests with classification data - Follows existing classifier implementation patterns Fixes #819 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- causalml/inference/meta/__init__.py | 2 +- causalml/inference/meta/drlearner.py | 88 +++++++++++++++++++++++++++- tests/test_meta_learners.py | 50 +++++++++++++++- 3 files changed, 137 insertions(+), 3 deletions(-) diff --git a/causalml/inference/meta/__init__.py b/causalml/inference/meta/__init__.py index 1ddf1ced..2127e5e6 100644 --- a/causalml/inference/meta/__init__.py +++ b/causalml/inference/meta/__init__.py @@ -9,4 +9,4 @@ from .xlearner import BaseXLearner, BaseXRegressor, BaseXClassifier from .rlearner import BaseRLearner, BaseRRegressor, BaseRClassifier, XGBRRegressor from .tmle import TMLELearner -from .drlearner import BaseDRLearner, BaseDRRegressor, XGBDRRegressor +from .drlearner import BaseDRLearner, BaseDRRegressor, BaseDRClassifier, XGBDRRegressor diff --git a/causalml/inference/meta/drlearner.py b/causalml/inference/meta/drlearner.py index ea8b7e0c..de89e9c9 100644 --- a/causalml/inference/meta/drlearner.py +++ b/causalml/inference/meta/drlearner.py @@ -13,7 +13,7 @@ check_p_conditions, convert_pd_to_np, ) -from causalml.metrics import regression_metrics +from causalml.metrics import regression_metrics, classification_metrics from causalml.propensity import compute_propensity_score @@ -487,6 +487,92 @@ def __init__( ) +class BaseDRClassifier(BaseDRLearner): + """ + A parent class for DR-learner classifier classes. + """ + + def __init__( + self, + learner=None, + control_outcome_learner=None, + treatment_outcome_learner=None, + treatment_effect_learner=None, + ate_alpha=0.05, + control_name=0, + ): + """Initialize a DR-learner classifier. + + Args: + learner (optional): a model to estimate outcomes and treatment effects in both the control and treatment + groups. Should have a predict_proba() method for outcome models. + control_outcome_learner (optional): a model to estimate outcomes in the control group. + Should have a predict_proba() method. + treatment_outcome_learner (optional): a model to estimate outcomes in the treatment group. + Should have a predict_proba() method. + treatment_effect_learner (optional): a model to estimate treatment effects in the treatment group. + Should be a regressor. + ate_alpha (float, optional): the confidence level alpha of the ATE estimate + control_name (str or int, optional): name of control group + """ + super().__init__( + learner=learner, + control_outcome_learner=control_outcome_learner, + treatment_outcome_learner=treatment_outcome_learner, + treatment_effect_learner=treatment_effect_learner, + ate_alpha=ate_alpha, + control_name=control_name, + ) + + def predict( + self, X, treatment=None, y=None, p=None, return_components=False, verbose=True + ): + """Predict treatment effects. + + Args: + X (np.matrix or np.array or pd.Dataframe): a feature matrix + treatment (np.array or pd.Series, optional): a treatment vector + y (np.array or pd.Series, optional): an outcome vector + verbose (bool, optional): whether to output progress logs + Returns: + (numpy.ndarray): Predictions of treatment effects. + """ + X, treatment, y = convert_pd_to_np(X, treatment, y) + + te = np.zeros((X.shape[0], self.t_groups.shape[0])) + yhat_cs = {} + yhat_ts = {} + + for i, group in enumerate(self.t_groups): + models_tau = self.models_tau[group] + _te = np.r_[[model.predict(X) for model in models_tau]].mean(axis=0) + te[:, i] = np.ravel(_te) + yhat_cs[group] = np.r_[ + [model.predict_proba(X)[:, 1] for model in self.models_mu_c] + ].mean(axis=0) + yhat_ts[group] = np.r_[ + [model.predict_proba(X)[:, 1] for model in self.models_mu_t[group]] + ].mean(axis=0) + + if (y is not None) and (treatment is not None) and verbose: + mask = (treatment == group) | (treatment == self.control_name) + treatment_filt = treatment[mask] + y_filt = y[mask] + w = (treatment_filt == group).astype(int) + + yhat = np.zeros_like(y_filt, dtype=float) + yhat[w == 0] = yhat_cs[group][mask][w == 0] + yhat[w == 1] = yhat_ts[group][mask][w == 1] + + logger.info("Error metrics for group {}".format(group)) + classification_metrics(y_filt, yhat, w) + + if not return_components: + return te + else: + return te, yhat_cs, yhat_ts + + class XGBDRRegressor(BaseDRRegressor): def __init__(self, ate_alpha=0.05, control_name=0, *args, **kwargs): """Initialize a DR-learner with two XGBoost models.""" diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index 47dd1009..c91275a8 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -30,7 +30,7 @@ XGBRRegressor, ) from causalml.inference.meta import TMLELearner -from causalml.inference.meta import BaseDRLearner +from causalml.inference.meta import BaseDRLearner, BaseDRRegressor, BaseDRClassifier from causalml.metrics import ape, auuc_score from .const import RANDOM_SEED, N_SAMPLE, ERROR_THRESHOLD, CONTROL_NAME, CONVERSION @@ -1039,3 +1039,51 @@ def test_BaseDRLearner(generate_regression_data): normalize=True, ) assert auuc["cate_p"] > 0.5 + + + +def test_BaseDRClassifier(generate_classification_data): + np.random.seed(RANDOM_SEED) + + df, X_names = generate_classification_data() + + df["treatment_group_key"] = np.where( + df["treatment_group_key"] == CONTROL_NAME, 0, 1 + ) + + # Extract features and outcome + y = df[CONVERSION].values + X = df[X_names].values + treatment = df["treatment_group_key"].values + + learner = BaseDRClassifier( + learner=LogisticRegression(), + treatment_effect_learner=LinearRegression() + ) + + # Test fit and predict + te = learner.fit_predict(X=X, treatment=treatment, y=y) + + # Check that treatment effects are returned + assert te.shape[0] == X.shape[0] + assert te.shape[1] == len(np.unique(treatment[treatment != 0])) + + # Test with return_components + te, yhat_cs, yhat_ts = learner.fit_predict( + X=X, treatment=treatment, y=y, return_components=True + ) + + # Check that components are returned as probabilities + for group in learner.t_groups: + assert np.all((yhat_cs[group] >= 0) & (yhat_cs[group] <= 1)) + assert np.all((yhat_ts[group] >= 0) & (yhat_ts[group] <= 1)) + + # Test separate outcome and effect learners + learner_separate = BaseDRClassifier( + control_outcome_learner=LogisticRegression(), + treatment_outcome_learner=LogisticRegression(), + treatment_effect_learner=LinearRegression() + ) + + te_separate = learner_separate.fit_predict(X=X, treatment=treatment, y=y) + assert te_separate.shape == te.shape From 426d8376e4e50a501ded59ddd20da7d98b5d997b Mon Sep 17 00:00:00 2001 From: Jeong-Yoon Lee Date: Tue, 15 Jul 2025 15:32:49 -0700 Subject: [PATCH 2/5] Apply black formatting to fix linting issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add black as a dependency in pyproject.toml - Format test_meta_learners.py with black - Fix code style issues for CI/CD pipeline 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .claude/commands/fix-github-issue.md | 14 ++++ .claude/settings.local.json | 11 +++ CLAUDE.md | 110 +++++++++++++++++++++++++++ pyproject.toml | 1 + tests/test_meta_learners.py | 18 ++--- 5 files changed, 144 insertions(+), 10 deletions(-) create mode 100644 .claude/commands/fix-github-issue.md create mode 100644 .claude/settings.local.json create mode 100644 CLAUDE.md diff --git a/.claude/commands/fix-github-issue.md b/.claude/commands/fix-github-issue.md new file mode 100644 index 00000000..4334bf62 --- /dev/null +++ b/.claude/commands/fix-github-issue.md @@ -0,0 +1,14 @@ +Please analyze and fix the GitHub issue: $ARGUMENTS. + +Follow these steps: + +1. Use `gh issue view` to get the issue details +2. Understand the problem described in the issue +3. Search the codebase for relevant files +4. Implement the necessary changes to fix the issue +5. Write and run tests to verify the fix +6. Ensure code passes linting and type checking +7. Create a descriptive commit message +8. Push and create a PR + +Remember to use the GitHub CLI (`gh`) for all GitHub-related tasks. diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 00000000..deb2007f --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,11 @@ +{ + "permissions": { + "allow": [ + "Bash(gh issue view:*)", + "Bash(uv run:*)", + "Bash(git checkout:*)", + "Bash(git add:*)" + ], + "deny": [] + } +} \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..b69da3dd --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,110 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +CausalML is a Python package for uplift modeling and causal inference with machine learning algorithms. It provides methods to estimate Conditional Average Treatment Effect (CATE) or Individual Treatment Effect (ITE) from experimental or observational data. + +## Development Setup + +### Environment Setup +- Python 3.9+ required (supports 3.9-3.12) +- Uses `uv` as the package manager (preferred) or `pip` +- Install development dependencies with `make setup_local` (sets up pre-commit hooks) + +### Build Commands +- `make build_ext`: Build Cython extensions (required before running code/tests) +- `make build`: Build wheel distribution +- `make install`: Install package locally +- `make clean`: Clean build artifacts + +### Testing +- `make test`: Run full test suite with coverage +- `pytest -vs --cov causalml/`: Direct pytest command +- `pytest tests/test_specific.py`: Run specific test file +- Optional test flags: + - `pytest --runtf`: Include TensorFlow tests + - `pytest --runtorch`: Include PyTorch tests + +### Code Quality +- Uses `black` for code formatting +- Run `black .` before submitting PRs +- Pre-commit hooks available via `make setup_local` +- Flake8 configuration in tox.ini with max line length 120 + +## Architecture + +### Core Module Structure +``` +causalml/ +├── dataset/ # Synthetic data generation +├── feature_selection/ # Feature selection utilities +├── inference/ # Main inference algorithms +│ ├── meta/ # Meta-learners (S, T, X, R, DR learners) +│ ├── tree/ # Causal trees and uplift trees +│ ├── tf/ # TensorFlow implementations (DragonNet) +│ ├── torch/ # PyTorch implementations (CEVAE) +│ └── iv/ # Instrumental variable methods +├── metrics/ # Evaluation metrics +├── optimize/ # Policy learning and optimization +└── propensity.py # Propensity score modeling +``` + +### Key Components + +#### Meta-Learners (`causalml/inference/meta/`) +- **BaseLearner**: Abstract base class for all meta-learners +- **S-Learner**: Single model approach +- **T-Learner**: Two model approach +- **X-Learner**: Cross-learner with propensity scores +- **R-Learner**: Robinson's R-learner +- **DR-Learner**: Doubly robust learner + +#### Tree-Based Methods (`causalml/inference/tree/`) +- Causal trees and forests with Cython implementations +- Uplift trees for classification problems +- Custom splitting criteria for causal inference + +#### Propensity Score Models (`causalml/propensity.py`) +- **PropensityModel**: Abstract base for propensity estimation +- Built-in calibration support +- Clipping bounds to avoid numerical issues + +### Cython Extensions +The package includes Cython-compiled modules for performance: +- Tree algorithms (`_tree`, `_criterion`, `_splitter`, `_utils`) +- Causal tree components (`_builder`, causal trees) +- Always run `make build_ext` after changes to .pyx files + +## Common Workflows + +### Adding New Meta-Learners +1. Inherit from `BaseLearner` in `causalml/inference/meta/base.py` +2. Implement `fit()` and `predict()` methods +3. Add appropriate tests in `tests/test_meta_learners.py` + +### Working with Tree Methods +1. Cython files are in `causalml/inference/tree/` +2. Rebuild extensions with `make build_ext` after changes +3. Test with synthetic data from `causalml.dataset` + +### Testing Different Backends +- Core tests run without optional dependencies +- TensorFlow tests: `pytest --runtf` +- PyTorch tests: `pytest --runtorch` +- Tests use fixtures from `tests/conftest.py` for data generation + +### Git Operations +- **Pushing branches**: Use specific SSH key for authentication: + ```bash + GIT_SSH_COMMAND='ssh -i ~/.ssh/github_personal -o IdentitiesOnly=yes' git push -u origin branch_name + ``` + +## Important Notes + +- The package uses both pandas DataFrames and numpy arrays internally +- Propensity scores are clipped by default to avoid division by zero +- Meta-learners support both single and multiple treatment scenarios +- Tree methods include built-in visualization capabilities +- Optional dependencies (TensorFlow, PyTorch) are marked clearly in tests \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index acd437b3..47e5f853 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "lightgbm", "packaging", "graphviz", + "black>=25.1.0", ] [project.optional-dependencies] diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index c91275a8..98588bc0 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -1041,7 +1041,6 @@ def test_BaseDRLearner(generate_regression_data): assert auuc["cate_p"] > 0.5 - def test_BaseDRClassifier(generate_classification_data): np.random.seed(RANDOM_SEED) @@ -1050,40 +1049,39 @@ def test_BaseDRClassifier(generate_classification_data): df["treatment_group_key"] = np.where( df["treatment_group_key"] == CONTROL_NAME, 0, 1 ) - + # Extract features and outcome y = df[CONVERSION].values X = df[X_names].values treatment = df["treatment_group_key"].values learner = BaseDRClassifier( - learner=LogisticRegression(), - treatment_effect_learner=LinearRegression() + learner=LogisticRegression(), treatment_effect_learner=LinearRegression() ) # Test fit and predict te = learner.fit_predict(X=X, treatment=treatment, y=y) - + # Check that treatment effects are returned assert te.shape[0] == X.shape[0] assert te.shape[1] == len(np.unique(treatment[treatment != 0])) - + # Test with return_components te, yhat_cs, yhat_ts = learner.fit_predict( X=X, treatment=treatment, y=y, return_components=True ) - + # Check that components are returned as probabilities for group in learner.t_groups: assert np.all((yhat_cs[group] >= 0) & (yhat_cs[group] <= 1)) assert np.all((yhat_ts[group] >= 0) & (yhat_ts[group] <= 1)) - + # Test separate outcome and effect learners learner_separate = BaseDRClassifier( control_outcome_learner=LogisticRegression(), treatment_outcome_learner=LogisticRegression(), - treatment_effect_learner=LinearRegression() + treatment_effect_learner=LinearRegression(), ) - + te_separate = learner_separate.fit_predict(X=X, treatment=treatment, y=y) assert te_separate.shape == te.shape From 687a9bacc1463a37b0d2a1ec4110bddee85afb32 Mon Sep 17 00:00:00 2001 From: Jeong-Yoon Lee Date: Tue, 15 Jul 2025 15:37:30 -0700 Subject: [PATCH 3/5] Update CLAUDE.md with uv commands and black linting workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update all commands to use uv instead of pip/pytest - Add mandatory black formatting before push workflow - Include complete development workflow with linting steps - Emphasize code quality requirements for CI/CD 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- CLAUDE.md | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index b69da3dd..b56c272b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,28 +10,33 @@ CausalML is a Python package for uplift modeling and causal inference with machi ### Environment Setup - Python 3.9+ required (supports 3.9-3.12) -- Uses `uv` as the package manager (preferred) or `pip` +- Uses `uv` as the package manager (preferred) - Install development dependencies with `make setup_local` (sets up pre-commit hooks) ### Build Commands +- `uv pip install -e .`: Install package in editable mode (preferred) - `make build_ext`: Build Cython extensions (required before running code/tests) - `make build`: Build wheel distribution - `make install`: Install package locally - `make clean`: Clean build artifacts ### Testing -- `make test`: Run full test suite with coverage -- `pytest -vs --cov causalml/`: Direct pytest command -- `pytest tests/test_specific.py`: Run specific test file +- `uv run pytest -vs --cov causalml/`: Run full test suite with coverage +- `uv run pytest tests/test_specific.py`: Run specific test file +- `make test`: Alternative full test suite command - Optional test flags: - - `pytest --runtf`: Include TensorFlow tests - - `pytest --runtorch`: Include PyTorch tests + - `uv run pytest --runtf`: Include TensorFlow tests + - `uv run pytest --runtorch`: Include PyTorch tests ### Code Quality -- Uses `black` for code formatting -- Run `black .` before submitting PRs +- **ALWAYS run black before pushing**: Ensures code formatting compliance + ```bash + uv add black # Add black if not already installed + uv run black . # Format all Python files + ``` +- Uses `black` for code formatting (max line length 120) - Pre-commit hooks available via `make setup_local` -- Flake8 configuration in tox.ini with max line length 120 +- Flake8 configuration in tox.ini ## Architecture @@ -91,11 +96,25 @@ The package includes Cython-compiled modules for performance: ### Testing Different Backends - Core tests run without optional dependencies -- TensorFlow tests: `pytest --runtf` -- PyTorch tests: `pytest --runtorch` +- TensorFlow tests: `uv run pytest --runtf` +- PyTorch tests: `uv run pytest --runtorch` - Tests use fixtures from `tests/conftest.py` for data generation +### Complete Development Workflow +1. **Setup**: `uv pip install -e .` (install in editable mode) +2. **Development**: Make code changes +3. **Testing**: `uv run pytest tests/test_specific.py` (test your changes) +4. **Formatting**: `uv run black .` (REQUIRED before commit) +5. **Commit**: `git add -A && git commit -m "Your changes"` +6. **Push**: Use SSH key command below + ### Git Operations +- **Before pushing any branch**: Always run code formatting + ```bash + uv run black . # Format code + git add -A # Stage formatting changes (if any) + git commit -m "Apply black formatting" # Only if files were changed + ``` - **Pushing branches**: Use specific SSH key for authentication: ```bash GIT_SSH_COMMAND='ssh -i ~/.ssh/github_personal -o IdentitiesOnly=yes' git push -u origin branch_name From 210c2fe2e84b7639fb3c9282131b5d939d0b94fc Mon Sep 17 00:00:00 2001 From: Jeong-Yoon Lee Date: Tue, 15 Jul 2025 15:39:45 -0700 Subject: [PATCH 4/5] Remove Claude Code artifacts from repository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove CLAUDE.md and .claude/ from git tracking - Add Claude Code artifacts to .gitignore - Keep repository clean of development tooling files 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .claude/commands/fix-github-issue.md | 14 --- .claude/settings.local.json | 11 --- .gitignore | 6 +- CLAUDE.md | 129 --------------------------- 4 files changed, 5 insertions(+), 155 deletions(-) delete mode 100644 .claude/commands/fix-github-issue.md delete mode 100644 .claude/settings.local.json delete mode 100644 CLAUDE.md diff --git a/.claude/commands/fix-github-issue.md b/.claude/commands/fix-github-issue.md deleted file mode 100644 index 4334bf62..00000000 --- a/.claude/commands/fix-github-issue.md +++ /dev/null @@ -1,14 +0,0 @@ -Please analyze and fix the GitHub issue: $ARGUMENTS. - -Follow these steps: - -1. Use `gh issue view` to get the issue details -2. Understand the problem described in the issue -3. Search the codebase for relevant files -4. Implement the necessary changes to fix the issue -5. Write and run tests to verify the fix -6. Ensure code passes linting and type checking -7. Create a descriptive commit message -8. Push and create a PR - -Remember to use the GitHub CLI (`gh`) for all GitHub-related tasks. diff --git a/.claude/settings.local.json b/.claude/settings.local.json deleted file mode 100644 index deb2007f..00000000 --- a/.claude/settings.local.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(gh issue view:*)", - "Bash(uv run:*)", - "Bash(git checkout:*)", - "Bash(git add:*)" - ], - "deny": [] - } -} \ No newline at end of file diff --git a/.gitignore b/.gitignore index c95c8408..2d446e8a 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,8 @@ _build/ *.prof .venv/ .python-version -uv.lock \ No newline at end of file +uv.lock + +# Claude Code artifacts +CLAUDE.md +.claude/ \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index b56c272b..00000000 --- a/CLAUDE.md +++ /dev/null @@ -1,129 +0,0 @@ -# CLAUDE.md - -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. - -## Project Overview - -CausalML is a Python package for uplift modeling and causal inference with machine learning algorithms. It provides methods to estimate Conditional Average Treatment Effect (CATE) or Individual Treatment Effect (ITE) from experimental or observational data. - -## Development Setup - -### Environment Setup -- Python 3.9+ required (supports 3.9-3.12) -- Uses `uv` as the package manager (preferred) -- Install development dependencies with `make setup_local` (sets up pre-commit hooks) - -### Build Commands -- `uv pip install -e .`: Install package in editable mode (preferred) -- `make build_ext`: Build Cython extensions (required before running code/tests) -- `make build`: Build wheel distribution -- `make install`: Install package locally -- `make clean`: Clean build artifacts - -### Testing -- `uv run pytest -vs --cov causalml/`: Run full test suite with coverage -- `uv run pytest tests/test_specific.py`: Run specific test file -- `make test`: Alternative full test suite command -- Optional test flags: - - `uv run pytest --runtf`: Include TensorFlow tests - - `uv run pytest --runtorch`: Include PyTorch tests - -### Code Quality -- **ALWAYS run black before pushing**: Ensures code formatting compliance - ```bash - uv add black # Add black if not already installed - uv run black . # Format all Python files - ``` -- Uses `black` for code formatting (max line length 120) -- Pre-commit hooks available via `make setup_local` -- Flake8 configuration in tox.ini - -## Architecture - -### Core Module Structure -``` -causalml/ -├── dataset/ # Synthetic data generation -├── feature_selection/ # Feature selection utilities -├── inference/ # Main inference algorithms -│ ├── meta/ # Meta-learners (S, T, X, R, DR learners) -│ ├── tree/ # Causal trees and uplift trees -│ ├── tf/ # TensorFlow implementations (DragonNet) -│ ├── torch/ # PyTorch implementations (CEVAE) -│ └── iv/ # Instrumental variable methods -├── metrics/ # Evaluation metrics -├── optimize/ # Policy learning and optimization -└── propensity.py # Propensity score modeling -``` - -### Key Components - -#### Meta-Learners (`causalml/inference/meta/`) -- **BaseLearner**: Abstract base class for all meta-learners -- **S-Learner**: Single model approach -- **T-Learner**: Two model approach -- **X-Learner**: Cross-learner with propensity scores -- **R-Learner**: Robinson's R-learner -- **DR-Learner**: Doubly robust learner - -#### Tree-Based Methods (`causalml/inference/tree/`) -- Causal trees and forests with Cython implementations -- Uplift trees for classification problems -- Custom splitting criteria for causal inference - -#### Propensity Score Models (`causalml/propensity.py`) -- **PropensityModel**: Abstract base for propensity estimation -- Built-in calibration support -- Clipping bounds to avoid numerical issues - -### Cython Extensions -The package includes Cython-compiled modules for performance: -- Tree algorithms (`_tree`, `_criterion`, `_splitter`, `_utils`) -- Causal tree components (`_builder`, causal trees) -- Always run `make build_ext` after changes to .pyx files - -## Common Workflows - -### Adding New Meta-Learners -1. Inherit from `BaseLearner` in `causalml/inference/meta/base.py` -2. Implement `fit()` and `predict()` methods -3. Add appropriate tests in `tests/test_meta_learners.py` - -### Working with Tree Methods -1. Cython files are in `causalml/inference/tree/` -2. Rebuild extensions with `make build_ext` after changes -3. Test with synthetic data from `causalml.dataset` - -### Testing Different Backends -- Core tests run without optional dependencies -- TensorFlow tests: `uv run pytest --runtf` -- PyTorch tests: `uv run pytest --runtorch` -- Tests use fixtures from `tests/conftest.py` for data generation - -### Complete Development Workflow -1. **Setup**: `uv pip install -e .` (install in editable mode) -2. **Development**: Make code changes -3. **Testing**: `uv run pytest tests/test_specific.py` (test your changes) -4. **Formatting**: `uv run black .` (REQUIRED before commit) -5. **Commit**: `git add -A && git commit -m "Your changes"` -6. **Push**: Use SSH key command below - -### Git Operations -- **Before pushing any branch**: Always run code formatting - ```bash - uv run black . # Format code - git add -A # Stage formatting changes (if any) - git commit -m "Apply black formatting" # Only if files were changed - ``` -- **Pushing branches**: Use specific SSH key for authentication: - ```bash - GIT_SSH_COMMAND='ssh -i ~/.ssh/github_personal -o IdentitiesOnly=yes' git push -u origin branch_name - ``` - -## Important Notes - -- The package uses both pandas DataFrames and numpy arrays internally -- Propensity scores are clipped by default to avoid division by zero -- Meta-learners support both single and multiple treatment scenarios -- Tree methods include built-in visualization capabilities -- Optional dependencies (TensorFlow, PyTorch) are marked clearly in tests \ No newline at end of file From ac8a3af4900979c5dc5dd98d13fa71f35b79e224 Mon Sep 17 00:00:00 2001 From: Jeong-Yoon Lee Date: Tue, 15 Jul 2025 15:46:26 -0700 Subject: [PATCH 5/5] Address Copilot PR review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Improve docstring completeness with detailed parameter descriptions - Clarify usage of treatment/y parameters for classification metrics - Explain return_components output format with probability details - Separate DR learner imports for better readability - Maintain API consistency with other meta-learners Addresses comments in #844 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- causalml/inference/meta/drlearner.py | 16 +++++++++++++--- tests/test_meta_learners.py | 4 +++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/causalml/inference/meta/drlearner.py b/causalml/inference/meta/drlearner.py index de89e9c9..ab2ab29d 100644 --- a/causalml/inference/meta/drlearner.py +++ b/causalml/inference/meta/drlearner.py @@ -531,11 +531,21 @@ def predict( Args: X (np.matrix or np.array or pd.Dataframe): a feature matrix - treatment (np.array or pd.Series, optional): a treatment vector - y (np.array or pd.Series, optional): an outcome vector - verbose (bool, optional): whether to output progress logs + treatment (np.array or pd.Series, optional): a treatment vector. Used for computing + classification metrics when y is also provided. + y (np.array or pd.Series, optional): an outcome vector. Used for computing + classification metrics when treatment is also provided. + p (np.ndarray or pd.Series or dict, optional): an array of propensity scores of float (0,1) in the + single-treatment case; or, a dictionary of treatment groups that map to propensity vectors of + float (0,1). Currently not used in prediction but kept for API consistency. + return_components (bool, optional): whether to return outcome probabilities for treatment and control + groups separately. Defaults to False. + verbose (bool, optional): whether to output progress logs. Defaults to True. Returns: (numpy.ndarray): Predictions of treatment effects. + If return_components is True, also returns: + - dict: Predicted probabilities for the control group (yhat_cs). + - dict: Predicted probabilities for the treatment group (yhat_ts). """ X, treatment, y = convert_pd_to_np(X, treatment, y) diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index 98588bc0..f1902fc0 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -30,7 +30,9 @@ XGBRRegressor, ) from causalml.inference.meta import TMLELearner -from causalml.inference.meta import BaseDRLearner, BaseDRRegressor, BaseDRClassifier +from causalml.inference.meta import BaseDRLearner +from causalml.inference.meta import BaseDRRegressor +from causalml.inference.meta import BaseDRClassifier from causalml.metrics import ape, auuc_score from .const import RANDOM_SEED, N_SAMPLE, ERROR_THRESHOLD, CONTROL_NAME, CONVERSION