Skip to content

Commit 03737e3

Browse files
committed
Initial commit
0 parents  commit 03737e3

52 files changed

Lines changed: 26876 additions & 0 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/main.yml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
name: test
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
workflow_call:
9+
10+
permissions:
11+
contents: read
12+
13+
jobs:
14+
build:
15+
strategy:
16+
matrix:
17+
python-version: ["3.10", "3.11", "3.12"]
18+
runs-on: ubuntu-latest
19+
steps:
20+
- uses: actions/checkout@v4
21+
- name: Set up Python ${{ matrix.python-version }}
22+
uses: actions/setup-python@v5
23+
with:
24+
python-version: ${{ matrix.python-version }}
25+
- name: Install dependencies
26+
run: |
27+
python -m pip install --upgrade pip
28+
pip install tox
29+
- name: Run tests with tox
30+
run: tox

.github/workflows/publish.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
name: Release and Publish to PyPI
2+
3+
on:
4+
push:
5+
tags:
6+
- "v*"
7+
8+
permissions:
9+
contents: write
10+
11+
jobs:
12+
tests:
13+
uses: ./.github/workflows/main.yml
14+
release:
15+
name: Release new version
16+
needs: tests
17+
runs-on: ubuntu-latest
18+
steps:
19+
- name: Create GitHub release
20+
env:
21+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22+
tag: ${{ github.ref_name }}
23+
run: |
24+
gh release create "$tag" \
25+
--repo="$GITHUB_REPOSITORY" \
26+
--title="${GITHUB_REPOSITORY#*/} ${tag#v}" \
27+
--generate-notes
28+
publish:
29+
name: publish
30+
needs: release
31+
runs-on: ubuntu-latest
32+
steps:
33+
- name: Checkout source
34+
uses: actions/checkout@v4
35+
- name: Set up Python 3.10
36+
uses: actions/setup-python@v5
37+
with:
38+
python-version: "3.10"
39+
- name: Build package
40+
run: |
41+
python -m pip install -U pip build
42+
python -m build
43+
- name: Publish
44+
uses: pypa/gh-action-pypi-publish@v1.5.0
45+
with:
46+
user: __token__
47+
password: ${{ secrets.PYPI_TOKEN }}

.gitignore

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110+
.pdm.toml
111+
.pdm-python
112+
.pdm-build/
113+
114+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115+
__pypackages__/
116+
117+
# Celery stuff
118+
celerybeat-schedule
119+
celerybeat.pid
120+
121+
# SageMath parsed files
122+
*.sage.py
123+
124+
# Environments
125+
.env
126+
.venv
127+
env/
128+
venv/
129+
ENV/
130+
env.bak/
131+
venv.bak/
132+
133+
# Spyder project settings
134+
.spyderproject
135+
.spyproject
136+
137+
# Rope project settings
138+
.ropeproject
139+
140+
# mkdocs documentation
141+
/site
142+
143+
# mypy
144+
.mypy_cache/
145+
.dmypy.json
146+
dmypy.json
147+
148+
# Pyre type checker
149+
.pyre/
150+
151+
# pytype static type analyzer
152+
.pytype/
153+
154+
# Cython debug symbols
155+
cython_debug/
156+
157+
# PyCharm
158+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160+
# and can be added to the global gitignore or merged into this file. For a more nuclear
161+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
162+
#.idea/
163+
164+
# Conda
165+
.conda/
166+
167+
# Pylint
168+
.pylint.d/
169+
170+
# VS Code
171+
.vscode/

.pylintrc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[MAIN]
2+
3+
extension-pkg-whitelist=gurobipy
4+
max-args=10
5+
max-line-length=80
6+
max-locals=20
7+
max-returns=10
8+
max-attributes=20
9+
10+
[MESSAGES CONTROL]
11+
12+
disable=
13+
missing-docstring,
14+
invalid-name,

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2024 FIPE-Org
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# FIPE: Functionally Identical Pruning of Ensembles
2+
3+
![Tests badge](https://github.com/eminyous/fipe/actions/workflows/main.yml/badge.svg?branch=main)
4+
5+
This repository provides methods for Functionally-Identical Pruning of Tree Ensembles (FIPE). Given a trained scikit-learn model, FIPE provides a pruned model that is certified to be equivalent to the original model on the entire feature space.
6+
7+
## Installation
8+
9+
This project requires the Gurobi solver. Free academic licenses are available. Please consult:
10+
11+
- [Gurobi academic program and licenses](https://www.gurobi.com/academia/academic-program-and-licenses/)
12+
- [Gurobi academic license agreement](https://www.gurobi.com/downloads/end-user-license-agreement-academic/)
13+
14+
Run the following commands from the project root to install the requirements. You may need to install Python and virtualenv first.
15+
16+
```shell
17+
virtualenv -p python3.10 env
18+
pip install -e .
19+
```
20+
21+
The installation can be checked by running the test suite:
22+
23+
```shell
24+
pip install pytest
25+
pytest
26+
```
27+
28+
The integration tests require a working Gurobi license. If a license is not available, the tests will pass and print a warning.
29+
30+
## Getting started
31+
32+
A minimal working example to prune an AdaBoost ensemble is presented below.
33+
34+
```python
35+
from fipe import FIPE, FeatureEncoder
36+
import pandas as pd
37+
import numpy as np
38+
from sklearn.datasets import load_iris
39+
from sklearn.model_selection import train_test_split
40+
from sklearn.ensemble import AdaBoostClassifier
41+
42+
43+
# Load data and encode features
44+
data = load_iris()
45+
X = pd.DataFrame(data.data)
46+
y = data.target
47+
48+
encoder = FeatureEncoder(X)
49+
X = encoder.X.values
50+
51+
# Train tree ensemble
52+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
53+
base = AdaBoostClassifier(algorithm="SAMME", n_estimators=100)
54+
base.fit(X, y)
55+
56+
# Read and normalize weights
57+
w = base.estimator_weights_
58+
w = (w / w.max()) * 1e5
59+
60+
# Prune using FIPE
61+
norm = 1
62+
print(f'Pruning model by minimizing l_{norm} norm.')
63+
pruner = FIPE(base=base, weights=w, encoder=encoder, eps=1e-6)
64+
pruner.build()
65+
pruner.set_norm(norm)
66+
pruner.add_samples(X_train)
67+
pruner.oracle.setParam('LogToConsole', 0)
68+
pruner.prune()
69+
print('\n Finished pruning.')
70+
71+
# Read pruned model
72+
n_activated = pruner.n_activated
73+
print('The pruned ensemble has ', n_activated, ' estimators.')
74+
75+
# Verify that the pruned model is functionally identical on test data
76+
y_pred = base.predict(X_test)
77+
y_pruned = pruner.predict(X_test)
78+
fidelity = np.mean(y_pred == y_pruned)
79+
print('Fidelity to initial ensemble is ', fidelity, '%.')
80+
```

fipe/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from .ensemble import Ensemble
2+
from .feature import FeatureEncoder
3+
from .fipe import FIPE
4+
from .ocean import OCEAN, BaseOCEAN, VoteOCEAN
5+
from .oracle import Oracle
6+
from .prune import BasePruner, Pruner
7+
from .tree.tree import Node, Tree
8+
from .typing import FeatureType
9+
10+
__all__ = [
11+
"FeatureEncoder",
12+
"Ensemble",
13+
"FIPE",
14+
"Oracle",
15+
"BasePruner",
16+
"Pruner",
17+
"BaseOCEAN",
18+
"VoteOCEAN",
19+
"OCEAN",
20+
"Node",
21+
"Tree",
22+
"FeatureType",
23+
]

0 commit comments

Comments
 (0)