# FIPE: Functionally Identical Pruning of Ensembles

This repository provides methods for Functionally-Identical Pruning of Tree Ensembles (FIPE). Given a trained scikit-learn ensemble, FIPE produces a pruned model that is certified to be equivalent to the original model on the entire feature space.

## Installation

This project requires the Gurobi solver. Free academic licenses are available; please consult:
| 10 | + |
| 11 | +- [Gurobi academic program and licenses](https://www.gurobi.com/academia/academic-program-and-licenses/) |
| 12 | +- [Gurobi academic license agreement](https://www.gurobi.com/downloads/end-user-license-agreement-academic/) |
| 13 | + |
Run the following commands from the project root to install the package and its requirements. You may need to install Python and virtualenv first.

```shell
virtualenv -p python3.10 env
source env/bin/activate
pip install -e .
```

The installation can be checked by running the test suite:

```shell
pip install pytest
pytest
```

The integration tests require a working Gurobi license. If a license is not available, the tests will pass and print a warning.
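
If you want to check whether a usable license is present before running the tests, one quick way is to try creating an empty Gurobi model. This is a generic `gurobipy` sketch and not part of FIPE itself:

```python
import gurobipy as gp

try:
    # Instantiating a model requires a valid Gurobi license.
    gp.Model('license_check')
    print('Gurobi license found.')
except gp.GurobiError as err:
    print(f'No usable Gurobi license: {err}')
```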
### Getting started

A minimal working example to prune an AdaBoost ensemble is presented below.

```python
from fipe import FIPE, FeatureEncoder
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier


# Load data and encode features
data = load_iris()
X = pd.DataFrame(data.data)
y = data.target

encoder = FeatureEncoder(X)
X = encoder.X.values

# Train tree ensemble on the training split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
base = AdaBoostClassifier(algorithm="SAMME", n_estimators=100)
base.fit(X_train, y_train)

# Read and normalize estimator weights
w = base.estimator_weights_
w = (w / w.max()) * 1e5

# Prune using FIPE
norm = 1
print(f'Pruning model by minimizing the l_{norm} norm.')
pruner = FIPE(base=base, weights=w, encoder=encoder, eps=1e-6)
pruner.build()
pruner.set_norm(norm)
pruner.add_samples(X_train)
pruner.oracle.setParam('LogToConsole', 0)
pruner.prune()
print('\nFinished pruning.')

# Read pruned model
n_activated = pruner.n_activated
print(f'The pruned ensemble has {n_activated} estimators.')

# Verify that predictions match the original ensemble on test data
y_pred = base.predict(X_test)
y_pruned = pruner.predict(X_test)
fidelity = np.mean(y_pred == y_pruned)
print(f'Fidelity to the initial ensemble is {fidelity:.1%}.')
```
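
Because the certificate covers the entire feature space, the pruned model should also agree with the original on points far from the training data. The sketch below probes this empirically on uniformly sampled points within the observed feature ranges; it only assumes that `pruner.predict`, like `base.predict`, accepts an arbitrary feature matrix:

```python
# Sanity check: compare predictions on random points spanning the
# observed feature ranges (the certificate already covers the full space).
rng = np.random.default_rng(0)
lo, hi = X.min(axis=0), X.max(axis=0)
X_rand = rng.uniform(lo, hi, size=(10_000, X.shape[1]))

agreement = np.mean(base.predict(X_rand) == pruner.predict(X_rand))
print(f'Agreement on random points: {agreement:.1%}')
```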