# Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
import xgboost as xgb
from xgboost import XGBRegressor
from scipy import stats
import joblib

# Pipeline: feature configuration

# Column that is one-hot encoded before modelling.
categorical_features = ['flight_phase']

# Columns passed to the model unchanged.
numerical_features = [
    'flight_cycle',
    'egt_probe_average',
    'fuel_flw',
    'core_spd',
    'zpn12p',
    'vib_n1_#1_bearing',
    'vib_n2_#1_bearing',
    'vib_n2_turbine_frame',
]

# Columns screened for outliers: every numerical feature except
# 'flight_cycle'. ('corrected_fan_spd' was commented out of the original
# hand-written list and is still not included.)
numerical_features_for_outlier = [
    name for name in numerical_features if name != 'flight_cycle'
]
class OutlierRemoverGroup(BaseEstimator, TransformerMixin):
    """Drop rows whose per-group absolute z-score exceeds a threshold.

    Rows are grouped by ``groupby_col``; within each group every column in
    ``columns`` is z-scored, and a row is kept only if none of its z-scores
    is above ``threshold``.

    The transformer changes the number of rows, so it cannot be used as a
    step inside an sklearn ``Pipeline`` (the target would no longer line up
    with the features). Apply it to a frame that still contains the target.

    Parameters
    ----------
    groupby_col : str or list of str
        Column(s) that define the groups.
    columns : list of str
        Numeric columns to screen for outliers.
    threshold : float, default=2.5
        Largest absolute z-score a value may have for its row to be kept.
    """

    def __init__(self, groupby_col, columns, threshold=2.5):
        self.groupby_col = groupby_col
        self.columns = columns
        self.threshold = threshold

    def fit(self, X, y=None):
        """No-op: statistics are computed on the data given to ``transform``."""
        return self

    def transform(self, X):
        """Return a filtered copy of ``X`` with a fresh 0..n-1 index.

        A z-score that cannot be computed (missing value, zero variance
        within the group, single-row group) is NaN. Such a value is treated
        as "not an outlier": previously ``NaN <= threshold`` evaluated to
        False and the row -- or an entire constant-valued group -- was
        silently discarded.
        """
        abs_z = X.groupby(self.groupby_col)[self.columns].transform(
            lambda col: np.abs(stats.zscore(col, nan_policy='omit'))
        )
        acceptable = (abs_z <= self.threshold) | abs_z.isna()
        keep = acceptable.all(axis=1)
        # Boolean .loc selection returns a new frame; the input is not mutated.
        return X.loc[keep].reset_index(drop=True)


# Encoding of the categorical column.
# NOTE(review): with drop='first' and handle_unknown='ignore', a category not
# seen during fit is encoded as all zeros, i.e. the same as the dropped first
# category -- confirm this is acceptable for unseen flight phases.
categorical_pipeline = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore', drop='first'))
])

# Merge the transformers: numeric columns pass through untouched, the
# categorical column goes through the one-hot pipeline.
col_transformer = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_features),
        ('cat', categorical_pipeline, categorical_features)
    ]
)

# Final estimator: preprocessing followed by an XGBoost regressor.
xgb_model = XGBRegressor(random_state=42)
pipefinal = make_pipeline(col_transformer, xgb_model)

# Data Importing
df = pd.read_csv('engines2_data_cleaned_no_outliers.csv')  # Data/
# Apply the group-wise outlier filter to features and target together, so the
# two stay aligned when rows are removed.
# NOTE(review): the filter runs on the full dataset before the split, so the
# validation/test rows are filtered too and take part in the per-group
# statistics -- confirm this is intended.
feature_columns = [
    'flight_cycle', 'flight_phase', 'egt_probe_average', 'fuel_flw',
    'core_spd', 'zpn12p', 'vib_n1_#1_bearing', 'vib_n2_#1_bearing',
    'vib_n2_turbine_frame',
]
df_all = df[feature_columns].copy()
df_all['RUL'] = df['RUL']

remover = OutlierRemoverGroup(
    groupby_col='flight_phase',
    columns=numerical_features_for_outlier,
)
df_filtered = remover.fit_transform(df_all)

# Data Splitting
Y = df_filtered['RUL']
X = df_filtered.drop(columns='RUL')

# 80 % train; the remaining 20 % is halved into validation and test (10 % each).
X_train, X_temp, y_train, y_temp = train_test_split(
    X, Y, test_size=0.2, random_state=42
)
# NOTE(review): X_val / y_val are not used by any cell visible here.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

pipefinal.fit(X_train, y_train)
handle_unknown='ignore'))]),\n", + " ['flight_phase'])])),\n", + " ('xgbregressor',\n", + " XGB...\n", + " feature_types=None, gamma=None, grow_policy=None,\n", + " importance_type=None,\n", + " interaction_constraints=None, learning_rate=None,\n", + " max_bin=None, max_cat_threshold=None,\n", + " max_cat_to_onehot=None, max_delta_step=None,\n", + " max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan,\n", + " monotone_constraints=None, multi_strategy=None,\n", + " n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=42, ...))])" + ], + "text/html": [ + "
Pipeline(steps=[('columntransformer',\n",
+              "                 ColumnTransformer(transformers=[('num', 'passthrough',\n",
+              "                                                  ['flight_cycle',\n",
+              "                                                   'egt_probe_average',\n",
+              "                                                   'fuel_flw', 'core_spd',\n",
+              "                                                   'zpn12p',\n",
+              "                                                   'vib_n1_#1_bearing',\n",
+              "                                                   'vib_n2_#1_bearing',\n",
+              "                                                   'vib_n2_turbine_frame']),\n",
+              "                                                 ('cat',\n",
+              "                                                  Pipeline(steps=[('onehot',\n",
+              "                                                                   OneHotEncoder(drop='first',\n",
+              "                                                                                 handle_unknown='ignore'))]),\n",
+              "                                                  ['flight_phase'])])),\n",
+              "                ('xgbregressor',\n",
+              "                 XGB...\n",
+              "                              feature_types=None, gamma=None, grow_policy=None,\n",
+              "                              importance_type=None,\n",
+              "                              interaction_constraints=None, learning_rate=None,\n",
+              "                              max_bin=None, max_cat_threshold=None,\n",
+              "                              max_cat_to_onehot=None, max_delta_step=None,\n",
+              "                              max_depth=None, max_leaves=None,\n",
+              "                              min_child_weight=None, missing=nan,\n",
+              "                              monotone_constraints=None, multi_strategy=None,\n",
+              "                              n_estimators=None, n_jobs=None,\n",
+              "                              num_parallel_tree=None, random_state=42, ...))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Predict and evaluate on the test set
y_pred = pipefinal.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
# mean_squared_error returns the MSE; the square root gives the RMSE, which is
# in the same units as the target. (The value previously printed under the
# "RMSE" label was the un-rooted MSE.)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"MAE: {mae}, RMSE: {rmse}")

# Save the fitted pipeline (preprocessing + model) as a single artifact.
# Only load this file with joblib.load from a trusted location: it is a
# pickle and unpickling executes code.
joblib.dump(pipefinal, 'predictive_maintenance_pipeline.pkl')