MaruthiKo · MaruthiKo · Nov 17, 2023
diff --git a/Dockerfile b/Dockerfile
@@ -1,9 +1,21 @@
-FROM tiangolo/uvicorn-gunicorn:python3.10
+# Image that runs the Streamlit script app.py.
+FROM python:3.10
 
-COPY ./requirements.txt /requirements.txt
+# Relative destination: resolved against the current working directory, which
+# is "/" unless the base image sets one, so this lands in /app/requirements.txt.
+COPY requirements.txt app/requirements.txt
 
-RUN pip install -r /requirements.txt
+WORKDIR /app
 
-COPY ./models /models
+RUN pip install -r requirements.txt
 
-COPY ./app /app
+COPY . .
+
+# Documents the port the Streamlit server listens on (8501 is its default).
+EXPOSE 8501
+
+# ENTRYPOINT + CMD combine to "streamlit run app.py"; CMD can be replaced at
+# "docker run" time to launch a different script.
+ENTRYPOINT ["streamlit","run"]
+
+CMD ["app.py"]
diff --git a/app.py b/app.py
@@ -31,7 +31,7 @@ def run():
     }
 
     if st.button("Predict"):
-        response = requests.post("http://127.0.0.1:8000/predict", json=data)
+        response = requests.post("http://backend.docker:8000/predict", json=data)
         prediction = response.json()
         if prediction == "1":
             st.success("The project is going to be successful")

diff --git a/app/Dockerfile b/app/Dockerfile
@@ -0,0 +1,18 @@
+# Backend image: serves the FastAPI app (main:app) with uvicorn.
+FROM python:3.10.1-slim
+
+WORKDIR /app
+
+# Install dependencies before copying the sources so this layer stays cached
+# when only application code changes.
+COPY requirements.txt ./requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . /app
+
+EXPOSE 8000
+
+# Listen on 0.0.0.0: a server bound to 127.0.0.1 is reachable only from inside
+# this container, so neither the published port nor the frontend could connect.
+# --reload is omitted because no source directory is mounted to watch.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/app/main.py b/app/main.py
@@ -7,7 +7,7 @@
 
 app = FastAPI(title='Project Success Prediction', version='1.0', description='KNN Model is used for prediciton')
 
-model = joblib.load('../models/success_pred_model.pkl')
+model = joblib.load('./success_pred_model.pkl')
 
 class Data(BaseModel):
     parent_category: str

diff --git a/app/requirements.txt b/app/requirements.txt
@@ -0,0 +1,9 @@
+# NOTE(review): a pickled model should be loaded with the scikit-learn version
+# it was trained with; consider pinning the versions listed here.
+scikit-learn
+numpy
+pandas
+fastapi
+uvicorn
+# Imported directly by main.py (joblib.load), so list it explicitly.
+joblib
diff --git a/app/success_pred_model.pkl b/app/success_pred_model.pkl
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,20 @@
+version: "3.8"
+
+services:
+  frontend:
+    build:
+      # Build from the service directory so the Dockerfile's COPY steps pick up
+      # that service's own requirements.txt and sources (forward slashes only).
+      context: ./frontend
+      dockerfile: Dockerfile
+    ports:
+      - "8501:8501"
+    depends_on:
+      - app
+
+  app:
+    build:
+      context: ./app
+      dockerfile: Dockerfile
+    ports:
+      - "8000:8000"
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
@@ -0,0 +1,18 @@
+# Frontend image: runs the Streamlit UI defined in main.py.
+FROM python:3.10.1-slim
+
+WORKDIR /app
+
+# Install dependencies before copying the sources so this layer stays cached
+# when only application code changes.
+COPY requirements.txt ./requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . /app
+
+EXPOSE 8501
+
+# ENTRYPOINT + CMD combine to "streamlit run main.py".
+ENTRYPOINT ["streamlit","run"]
+
+CMD ["main.py"]
diff --git a/frontend/data.csv b/frontend/data.csv
diff --git a/frontend/main.py b/frontend/main.py
@@ -0,0 +1,68 @@
+"""Streamlit frontend for the Kickstarter project success predictor."""
+import json
+import os
+
+import pandas as pd
+import requests
+import streamlit as st
+
+from training_pipeline import DataPreprocessor
+
+# Prediction endpoint; "app" is the backend's docker-compose service name.
+# Set BACKEND_URL to point elsewhere when running outside of compose.
+BACKEND_URL = os.environ.get("BACKEND_URL", "http://app:8000/predict")
+# Seconds to wait for the backend so a dead service cannot hang the page.
+REQUEST_TIMEOUT = 30
+
+# The dataset is only used to offer known values in the select boxes.
+preprocessor = DataPreprocessor("./data.csv")
+df = preprocessor.preprocess()
+
+
+def run():
+    """Render the input form and display the backend's prediction."""
+    st.title("Kickstarter Project Success Prediction")
+    parent_category = st.selectbox("Parent Category", df["parent_category"].unique())
+    sub_category = st.selectbox("Sub Category", df["sub_category"].unique())
+    days = st.number_input("Number of Days", min_value=1, max_value=120)
+    backers_count = st.number_input("Number of Backers")
+    pledged_amt = st.number_input("Pledged Amount")
+    converted_pledged_amt = st.number_input("Converted Pledged Amount")
+    goal = st.number_input("Goal")
+    country = st.selectbox("Country", df["country"].unique())
+
+    data = {
+        'parent_category': parent_category,
+        'sub_category': sub_category,
+        'days': days,
+        'backers_count': backers_count,
+        'pledged_amt': pledged_amt,
+        'converted_pledged_amt': converted_pledged_amt,
+        'goal': goal,
+        'country': country,
+    }
+
+    if not st.button("Predict"):
+        return
+
+    try:
+        response = requests.post(BACKEND_URL, json=data, timeout=REQUEST_TIMEOUT)
+        response.raise_for_status()
+        prediction = response.json()
+    except (requests.RequestException, ValueError) as exc:
+        # Connection failures, timeouts, HTTP error statuses and non-JSON
+        # bodies are reported in the UI instead of crashing the script.
+        st.error(f"Could not get a prediction from the backend: {exc}")
+        return
+
+    if prediction == "1":
+        st.success("The project is going to be successful")
+    elif prediction == "0":
+        st.error("The project is likely to fail. Don't lose hope! Keep trying")
+    else:
+        # Any other payload used to be ignored without feedback.
+        st.warning(f"Unexpected response from the backend: {prediction!r}")
+
+
+if __name__ == "__main__":
+    run()
diff --git a/frontend/requirements.txt b/frontend/requirements.txt
@@ -0,0 +1,7 @@
+pandas
+streamlit
+# Imported directly by main.py.
+requests
+# Imported by training_pipeline.py, which main.py loads at startup.
+scikit-learn
+joblib
diff --git a/frontend/training_pipeline.py b/frontend/training_pipeline.py
@@ -0,0 +1,95 @@
+"""Preprocessing and model-training helpers for the success predictor."""
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.compose import make_column_transformer
+from sklearn.pipeline import make_pipeline
+from sklearn.metrics import accuracy_score,f1_score
+import joblib
+
+
+class DataPreprocessor():
+    """Load the raw CSV and clean it for training and for the UI."""
+
+    def __init__(self, path):
+        self.df = pd.read_csv(path)
+
+    def preprocess(self):
+        """Return a cleaned copy of the loaded data.
+
+        The "state" label is mapped to 1 (successful) / 0 (failed), the
+        "Unnamed: 0" column is dropped when present and missing
+        "sub_category" values become "Not Applicable".
+        """
+        # Work on a copy: mapping "state" in place on self.df would turn every
+        # label into NaN if preprocess() were called a second time.
+        df = self.df.copy()
+
+        # Convert label(state) into 0,1
+        labels = {"successful":1, "failed":0}
+        df["state"] = df["state"].map(labels)
+
+        # Drop unnecessary columns; tolerate CSVs saved without that column.
+        df = df.drop(columns='Unnamed: 0', errors="ignore")
+        df["sub_category"] = df["sub_category"].fillna("Not Applicable")
+        return df
+
+
+class Model:
+    """Fit, persist and evaluate a classifier on the preprocessed data."""
+
+    def __init__(self,df: pd.DataFrame, model):
+        self.model = model
+        self.df = df
+
+        # Split the dataframe into Features and Labels
+        self.X = self.df.drop('state',axis=1) # Features
+        self.y = self.df['state'] # Labels
+
+        # Split the data into training and testing sets
+        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, test_size=0.2, random_state=15)
+
+    def pipeline(self):
+        """Return the encoder + model pipeline fitted on the training split."""
+        cols = ["country","parent_category","sub_category"]
+        # Collect the categories from the full data set so values that only
+        # occur in the test split are still known to the encoder.
+        ohe = OneHotEncoder(handle_unknown = "ignore")
+        ohe.fit(self.X[cols])
+
+        # handle_unknown has to be set on the encoder that is part of the
+        # pipeline; on the helper encoder above it never affects predictions.
+        encoder = OneHotEncoder(categories=ohe.categories_, handle_unknown="ignore")
+        column_trans = make_column_transformer((encoder, cols),
+                                                remainder='passthrough')
+
+        pipe = make_pipeline(column_trans, self.model)
+        pipe.fit(self.X_train, self.y_train)
+        return pipe
+
+    def dump(self, path="models/success_pred_model.pkl"):
+        """Fit the pipeline and write it to ``path`` with joblib."""
+        # Given a path, joblib opens and closes the file itself; the handle
+        # from a bare open(...) call was never closed.
+        joblib.dump(self.pipeline(), path)
+
+    def evaluate(self):
+        """Fit the pipeline and print accuracy and F1 (in percent) on the test split."""
+        y_pred = self.pipeline().predict(self.X_test)
+        accuracy = accuracy_score(self.y_test,y_pred) * 100
+        f1 = f1_score(self.y_test,y_pred) * 100
+        print("Accuracy:", round(accuracy,2))
+        print("F1_score:", round(f1, 2))
+
+    # Keep the original, misspelled name working for existing callers.
+    evaulate = evaluate
+
+
+if __name__ == "__main__":
+    preprocessor = DataPreprocessor("data/data.csv")
+    df = preprocessor.preprocess()
+    knn = KNeighborsClassifier()
+    model = Model(df, knn)
+    model.dump()
+    # model.evaluate() # Uncomment to evaluate the model