Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
FROM tiangolo/uvicorn-gunicorn:python3.10
FROM python:3.10

COPY ./requirements.txt /requirements.txt
COPY requirements.txt app/requirements.txt

RUN pip install -r /requirements.txt
WORKDIR /app

COPY ./models /models
RUN pip install -r requirements.txt

COPY ./app /app
COPY . .

EXPOSE 8501

ENTRYPOINT ["streamlit","run"]

CMD ["app.py"]
2 changes: 1 addition & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def run():
}

if st.button("Predict"):
response = requests.post("http://127.0.0.1:8000/predict", json=data)
response = requests.post("http://backend.docker:8000/predict", json=data)
prediction = response.json()
if prediction == "1":
st.success("The project is going to be successful")
Expand Down
14 changes: 14 additions & 0 deletions app/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Backend image: serves the FastAPI app defined in main.py with uvicorn.
# FROM tiangolo/uvicorn-gunicorn:python3.10
FROM python:3.10.1-slim

# Copy only the dependency list first so the pip layer is cached until it changes.
COPY requirements.txt /app/requirements.txt

WORKDIR /app

RUN pip install --no-cache-dir -r requirements.txt

COPY . /app

EXPOSE 8000

# Bind to 0.0.0.0: with 127.0.0.1 the server only listens on the container's
# own loopback interface, so neither the published port nor other containers
# on the compose network can reach it.
# NOTE(review): --reload is a development option; consider dropping it for
# production images since the code is baked into the image.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
2 changes: 1 addition & 1 deletion app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

app = FastAPI(title='Project Success Prediction', version='1.0', description='KNN Model is used for prediciton')

model = joblib.load('../models/success_pred_model.pkl')
model = joblib.load('./success_pred_model.pkl')

class Data(BaseModel):
parent_category: str
Expand Down
5 changes: 5 additions & 0 deletions app/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
scikit-learn
numpy
pandas
fastapi
uvicorn
Binary file added app/success_pred_model.pkl
Binary file not shown.
16 changes: 16 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Two-service stack: a Streamlit frontend that calls the FastAPI backend
# over the compose network at http://app:8000.
# "3.10" is not a valid Compose file format version; 3.8 is widely supported.
version: "3.8"

services:
  frontend:
    build:
      # Each Dockerfile copies files relative to its own service directory,
      # so that directory must be the build context. Forward slashes keep
      # the path portable (backslashes only work on Windows).
      context: ./frontend
      dockerfile: Dockerfile
    ports:
      - "8501:8501"
    depends_on:
      - app

  app:
    build:
      context: ./app
      dockerfile: Dockerfile
    ports:
      - "8000:8000"
15 changes: 15 additions & 0 deletions frontend/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Frontend image: runs the Streamlit UI in main.py.
FROM python:3.10.1-slim

WORKDIR /app

# Dependency list goes in first so the install layer is reused between builds.
COPY requirements.txt ./requirements.txt
RUN pip install -r requirements.txt

# Application sources (and data.csv) land next to the requirements file.
COPY . .

EXPOSE 8501

# `streamlit run` is fixed; the script name can be overridden at `docker run`.
ENTRYPOINT ["streamlit","run"]
CMD ["main.py"]
181,375 changes: 181,375 additions & 0 deletions frontend/data.csv

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions frontend/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import pandas as pd
import requests
import json
import streamlit as st
from training_pipeline import DataPreprocessor


# Load and preprocess the dataset at import time; `df` supplies the option
# lists for the category and country select boxes in run().
# NOTE(review): presumably Streamlit re-executes this script on every
# interaction, re-reading the CSV each time — consider caching; confirm.
preprocessor = DataPreprocessor("./data.csv")
df = preprocessor.preprocess()

def run():
    """Render the input form and display the backend's prediction.

    Collects the project attributes through Streamlit widgets, and when the
    "Predict" button is pressed posts them as JSON to the backend's
    ``/predict`` endpoint. A response of ``"1"`` is shown as a success
    message and ``"0"`` as a failure message; network errors, non-2xx
    responses, non-JSON bodies and any other value are reported to the user
    instead of being silently ignored.
    """
    st.title("Kickstarter Project Success Prediction")
    parent_category = st.selectbox("Parent Category", df["parent_category"].unique())
    sub_category = st.selectbox("Sub Category", df["sub_category"].unique())
    days = st.number_input("Number of Days", min_value=1, max_value=120)
    backers_count = st.number_input("Number of Backers")
    pledged_amt = st.number_input("Pledged Amount")
    converted_pledged_amt = st.number_input("Converted Pledged Amount")
    goal = st.number_input("Goal")
    country = st.selectbox("Country", df["country"].unique())

    data = {
        'parent_category': parent_category,
        'sub_category': sub_category,
        'days': days,
        'backers_count': backers_count,
        'pledged_amt': pledged_amt,
        'converted_pledged_amt': converted_pledged_amt,
        'goal': goal,
        'country': country,
    }

    if not st.button("Predict"):
        return

    try:
        # A timeout keeps the UI from hanging forever if the backend is down.
        response = requests.post("http://app:8000/predict", json=data, timeout=30)
        # Do not interpret an error payload (e.g. a validation error) as a prediction.
        response.raise_for_status()
        # Normalise to str so both "1" and 1 from the backend are understood.
        prediction = str(response.json())
    except (requests.RequestException, ValueError) as exc:
        st.error(f"Could not get a prediction from the backend: {exc}")
        return

    if prediction == "1":
        st.success("The project is going to be successful")
    elif prediction == "0":
        st.error("The project is likely to fail. Don't lose hope! Keep trying")
    else:
        st.warning(f"Unexpected response from the backend: {prediction!r}")


if __name__ == "__main__":
    run()
2 changes: 2 additions & 0 deletions frontend/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pandas
streamlit
# main.py calls the backend with requests
requests
# training_pipeline.py (imported by main.py) imports sklearn and joblib
scikit-learn
joblib
65 changes: 65 additions & 0 deletions frontend/training_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score,f1_score
import joblib
class DataPreprocessor():
    """Load the raw project CSV and prepare it for training or for the UI."""

    def __init__(self, path):
        """Read the CSV file at ``path`` into ``self.df``."""
        self.df = pd.read_csv(path)

    def preprocess(self):
        """Return a cleaned copy of the raw data.

        The ``state`` label is mapped to 1 ("successful") / 0 ("failed"),
        the ``Unnamed: 0`` index column is removed when present, and missing
        ``sub_category`` values are replaced by "Not Applicable".

        ``self.df`` is left untouched so the method can be called repeatedly;
        mapping the labels in place would turn them into NaN on a second call.
        """
        # Convert label(state) into 0,1
        labels = {"successful": 1, "failed": 0}

        # drop() returns a new frame, so the edits below never touch self.df.
        # errors="ignore" tolerates CSVs saved without the index column.
        df = self.df.drop(columns='Unnamed: 0', errors="ignore")
        df["state"] = df["state"].map(labels)
        df["sub_category"] = df["sub_category"].fillna("Not Applicable")
        return df
class Model:
    """Train, persist and evaluate a classifier on the preprocessed data.

    The frame is split into features (every column except ``state``) and
    labels (``state``), then into an 80/20 train/test split with a fixed
    random seed so results are reproducible.
    """

    def __init__(self, df: pd.DataFrame, model):
        """Store the estimator and build the train/test split from ``df``."""
        self.model = model
        self.df = df

        # Split the dataframe into Features and Labels
        self.X = self.df.drop('state', axis=1)  # Features
        self.y = self.df['state']  # Labels

        # Split the data into training and testing sets
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=15
        )

    def pipeline(self):
        """Build and fit the preprocessing + model pipeline; return it fitted."""
        cols = ["country", "parent_category", "sub_category"]

        # Learn the category lists from the full feature set so the encoder
        # knows categories that only occur in the test split.
        categories = OneHotEncoder().fit(self.X[cols]).categories_

        # handle_unknown must be set on the encoder that ends up inside the
        # pipeline; otherwise unseen categories raise at prediction time.
        column_trans = make_column_transformer(
            (OneHotEncoder(categories=categories, handle_unknown="ignore"), cols),
            remainder='passthrough',
        )

        pipe = make_pipeline(column_trans, self.model)
        pipe.fit(self.X_train, self.y_train)
        return pipe

    def dump(self):
        """Fit the pipeline and write it to models/success_pred_model.pkl."""
        # Fit before opening the file so a training failure leaves no empty file.
        fitted = self.pipeline()
        # The context manager closes the handle; the bare open() leaked it.
        with open("models/success_pred_model.pkl", "wb") as fh:
            joblib.dump(fitted, fh)

    def evaluate(self):
        """Fit the pipeline and print accuracy and F1 (in percent) on the test split."""
        y_pred = self.pipeline().predict(self.X_test)
        accuracy = accuracy_score(self.y_test, y_pred) * 100
        f1 = f1_score(self.y_test, y_pred) * 100
        print("Accuracy:", round(accuracy, 2))
        print("F1_score:", round(f1, 2))

    # Backward-compatible alias for the original misspelled method name.
    evaulate = evaluate

if __name__ == "__main__":
    # Script entry point: preprocess the CSV, fit a k-nearest-neighbours
    # pipeline on it and persist the fitted pipeline to disk.
    frame = DataPreprocessor("data/data.csv").preprocess()
    trainer = Model(frame, KNeighborsClassifier())
    trainer.dump()
    # To also print accuracy/F1 on the hold-out split, call trainer.evaulate()