-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrecommendations.py
More file actions
94 lines (74 loc) · 3.55 KB
/
recommendations.py
File metadata and controls
94 lines (74 loc) · 3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python
# coding: utf-8
# ## Import dependencies
# In[1]:
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ast
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
# from nltk.stem.snowball import SnowballStemmer
# from nltk.stem.wordnet import WordNetLemmatizer
# from nltk.corpus import wordnet
# from surprise import Reader, Dataset, SVD, evaluate
import warnings; warnings.simplefilter('ignore')
# Renumber the job rows 0..n-1; the previous index is kept as a regular column.
jobs_US_base_line = jobs_US_base_line.reset_index()
titles = jobs_US_base_line['Title']
# Reverse lookup used by get_recommendations: job title -> row position.
indices = pd.Series(jobs_US_base_line.index, index=titles)
def get_recommendations(title):
    """Return all job titles ordered from most to least similar to ``title``.

    Reads the module-level ``indices`` (title -> row position), ``cosine_sim``
    (pairwise similarity matrix) and ``titles`` (Series of job titles).

    Args:
        title: Job title to look up in ``indices``.

    Returns:
        ``titles`` re-ordered by descending similarity to the queried posting.
        The queried posting itself is not filtered out. Rows with equal
        scores keep their original relative order.

    Raises:
        KeyError: if ``title`` is not a label of ``indices``.
    """
    idx = indices[title]
    # A title shared by several postings makes the lookup return a Series of
    # row positions; fall back to the first posting so that we rank against a
    # single similarity row instead of a 2-D slice.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    scores = np.asarray(cosine_sim[idx])
    # Stable sort on the negated scores == descending order with ties left in
    # original row order (same ordering as sorted(..., reverse=True)).
    job_indices = np.argsort(-scores, kind='stable')
    return titles.iloc[job_indices]
# Spot-check the title-based recommender on a handful of sample postings.
# Each call only builds the top-10 view; nothing is printed or stored.
for sample_title in (
    'SAP Business Analyst / WM',
    'Security Engineer/Technical Lead',
    'Immediate Opening',
    'EXPERIENCED ROOFERS',
):
    get_recommendations(sample_title).head(10)
users_training.head()

# Keep only US users and cap the sample at the first 10,000 rows
# (presumably to keep the pairwise similarity matrix built below manageable).
user_based_approach_US = users_training.loc[users_training['Country'] == 'US']
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of users_training (avoids SettingWithCopyWarning).
user_based_approach = user_based_approach_US.iloc[0:10000, :].copy()
user_based_approach.head()

# Missing values become empty strings so the text fields can be concatenated.
user_based_approach['DegreeType'] = user_based_approach['DegreeType'].fillna('')
user_based_approach['Major'] = user_based_approach['Major'].fillna('')
# Convert element-wise: str(series) would produce the repr of the whole
# column and assign that same string to every row.
user_based_approach['TotalYearsExperience'] = (
    user_based_approach['TotalYearsExperience'].fillna('').astype(str)
)
# Overwrite DegreeType with the combined profile text fed to the vectorizer.
# Fields are joined with spaces so the last word of one field and the first
# word of the next stay separate tokens.
user_based_approach['DegreeType'] = (
    user_based_approach['DegreeType'] + ' '
    + user_based_approach['Major'] + ' '
    + user_based_approach['TotalYearsExperience']
)
# Vectorize the combined profile text into unigram + bigram TF-IDF features.
# min_df=1 keeps every term, exactly as min_df=0 did (a vocabulary term always
# occurs in at least one document), but newer scikit-learn releases reject an
# integer 0 during parameter validation.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(user_based_approach['DegreeType'])
tfidf_matrix.shape
# http://scikit-learn.org/stable/modules/metrics.html#linear-kernel
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity between users.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim[0]
# Renumber rows 0..n-1 so that row positions line up with the rows/columns of
# cosine_sim, which was built from this frame in the same order.
user_based_approach = user_based_approach.reset_index()
userid = user_based_approach['UserID']
# Reverse lookup: UserID -> row position.
# NOTE: this rebinds the module-level `indices` that get_recommendations also
# reads, so from here on that function looks titles up in a UserID-keyed Series.
indices = pd.Series(user_based_approach.index, index=userid)
def get_recommendations_userwise(userid):
    """Return the UserIDs of the users most similar to ``userid``.

    Reads the module-level ``indices`` (UserID -> row position) and
    ``cosine_sim`` (pairwise user similarity matrix).

    Args:
        userid: UserID to look up in ``indices``.

    Returns:
        A list of at most 11 UserIDs ordered by descending similarity. The
        queried user is not filtered out, so the list normally holds that
        user plus the 10 nearest neighbours. Ties keep original row order.

    Raises:
        KeyError: if ``userid`` is not a label of ``indices``.
    """
    idx = indices[userid]
    scores = np.asarray(cosine_sim[idx])
    # Stable sort on the negated scores == descending order with ties left in
    # original row order (same ordering as sorted(..., reverse=True)).
    nearest_rows = np.argsort(-scores, kind='stable')[:11]
    # Translate row positions back into UserIDs: callers such as get_job_id
    # match the result against a UserID column, not against row numbers.
    return indices.index[nearest_rows].tolist()
# Demo: look up the users most similar to user 123.
print("-----Top 10 Similar users with userId: 123------")
get_recommendations_userwise(123)
def get_job_id(usrid_list):
    """Return details of the jobs associated with the given users.

    Selects the rows of the module-level ``apps_training`` whose ``UserID`` is
    in ``usrid_list``, collects their ``JobID`` values, and returns the rows
    of the module-level ``jobs`` carrying one of those ``JobID`` values.

    Args:
        usrid_list: Collection of UserID values to match.

    Returns:
        DataFrame with the columns JobID, Title, Description, City and State.
    """
    by_these_users = apps_training['UserID'].isin(usrid_list)
    applied_job_ids = pd.DataFrame(
        data=apps_training.loc[by_these_users], columns=['JobID']
    )['JobID'].tolist()

    is_applied_job = jobs['JobID'].isin(applied_job_ids)
    wanted_columns = ['JobID', 'Title', 'Description', 'City', 'State']
    return pd.DataFrame(data=jobs.loc[is_applied_job], columns=wanted_columns)
# Jobs associated with the users most similar to user 123.
get_job_id(get_recommendations_userwise(123))

print("-----Top 10 Similar users with userId: 47------")
# Query user 47 so the lookup matches the banner above and the job lookup
# below (the call previously used 555).
get_recommendations_userwise(47)
get_job_id(get_recommendations_userwise(47))