Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 109 additions & 40 deletions Chapter_08_Naive_Bayes/Coding_naive_Bayes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,78 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>spam</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Subject: naturally irresistible your corporate...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Subject: the stock trading gunslinger fanny i...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Subject: unbelievable new homes made easy im ...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Subject: 4 color printing special request add...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Subject: do not have money , get software cds ...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" text spam\n",
"0 Subject: naturally irresistible your corporate... 1\n",
"1 Subject: the stock trading gunslinger fanny i... 1\n",
"2 Subject: unbelievable new homes made easy im ... 1\n",
"3 Subject: 4 color printing special request add... 1\n",
"4 Subject: do not have money , get software cds ... 1"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# IMPORTANT: ONLY RUN THIS CELL IF YOU ARE WORKING ON A COLAB \n",
"\n",
Expand All @@ -159,7 +228,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -172,7 +241,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -206,61 +275,61 @@
" <th>0</th>\n",
" <td>Subject: naturally irresistible your corporate...</td>\n",
" <td>1</td>\n",
" <td>[., clear, original, our, :, distinctive, busi...</td>\n",
" <td>[more, information, clear, -, shouldn, _, iead...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Subject: the stock trading gunslinger fanny i...</td>\n",
" <td>1</td>\n",
" <td>[perspicuous, huzzah, inflexible, continuant, ...</td>\n",
" <td>[edt, optima, continuant, kansas, tight, hawth...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Subject: unbelievable new homes made easy im ...</td>\n",
" <td>1</td>\n",
" <td>[454, ., to, 3, wanting, we, you, loan, our, p...</td>\n",
" <td>[loan, new, to, 454, your, you, extended, unco...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Subject: 4 color printing special request add...</td>\n",
" <td>1</td>\n",
" <td>[., an, printing, goldengraphix, our, e, irwin...</td>\n",
" <td>[special, canyon, goldengraphix, ca, e, inform...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Subject: do not have money , get software cds ...</td>\n",
" <td>1</td>\n",
" <td>[., it, cds, ?, to, compatibility, along, ', d...</td>\n",
" <td>[to, ?, are, death, marriage, here, old, !, .,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Subject: great nnews hello , welcome to medzo...</td>\n",
" <td>1</td>\n",
" <td>[introduce, ., 75, worldwide, total, op, v, to...</td>\n",
" <td>[total, to, andmanyother, nice, miilion, are, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Subject: here ' s a hot play in motion homela...</td>\n",
" <td>1</td>\n",
" <td>[transportation, innovative, *, adverse, excha...</td>\n",
" <td>[terror, more, involve, press, constant, proje...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Subject: save your money buy getting this thin...</td>\n",
" <td>1</td>\n",
" <td>[imagine, it, ., has, viagra, iasts, ?, buy, t...</td>\n",
" <td>[errrectlon, save, to, ?, start, what, man, ca...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Subject: undeliverable : home based business f...</td>\n",
" <td>1</td>\n",
" <td>[75, ., original, :, business, undeliverable, ...</td>\n",
" <td>[2001, undeliverable, 2002, -, 8, recognized, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Subject: save your money buy getting this thin...</td>\n",
" <td>1</td>\n",
" <td>[imagine, it, ., has, viagra, ?, buy, to, 10, ...</td>\n",
" <td>[errrectlon, save, to, ?, start, what, man, ca...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
Expand All @@ -280,19 +349,19 @@
"9 Subject: save your money buy getting this thin... 1 \n",
"\n",
" words \n",
"0 [., clear, original, our, :, distinctive, busi... \n",
"1 [perspicuous, huzzah, inflexible, continuant, ... \n",
"2 [454, ., to, 3, wanting, we, you, loan, our, p... \n",
"3 [., an, printing, goldengraphix, our, e, irwin... \n",
"4 [., it, cds, ?, to, compatibility, along, ', d... \n",
"5 [introduce, ., 75, worldwide, total, op, v, to... \n",
"6 [transportation, innovative, *, adverse, excha... \n",
"7 [imagine, it, ., has, viagra, iasts, ?, buy, t... \n",
"8 [75, ., original, :, business, undeliverable, ... \n",
"9 [imagine, it, ., has, viagra, ?, buy, to, 10, ... "
"0 [more, information, clear, -, shouldn, _, iead... \n",
"1 [edt, optima, continuant, kansas, tight, hawth... \n",
"2 [loan, new, to, 454, your, you, extended, unco... \n",
"3 [special, canyon, goldengraphix, ca, e, inform... \n",
"4 [to, ?, are, death, marriage, here, old, !, .,... \n",
"5 [total, to, andmanyother, nice, miilion, are, ... \n",
"6 [terror, more, involve, press, constant, proje... \n",
"7 [errrectlon, save, to, ?, start, what, man, ca... \n",
"8 [2001, undeliverable, 2002, -, 8, recognized, ... \n",
"9 [errrectlon, save, to, ?, start, what, man, ca... "
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -303,7 +372,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -340,7 +409,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -360,7 +429,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand All @@ -369,7 +438,7 @@
"{'spam': 9, 'ham': 1}"
]
},
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -380,7 +449,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand All @@ -389,7 +458,7 @@
"{'spam': 39, 'ham': 42}"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -407,7 +476,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -420,7 +489,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand All @@ -429,7 +498,7 @@
"0.9"
]
},
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -440,7 +509,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [
{
Expand All @@ -449,7 +518,7 @@
"0.48148148148148145"
]
},
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -460,7 +529,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -476,8 +545,8 @@
" if word in model:\n",
" spams.append(model[word]['spam']/num_spam*total)\n",
" hams.append(model[word]['ham']/num_ham*total)\n",
" prod_spams = np.compat.long(np.prod(spams)*num_spam)\n",
" prod_hams = np.compat.long(np.prod(hams)*num_ham)\n",
" prod_spams = np.prod(spams, dtype=np.float64) * num_spam\n",
" prod_hams = np.prod(hams, dtype=np.float64) * num_ham\n",
" return prod_spams/(prod_spams + prod_hams)"
]
},
Expand Down Expand Up @@ -665,7 +734,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.12.5"
}
},
"nbformat": 4,
Expand Down