From 79fb3919505ef9f3f82abc8bc9631ed063176642 Mon Sep 17 00:00:00 2001 From: FabianaMFZ Date: Wed, 31 Jul 2024 00:09:41 +0100 Subject: [PATCH] lab --- your-code/challenge-1.ipynb | 147 +- your-code/challenge-2.ipynb | 4945 ++++++++++++++++++++++++++++++++++- 2 files changed, 5053 insertions(+), 39 deletions(-) diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index 0808166..cdc145b 100644 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -14,6 +14,28 @@ "* Write the functions you will use in Challenge 3 for cleaning, tokenizing, stemming, and lemmatizing data." ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import re\n", + "\n", + "import nltk \n", + "from nltk.stem import WordNetLemmatizer\n", + "from nltk.corpus import stopwords\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.tokenize import sent_tokenize\n", + "from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer\n", + "from nltk.stem import WordNetLemmatizer\n", + "from nltk.corpus import wordnet\n", + "\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.feature_extraction.text import CountVectorizer" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -66,11 +88,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'ironhack s q website is'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def clean_up(s):\n", + "def clean_up(string):\n", " \"\"\"\n", " Cleans up numbers, URLs, and special characters from a string.\n", "\n", @@ -78,8 +111,22 @@ " s: The string to be cleaned up.\n", "\n", " Returns:\n", - " A string that has been cleaned up.\n", - " \"\"\"" + " A cleaned-up string.\n", + " \"\"\"\n", + " text = s.lower() # Convert to lowercase\n", + " text = re.sub(r'\\d+', ' ', text) # Remove numbers\n", + " text = re.sub(r'http\\S+', ' ', text) # Remove URLs\n", + " text = re.sub(r'[^\\w\\s]', ' ', text) # Remove special characters (except spaces)\n", + " \n", + " # Remove extra spaces\n", + " text = re.sub(r'\\s+', ' ', text).strip()\n", + "\n", + " return text\n", + "\n", + "string = r\"\"\"@Ironhack's-#Q website 776-is http://ironhack.com [(2018)]\")\"\"\"\n", + "\n", + "cleaned_string = clean_up(string)\n", + "cleaned_string" ] }, { @@ -101,11 +148,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['ironhack', 's', 'q', 'website', 'is']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def tokenize(s):\n", + "def tokenize(cleaned_string):\n", " \"\"\"\n", " Tokenize a string.\n", "\n", @@ -114,7 +172,14 @@ "\n", " Returns:\n", " A list of words as the result of tokenization.\n", - " \"\"\"" + " \"\"\"\n", + " tokens = word_tokenize(cleaned_string)\n", + " tokens = [word for word in tokens if word.isalnum()]\n", + " \n", + " return tokens\n", + "\n", + "string_tokens = tokenize(cleaned_string)\n", + "string_tokens" ] }, { @@ -145,11 +210,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['ironhack---->ironhack',\n", + " 's---->s',\n", + " 'q---->q',\n", + " 'website---->website',\n", + " 'is---->is']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def stem_and_lemmatize(l):\n", + "def stem_and_lemmatize(string_tokens):\n", " \"\"\"\n", " Perform stemming and lemmatization on a list of words.\n", "\n", @@ -158,7 +238,14 @@ "\n", " Returns:\n", " A list of strings after being stemmed and lemmatized.\n", - " \"\"\"" + " \"\"\"\n", + " lemmatizer = WordNetLemmatizer()\n", + " lemmatized = [word +\"---->\" + lemmatizer.lemmatize(word) for word in string_tokens]\n", + " \n", + " return lemmatized\n", + "\n", + "lemmatized = stem_and_lemmatize(string_tokens)\n", + "lemmatized" ] }, { @@ -176,11 +263,26 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['ironhack---->ironhack',\n", + " 's---->s',\n", + " 'q---->q',\n", + " 'website---->website',\n", + " 'is---->is']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def remove_stopwords(l):\n", + "def remove_stopwords(lemmatized):\n", " \"\"\"\n", " Remove English stopwords from a list of strings.\n", "\n", @@ -189,7 +291,14 @@ "\n", " Returns:\n", " A list of strings after stop words are removed.\n", - " \"\"\"" + " \"\"\"\n", + " stop_words = set(stopwords.words('english'))\n", + " filtered_tokens = [word for word in lemmatized if word not in stop_words]\n", + " \n", + " return filtered_tokens\n", + "\n", + "filtered_tokens = remove_stopwords(lemmatized)\n", + "filtered_tokens" ] }, { @@ -204,7 +313,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -218,7 +327,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 6b0e116..be339d7 100644 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -46,11 +46,193 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 26, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
targetiddateflagusertext
54120002200003313Tue Jun 16 18:18:13 PDT 2009NO_QUERYDEWGetMeTho77@Nkluvr4eva My poor little dumpling In Holmde...
75001467998601Mon Apr 06 23:11:18 PDT 2009NO_QUERYYoung_JI'm off too bed. I gotta wake up hella early t...
76671102300049112Tue Jun 23 13:40:12 PDT 2009NO_QUERYdougnawoschikI havent been able to listen to it yet My spe...
28505501993474319Mon Jun 01 10:26:09 PDT 2009NO_QUERYthirevennow remembers why solving a relatively big equ...
70599502256551006Sat Jun 20 12:56:51 PDT 2009NO_QUERYtaracollins086Ate too much, feel sick
.....................
137448242051447103Fri Jun 05 22:02:36 PDT 2009NO_QUERY_Jaska@girlwonder24 Thanks.
66701402245469948Fri Jun 19 16:10:39 PDT 2009NO_QUERYjulianicolaotrying to study for the biggest test, next wee...
145123442063022808Sun Jun 07 01:05:46 PDT 2009NO_QUERYElaineToniJust finished watching Your Song Presents: Boy...
118141241982082859Sun May 31 10:29:36 PDT 2009NO_QUERYlindseyrd20@janfran813 awww i can't wait to get one
51791002191411932Tue Jun 16 05:13:13 PDT 2009NO_QUERYserraannisadoing nothing
\n", + "

20000 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " target id date flag \\\n", + "541200 0 2200003313 Tue Jun 16 18:18:13 PDT 2009 NO_QUERY \n", + "750 0 1467998601 Mon Apr 06 23:11:18 PDT 2009 NO_QUERY \n", + "766711 0 2300049112 Tue Jun 23 13:40:12 PDT 2009 NO_QUERY \n", + "285055 0 1993474319 Mon Jun 01 10:26:09 PDT 2009 NO_QUERY \n", + "705995 0 2256551006 Sat Jun 20 12:56:51 PDT 2009 NO_QUERY \n", + "... ... ... ... ... \n", + "1374482 4 2051447103 Fri Jun 05 22:02:36 PDT 2009 NO_QUERY \n", + "667014 0 2245469948 Fri Jun 19 16:10:39 PDT 2009 NO_QUERY \n", + "1451234 4 2063022808 Sun Jun 07 01:05:46 PDT 2009 NO_QUERY \n", + "1181412 4 1982082859 Sun May 31 10:29:36 PDT 2009 NO_QUERY \n", + "517910 0 2191411932 Tue Jun 16 05:13:13 PDT 2009 NO_QUERY \n", + "\n", + " user text \n", + "541200 DEWGetMeTho77 @Nkluvr4eva My poor little dumpling In Holmde... \n", + "750 Young_J I'm off too bed. I gotta wake up hella early t... \n", + "766711 dougnawoschik I havent been able to listen to it yet My spe... \n", + "285055 thireven now remembers why solving a relatively big equ... \n", + "705995 taracollins086 Ate too much, feel sick \n", + "... ... ... \n", + "1374482 _Jaska @girlwonder24 Thanks. \n", + "667014 julianicolao trying to study for the biggest test, next wee... \n", + "1451234 ElaineToni Just finished watching Your Song Presents: Boy... \n", + "1181412 lindseyrd20 @janfran813 awww i can't wait to get one \n", + "517910 serraannisa doing nothing \n", + "\n", + "[20000 rows x 6 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"sentiment140.csv\", encoding='ISO-8859-1') \n", + "\n", + "df.columns = ['target', 'id', 'date', 'flag', 'user', 'text']\n", + "\n", + "# Create a smaller sample for development\n", + "sample_size = 20000 \n", + "df_sample = df.sample(n=sample_size, random_state=42)\n", + "df_sample\n", + "\n", + "# target: the polarity of the tweet (0 = negative, 2 = neutral, 4 = positive)" ] }, { @@ -76,11 +258,404 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indextargetiddateflagusertexttext_cleanedtext_tokentext_lemmatext_processed
054120002200003313Tue Jun 16 18:18:13 PDT 2009NO_QUERYDEWGetMeTho77@Nkluvr4eva My poor little dumpling In Holmde...nkluvr eva my poor little dumpling in holmdel ...[nkluvr, eva, my, poor, little, dumpling, in, ...[nkluvr, eva, my, poor, little, dumpling, in, ...[nkluvr, eva, poor, little, dumpling, holmdel,...
175001467998601Mon Apr 06 23:11:18 PDT 2009NO_QUERYYoung_JI'm off too bed. I gotta wake up hella early t...i m off too bed i gotta wake up hella early to...[i, m, off, too, bed, i, got, ta, wake, up, he...[i, m, off, too, bed, i, got, ta, wake, up, he...[bed, got, ta, wake, hella, early, tomorrow, m...
276671102300049112Tue Jun 23 13:40:12 PDT 2009NO_QUERYdougnawoschikI havent been able to listen to it yet My spe...i havent been able to listen to it yet my spea...[i, havent, been, able, to, listen, to, it, ye...[i, havent, been, able, to, listen, to, it, ye...[havent, able, listen, yet, speaker, busted]
328505501993474319Mon Jun 01 10:26:09 PDT 2009NO_QUERYthirevennow remembers why solving a relatively big equ...now remembers why solving a relatively big equ...[now, remembers, why, solving, a, relatively, ...[now, remembers, why, solving, a, relatively, ...[remembers, solving, relatively, big, equation...
470599502256551006Sat Jun 20 12:56:51 PDT 2009NO_QUERYtaracollins086Ate too much, feel sickate too much feel sick[ate, too, much, feel, sick][ate, too, much, feel, sick][ate, much, feel, sick]
....................................
19995137448242051447103Fri Jun 05 22:02:36 PDT 2009NO_QUERY_Jaska@girlwonder24 Thanks.girlwonder thanks[girlwonder, thanks][girlwonder, thanks][girlwonder, thanks]
1999666701402245469948Fri Jun 19 16:10:39 PDT 2009NO_QUERYjulianicolaotrying to study for the biggest test, next wee...trying to study for the biggest test next week...[trying, to, study, for, the, biggest, test, n...[trying, to, study, for, the, biggest, test, n...[trying, study, biggest, test, next, week, wor...
19997145123442063022808Sun Jun 07 01:05:46 PDT 2009NO_QUERYElaineToniJust finished watching Your Song Presents: Boy...just finished watching your song presents boys...[just, finished, watching, your, song, present...[just, finished, watching, your, song, present...[finished, watching, song, present, boystown]
19998118141241982082859Sun May 31 10:29:36 PDT 2009NO_QUERYlindseyrd20@janfran813 awww i can't wait to get onejanfran awww i can t wait to get one[janfran, awww, i, can, t, wait, to, get, one][janfran, awww, i, can, t, wait, to, get, one][janfran, awww, wait, get, one]
1999951791002191411932Tue Jun 16 05:13:13 PDT 2009NO_QUERYserraannisadoing nothingdoing nothing[doing, nothing][doing, nothing][nothing]
\n", + "

20000 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " index target id date flag \\\n", + "0 541200 0 2200003313 Tue Jun 16 18:18:13 PDT 2009 NO_QUERY \n", + "1 750 0 1467998601 Mon Apr 06 23:11:18 PDT 2009 NO_QUERY \n", + "2 766711 0 2300049112 Tue Jun 23 13:40:12 PDT 2009 NO_QUERY \n", + "3 285055 0 1993474319 Mon Jun 01 10:26:09 PDT 2009 NO_QUERY \n", + "4 705995 0 2256551006 Sat Jun 20 12:56:51 PDT 2009 NO_QUERY \n", + "... ... ... ... ... ... \n", + "19995 1374482 4 2051447103 Fri Jun 05 22:02:36 PDT 2009 NO_QUERY \n", + "19996 667014 0 2245469948 Fri Jun 19 16:10:39 PDT 2009 NO_QUERY \n", + "19997 1451234 4 2063022808 Sun Jun 07 01:05:46 PDT 2009 NO_QUERY \n", + "19998 1181412 4 1982082859 Sun May 31 10:29:36 PDT 2009 NO_QUERY \n", + "19999 517910 0 2191411932 Tue Jun 16 05:13:13 PDT 2009 NO_QUERY \n", + "\n", + " user text \\\n", + "0 DEWGetMeTho77 @Nkluvr4eva My poor little dumpling In Holmde... \n", + "1 Young_J I'm off too bed. I gotta wake up hella early t... \n", + "2 dougnawoschik I havent been able to listen to it yet My spe... \n", + "3 thireven now remembers why solving a relatively big equ... \n", + "4 taracollins086 Ate too much, feel sick \n", + "... ... ... \n", + "19995 _Jaska @girlwonder24 Thanks. \n", + "19996 julianicolao trying to study for the biggest test, next wee... \n", + "19997 ElaineToni Just finished watching Your Song Presents: Boy... \n", + "19998 lindseyrd20 @janfran813 awww i can't wait to get one \n", + "19999 serraannisa doing nothing \n", + "\n", + " text_cleaned \\\n", + "0 nkluvr eva my poor little dumpling in holmdel ... \n", + "1 i m off too bed i gotta wake up hella early to... \n", + "2 i havent been able to listen to it yet my spea... \n", + "3 now remembers why solving a relatively big equ... \n", + "4 ate too much feel sick \n", + "... ... \n", + "19995 girlwonder thanks \n", + "19996 trying to study for the biggest test next week... \n", + "19997 just finished watching your song presents boys... \n", + "19998 janfran awww i can t wait to get one \n", + "19999 doing nothing \n", + "\n", + " text_token \\\n", + "0 [nkluvr, eva, my, poor, little, dumpling, in, ... \n", + "1 [i, m, off, too, bed, i, got, ta, wake, up, he... \n", + "2 [i, havent, been, able, to, listen, to, it, ye... \n", + "3 [now, remembers, why, solving, a, relatively, ... \n", + "4 [ate, too, much, feel, sick] \n", + "... ... \n", + "19995 [girlwonder, thanks] \n", + "19996 [trying, to, study, for, the, biggest, test, n... \n", + "19997 [just, finished, watching, your, song, present... \n", + "19998 [janfran, awww, i, can, t, wait, to, get, one] \n", + "19999 [doing, nothing] \n", + "\n", + " text_lemma \\\n", + "0 [nkluvr, eva, my, poor, little, dumpling, in, ... \n", + "1 [i, m, off, too, bed, i, got, ta, wake, up, he... \n", + "2 [i, havent, been, able, to, listen, to, it, ye... \n", + "3 [now, remembers, why, solving, a, relatively, ... \n", + "4 [ate, too, much, feel, sick] \n", + "... ... \n", + "19995 [girlwonder, thanks] \n", + "19996 [trying, to, study, for, the, biggest, test, n... \n", + "19997 [just, finished, watching, your, song, present... \n", + "19998 [janfran, awww, i, can, t, wait, to, get, one] \n", + "19999 [doing, nothing] \n", + "\n", + " text_processed \n", + "0 [nkluvr, eva, poor, little, dumpling, holmdel,... \n", + "1 [bed, got, ta, wake, hella, early, tomorrow, m... \n", + "2 [havent, able, listen, yet, speaker, busted] \n", + "3 [remembers, solving, relatively, big, equation... \n", + "4 [ate, much, feel, sick] \n", + "... ... \n", + "19995 [girlwonder, thanks] \n", + "19996 [trying, study, biggest, test, next, week, wor... \n", + "19997 [finished, watching, song, present, boystown] \n", + "19998 [janfran, awww, wait, get, one] \n", + "19999 [nothing] \n", + "\n", + "[20000 rows x 11 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import re\n", + "\n", + "import nltk \n", + "from nltk.stem import WordNetLemmatizer\n", + "from nltk.corpus import stopwords\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.tokenize import sent_tokenize\n", + "from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer\n", + "from nltk.stem import WordNetLemmatizer\n", + "from nltk.corpus import wordnet\n", + "\n", + "def clean_up(column):\n", + " text = column.lower() # Convert to lowercase\n", + " text = re.sub(r'\\d+', ' ', text) # Remove numbers\n", + " text = re.sub(r'http\\S+', ' ', text) # Remove URLs\n", + " text = re.sub(r'[^\\w\\s]', ' ', text) # Remove special characters (except spaces)\n", + " text = re.sub(r'\\s+', ' ', text).strip() # Remove extra spaces\n", + "\n", + " return text\n", + "\n", + "def tokenize(column):\n", + " tokens = word_tokenize(column)\n", + " tokens = [word for word in tokens if word.isalnum()]\n", + " \n", + " return tokens\n", + "\n", + "def stem_and_lemmatize(column):\n", + " lemmatizer = WordNetLemmatizer()\n", + " lemmatized = [lemmatizer.lemmatize(word) for word in column]\n", + " \n", + " return lemmatized\n", + "\n", + "def remove_stopwords(column):\n", + " stop_words = set(stopwords.words('english'))\n", + " filtered_tokens = [word for word in column if word not in stop_words]\n", + " \n", + " return filtered_tokens\n", + "\n", + "# Apply functions\n", + "df_sample['text_cleaned'] = df_sample['text'].apply(clean_up)\n", + "df_sample['text_token'] = df_sample['text_cleaned'].apply(tokenize)\n", + "df_sample['text_lemma'] = df_sample['text_token'].apply(stem_and_lemmatize)\n", + "df_sample['text_processed'] = df_sample['text_lemma'].apply(remove_stopwords)\n", + "\n", + "df_sample = df_sample.reset_index()\n", + "df_sample" + ] + }, + { + "cell_type": "code", + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(9942, 11)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "neg_tweets = df_sample[df_sample['target']==0]\n", + "neg_tweets.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0, 11)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "neutral_tweets = df_sample[df_sample['target']==2]\n", + "neutral_tweets.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10058, 11)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pos_tweets = df_sample[df_sample['target']==4]\n", + "pos_tweets.shape" ] }, { @@ -98,11 +673,126 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0wa1400
1day1323
2good1175
3get1102
4like977
.........
4995regularly3
4996petewentz3
4997inspired3
4998nun3
4999influence3
\n", + "

5000 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 wa 1400\n", + "1 day 1323\n", + "2 good 1175\n", + "3 get 1102\n", + "4 like 977\n", + "... ... ...\n", + "4995 regularly 3\n", + "4996 petewentz 3\n", + "4997 inspired 3\n", + "4998 nun 3\n", + "4999 influence 3\n", + "\n", + "[5000 rows x 2 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "from nltk.probability import FreqDist\n", + "\n", + "all_words = [word for words in df_sample['text_processed'] for word in words]\n", + "freq_dist = FreqDist(all_words)\n", + "top_5000_words = freq_dist.most_common(5000)\n", + "\n", + "top_5000_df = pd.DataFrame(top_5000_words)\n", + "top_5000_df" ] }, { @@ -167,11 +857,4158 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "import numpy as np\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "\n", + "# Extract only the words from top_5000_words\n", + "top_words = [word for word, _ in top_5000_words]\n", + "\n", + "# Create CountVectorizer with vocabulary restriction\n", + "vectorizer = CountVectorizer(vocabulary=top_words)\n", + "\n", + "# Fit and transform to get the document-term matrix\n", + "dtm = vectorizer.fit_transform([' '.join(words) for words in df_sample['text_processed']])\n", + "\n", + "# Convert sparse matrix to dense numpy array for easier manipulation\n", + "dtm_dense = dtm.toarray()\n", + "\n", + "# Assuming your sentiment labels (0 for negative, 4 for positive) are in 'target' column\n", + "sentiment_labels = df_sample['target'].replace(4,1).to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " ...,\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0]], dtype=int64)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dtm_dense" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(20000, 5000)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dtm_dense.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, ..., 1, 1, 0], dtype=int64)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sentiment_labels" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[({'wa': True,\n", + " 'day': False,\n", + " 'good': False,\n", + " 'get': False,\n", + " 'like': False,\n", + " 'go': False,\n", + " 'quot': False,\n", + " 'love': False,\n", + " 'work': False,\n", + " 'got': False,\n", + " 'today': False,\n", + " 'going': False,\n", + " 'u': False,\n", + " 'time': False,\n", + " 'one': False,\n", + " 'lol': False,\n", + " 'know': False,\n", + " 'im': False,\n", + " 'back': False,\n", + " 'really': True,\n", + " 'want': False,\n", + " 'night': False,\n", + " 'amp': False,\n", + " 'well': False,\n", + " 'new': False,\n", + " 'see': False,\n", + " 'think': False,\n", + " 'still': False,\n", + " 'oh': False,\n", + " 'thanks': False,\n", + " 'na': False,\n", + " 'ha': False,\n", + " 'need': False,\n", + " 'home': False,\n", + " 'much': False,\n", + " 'miss': False,\n", + " 'feel': False,\n", + " 'last': False,\n", + " 'morning': False,\n", + " 'great': False,\n", + " 'make': False,\n", + " 'tomorrow': False,\n", + " 'twitter': False,\n", + " 'haha': False,\n", + " 'wish': False,\n", + " 'hope': True,\n", + " 'bad': False,\n", + " 'sad': False,\n", + " 'fun': False,\n", + " 'come': False,\n", + " 'sleep': False,\n", + " 'would': False,\n", + " 'nice': False,\n", + " 'sorry': False,\n", + " 'right': False,\n", + " 'week': False,\n", + " 'tonight': True,\n", + " 'happy': False,\n", + " 'say': False,\n", + " 'thing': False,\n", + " 'getting': False,\n", + " 'look': False,\n", + " 'friend': False,\n", + " 'gon': False,\n", + " 'though': False,\n", + " 'hate': False,\n", + " 'wait': False,\n", + " 'better': False,\n", + " 'bed': False,\n", + " 'way': False,\n", + " 'watching': False,\n", + " 'lt': False,\n", + " 'people': False,\n", + " 'yeah': False,\n", + " 'hour': False,\n", + " 'show': False,\n", + " 'could': False,\n", + " 'thank': False,\n", + " 'take': False,\n", + " 'weekend': False,\n", + " 'next': False,\n", + " 'yes': False,\n", + " 'school': False,\n", + " 'even': False,\n", + " 'little': True,\n", + " 'life': False,\n", + " 'working': False,\n", + " 'everyone': False,\n", + " 'guy': False,\n", + " 'cant': False,\n", + " 'sick': False,\n", + " 'dont': True,\n", + " 'hey': False,\n", + " 'let': False,\n", + " 'awesome': False,\n", + " 'movie': False,\n", + " 'girl': False,\n", + " 'tweet': False,\n", + " 'always': False,\n", + " 'x': False,\n", + " 'never': False,\n", + " 'watch': False,\n", + " 'please': False,\n", + " 'soon': False,\n", + " 'year': False,\n", + " 'first': False,\n", + " 'long': False,\n", + " 'ok': False,\n", + " 'tired': False,\n", + " 'already': False,\n", + " 'feeling': False,\n", + " 'suck': False,\n", + " 'wan': False,\n", + " 'sure': False,\n", + " 'best': False,\n", + " 'looking': False,\n", + " 'n': False,\n", + " 'man': False,\n", + " 'another': False,\n", + " 'something': False,\n", + " 'find': False,\n", + " 'start': False,\n", + " 'cool': False,\n", + " 'done': False,\n", + " 'pretty': False,\n", + " 'omg': False,\n", + " 'yay': False,\n", + " 'yet': False,\n", + " 'phone': False,\n", + " 'damn': False,\n", + " 'lot': False,\n", + " 'away': False,\n", + " 'went': False,\n", + " 'old': False,\n", + " 'follow': False,\n", + " 'help': False,\n", + " 'guess': False,\n", + " 'keep': False,\n", + " 'house': False,\n", + " 'thought': False,\n", + " 'song': False,\n", + " 'made': False,\n", + " 'ever': False,\n", + " 'trying': True,\n", + " 'ya': False,\n", + " 'sun': False,\n", + " 'p': False,\n", + " 'bit': False,\n", + " 'hurt': False,\n", + " 'game': False,\n", + " 'finally': False,\n", + " 'maybe': False,\n", + " 'ready': False,\n", + " 'sound': False,\n", + " 'w': False,\n", + " 'big': False,\n", + " 'lost': False,\n", + " 'b': False,\n", + " 'nothing': False,\n", + " 'someone': False,\n", + " 'early': False,\n", + " 'summer': False,\n", + " 'tell': False,\n", + " 'hard': True,\n", + " 'also': False,\n", + " 'birthday': False,\n", + " 'r': False,\n", + " 'left': False,\n", + " 'mean': False,\n", + " 'missed': False,\n", + " 'ur': False,\n", + " 'th': False,\n", + " 'rain': False,\n", + " 'pic': False,\n", + " 'mom': False,\n", + " 'com': False,\n", + " 'party': False,\n", + " 'two': False,\n", + " 'glad': False,\n", + " 'world': False,\n", + " 'baby': False,\n", + " 'wow': False,\n", + " 'might': False,\n", + " 'bored': False,\n", + " 'call': False,\n", + " 'ta': False,\n", + " 'check': False,\n", + " 'late': False,\n", + " 'waiting': False,\n", + " 'car': False,\n", + " 'video': False,\n", + " 'stuff': False,\n", + " 'found': False,\n", + " 'yesterday': False,\n", + " 'said': False,\n", + " 'sunday': False,\n", + " 'luck': False,\n", + " 'saw': False,\n", + " 'amazing': False,\n", + " 'hot': False,\n", + " 'monday': False,\n", + " 'weather': False,\n", + " 'live': False,\n", + " 'many': False,\n", + " 'iphone': False,\n", + " 'doe': False,\n", + " 'exam': False,\n", + " 'thats': False,\n", + " 'play': False,\n", + " 'making': False,\n", + " 'may': False,\n", + " 'excited': False,\n", + " 'god': False,\n", + " 'dad': False,\n", + " 'family': False,\n", + " 'gone': False,\n", + " 'friday': False,\n", + " 'boy': False,\n", + " 'follower': False,\n", + " 'read': False,\n", + " 'funny': False,\n", + " 'hi': False,\n", + " 'since': False,\n", + " 'give': False,\n", + " 'poor': True,\n", + " 'enjoy': False,\n", + " 'job': False,\n", + " 'okay': False,\n", + " 'talk': False,\n", + " 'gt': False,\n", + " 'later': False,\n", + " 'head': False,\n", + " 'almost': False,\n", + " 'beautiful': False,\n", + " 'woke': False,\n", + " 'cold': False,\n", + " 'anything': False,\n", + " 'hear': False,\n", + " 'lunch': False,\n", + " 'missing': False,\n", + " 'put': False,\n", + " 'free': False,\n", + " 'try': True,\n", + " 'coming': False,\n", + " 'must': False,\n", + " 'end': False,\n", + " 'tho': False,\n", + " 'leave': False,\n", + " 'ugh': False,\n", + " 'till': False,\n", + " 'busy': False,\n", + " 'around': False,\n", + " 'food': False,\n", + " 'far': False,\n", + " 'book': False,\n", + " 'music': False,\n", + " 'cry': False,\n", + " 'headache': False,\n", + " 'cause': False,\n", + " 'use': False,\n", + " 'listening': False,\n", + " 'fan': False,\n", + " 'stop': False,\n", + " 'stay': False,\n", + " 'totally': False,\n", + " 'wanted': False,\n", + " 'place': False,\n", + " 'shit': False,\n", + " 'xx': True,\n", + " 'tv': False,\n", + " 'e': False,\n", + " 'picture': False,\n", + " 'update': False,\n", + " 'least': False,\n", + " 'sweet': False,\n", + " 'anyone': False,\n", + " 'lovely': False,\n", + " 'thinking': False,\n", + " 'forward': False,\n", + " 'aww': False,\n", + " 'g': False,\n", + " 'dog': False,\n", + " 'class': False,\n", + " 'actually': False,\n", + " 'everything': False,\n", + " 'eat': False,\n", + " 'mine': False,\n", + " 'playing': False,\n", + " 'cute': False,\n", + " 'hahaha': False,\n", + " 'kid': False,\n", + " 'dinner': False,\n", + " 'stupid': False,\n", + " 'sooo': False,\n", + " 'came': False,\n", + " 'word': False,\n", + " 'ill': False,\n", + " 'eating': False,\n", + " 'win': False,\n", + " 'hopefully': False,\n", + " 'finished': False,\n", + " 'welcome': False,\n", + " 'anymore': False,\n", + " 'minute': False,\n", + " 'every': False,\n", + " 'face': False,\n", + " 'idea': False,\n", + " 'hair': False,\n", + " 'without': False,\n", + " 'kinda': False,\n", + " 'month': False,\n", + " 'saturday': False,\n", + " 'c': False,\n", + " 'wrong': False,\n", + " 'super': False,\n", + " 'www': False,\n", + " 'final': False,\n", + " 'true': False,\n", + " 'reading': False,\n", + " 'hug': False,\n", + " 'probably': False,\n", + " 'photo': False,\n", + " 'buy': False,\n", + " 'hehe': False,\n", + " 'taking': False,\n", + " 'believe': False,\n", + " 'eye': False,\n", + " 'didnt': False,\n", + " 'name': False,\n", + " 'alone': False,\n", + " 'mind': False,\n", + " 'room': False,\n", + " 'either': False,\n", + " 'dream': False,\n", + " 'following': False,\n", + " 'able': False,\n", + " 'goodnight': False,\n", + " 'else': False,\n", + " 'lmao': False,\n", + " 'boo': False,\n", + " 'heard': False,\n", + " 'coffee': False,\n", + " 'outside': False,\n", + " 'real': False,\n", + " 'mileycyrus': False,\n", + " 'break': False,\n", + " 'part': False,\n", + " 'june': False,\n", + " 'computer': False,\n", + " 'forgot': False,\n", + " 'ticket': False,\n", + " 'blog': False,\n", + " 'post': False,\n", + " 'awww': False,\n", + " 'rest': False,\n", + " 'dude': False,\n", + " 'pm': False,\n", + " 'enough': False,\n", + " 'brother': False,\n", + " 'plan': False,\n", + " 'ð': False,\n", + " 'half': False,\n", + " 'using': False,\n", + " 'add': False,\n", + " 'stuck': False,\n", + " 'person': False,\n", + " 'text': False,\n", + " 'meet': False,\n", + " 'mother': False,\n", + " 'album': False,\n", + " 'study': False,\n", + " 'crazy': False,\n", + " 'hand': False,\n", + " 'send': False,\n", + " 'fine': False,\n", + " 'talking': False,\n", + " 'whole': False,\n", + " 'reply': False,\n", + " 'nite': False,\n", + " 'k': False,\n", + " 'seems': False,\n", + " 'run': False,\n", + " 'red': False,\n", + " 'took': False,\n", + " 'v': False,\n", + " 'seen': False,\n", + " 'trip': False,\n", + " 'full': False,\n", + " 'beach': False,\n", + " 'hello': False,\n", + " 'side': False,\n", + " 'loved': False,\n", + " 'news': False,\n", + " 'hit': False,\n", + " 'tried': False,\n", + " 'kind': False,\n", + " 'rock': False,\n", + " 'heart': False,\n", + " 'yea': False,\n", + " 'la': False,\n", + " 'shopping': False,\n", + " 'problem': False,\n", + " 'afternoon': False,\n", + " 'â': False,\n", + " 'pain': False,\n", + " 'change': False,\n", + " 'nap': False,\n", + " 'started': False,\n", + " 'used': False,\n", + " 'remember': False,\n", + " 'star': False,\n", + " 'boring': False,\n", + " 'course': False,\n", + " 'heading': False,\n", + " 'quite': False,\n", + " 'seeing': False,\n", + " 'hell': False,\n", + " 'train': False,\n", + " 'breakfast': False,\n", + " 'crap': False,\n", + " 'told': False,\n", + " 'english': False,\n", + " 'died': False,\n", + " 'sister': False,\n", + " 'fuck': False,\n", + " 'site': False,\n", + " 'instead': False,\n", + " 'leaving': False,\n", + " 'ipod': False,\n", + " 'money': False,\n", + " 'raining': False,\n", + " 'finish': False,\n", + " 'til': False,\n", + " 'anyway': False,\n", + " 'ah': False,\n", + " 'running': False,\n", + " 'concert': False,\n", + " 'soo': False,\n", + " 'facebook': False,\n", + " 'link': False,\n", + " 'sitting': False,\n", + " 'point': False,\n", + " 'jealous': False,\n", + " 'season': False,\n", + " 'bring': False,\n", + " 'ñ': False,\n", + " 'cuz': False,\n", + " 'awake': False,\n", + " 'mum': False,\n", + " 'reason': False,\n", + " 'studying': False,\n", + " 'pay': False,\n", + " 'cat': False,\n", + " 'wonder': False,\n", + " 'congrats': False,\n", + " 'ago': False,\n", + " 'xd': False,\n", + " 'page': False,\n", + " 'f': False,\n", + " 'bought': False,\n", + " 'store': False,\n", + " 'drink': False,\n", + " 'definitely': False,\n", + " 'couple': False,\n", + " 'evening': False,\n", + " 'chocolate': False,\n", + " 'btw': False,\n", + " 'soooo': False,\n", + " 'sunny': False,\n", + " 'sore': False,\n", + " 'drive': False,\n", + " 'shower': False,\n", + " 'st': False,\n", + " 'lucky': False,\n", + " 'aw': False,\n", + " 'test': False,\n", + " 'walk': False,\n", + " 'internet': False,\n", + " 'open': False,\n", + " 'water': False,\n", + " 'wake': False,\n", + " 'l': False,\n", + " 'office': False,\n", + " 'list': False,\n", + " 'watched': False,\n", + " 'team': False,\n", + " 'tommcfly': False,\n", + " 'clean': False,\n", + " 'enjoying': False,\n", + " 'hungry': False,\n", + " 'smile': False,\n", + " 'seriously': False,\n", + " 'move': False,\n", + " 'wont': False,\n", + " 'high': False,\n", + " 'asleep': False,\n", + " 'award': False,\n", + " 'fucking': False,\n", + " 'bout': False,\n", + " 'starting': False,\n", + " 'top': False,\n", + " 'moment': False,\n", + " 'dance': False,\n", + " 'project': False,\n", + " 'second': False,\n", + " 'park': False,\n", + " 'ask': False,\n", + " 'hr': False,\n", + " 'email': False,\n", + " 'church': False,\n", + " 'driving': False,\n", + " 'tea': False,\n", + " 'broke': False,\n", + " 'gym': False,\n", + " 'ride': False,\n", + " 'le': False,\n", + " 'loving': False,\n", + " 'meeting': False,\n", + " 'worth': False,\n", + " 'fail': False,\n", + " 'black': False,\n", + " 'close': False,\n", + " 'visit': False,\n", + " 'sigh': False,\n", + " 'ate': False,\n", + " 'online': False,\n", + " 'number': False,\n", + " 'vote': False,\n", + " 'bye': False,\n", + " 'hang': False,\n", + " 'wonderful': False,\n", + " 'youtube': False,\n", + " 'care': False,\n", + " 'co': False,\n", + " 'cut': False,\n", + " 'drinking': False,\n", + " 'horrible': False,\n", + " 'ddlovato': False,\n", + " 'shirt': False,\n", + " 'ice': False,\n", + " 'saying': False,\n", + " 'answer': False,\n", + " 'date': False,\n", + " 'dear': False,\n", + " 'agree': False,\n", + " 'j': False,\n", + " 'set': False,\n", + " 'town': False,\n", + " 'da': False,\n", + " 'wear': False,\n", + " 'happened': False,\n", + " 'line': False,\n", + " 'parent': False,\n", + " 'worse': False,\n", + " 'min': False,\n", + " 'longer': False,\n", + " 'lady': False,\n", + " 'together': False,\n", + " 'cream': False,\n", + " 'worry': False,\n", + " 'goin': False,\n", + " 'followfriday': False,\n", + " 'fast': False,\n", + " 'forget': False,\n", + " 'fb': False,\n", + " 'doesnt': False,\n", + " 'broken': False,\n", + " 'wtf': False,\n", + " 'turn': False,\n", + " 'unfortunately': False,\n", + " 'chance': False,\n", + " 'favorite': False,\n", + " 'spent': False,\n", + " 'fall': False,\n", + " 'air': False,\n", + " 'idk': False,\n", + " 'slept': False,\n", + " 'sometimes': False,\n", + " 'rainy': False,\n", + " 'question': False,\n", + " 'laptop': False,\n", + " 'tweeting': False,\n", + " 'tuesday': False,\n", + " 'earlier': False,\n", + " 'mood': False,\n", + " 'slow': False,\n", + " 'hoping': False,\n", + " 'thx': False,\n", + " 'h': False,\n", + " 'absolutely': False,\n", + " 'mr': False,\n", + " 'ahh': False,\n", + " 'cleaning': False,\n", + " 'via': False,\n", + " 'holiday': False,\n", + " 'happen': False,\n", + " 'wishing': False,\n", + " 'taken': False,\n", + " 'pool': False,\n", + " 'episode': False,\n", + " 'garden': False,\n", + " 'homework': False,\n", + " 'website': False,\n", + " 'sleeping': False,\n", + " 'airport': False,\n", + " 'especially': False,\n", + " 'business': False,\n", + " 'perfect': False,\n", + " 'em': False,\n", + " 'fell': False,\n", + " 'nd': False,\n", + " 'upset': False,\n", + " 'small': False,\n", + " 'city': False,\n", + " 'chat': False,\n", + " 'knew': False,\n", + " 'foot': False,\n", + " 'chicken': False,\n", + " '½': False,\n", + " 'father': False,\n", + " 'throat': False,\n", + " 'mac': False,\n", + " 'weird': False,\n", + " 'window': False,\n", + " 'passed': False,\n", + " 'son': False,\n", + " 'story': False,\n", + " 'tour': False,\n", + " 'message': False,\n", + " 'shop': False,\n", + " 'wednesday': False,\n", + " 'due': False,\n", + " 'bbq': False,\n", + " 'listen': False,\n", + " 'sleepy': False,\n", + " 'woman': False,\n", + " 'company': False,\n", + " 'sunshine': False,\n", + " 'met': False,\n", + " 'short': False,\n", + " 'comment': False,\n", + " 'mad': False,\n", + " 'understand': False,\n", + " 'gave': False,\n", + " 'pc': False,\n", + " 'hubby': False,\n", + " 'different': False,\n", + " 'havent': False,\n", + " 'account': False,\n", + " 'note': False,\n", + " 'seem': False,\n", + " 'scared': False,\n", + " 'bag': False,\n", + " 'alright': False,\n", + " 'cup': False,\n", + " 'ive': False,\n", + " 'leg': False,\n", + " 'green': False,\n", + " 'interesting': False,\n", + " 'bus': False,\n", + " 'past': False,\n", + " 'glass': False,\n", + " 'worst': False,\n", + " 'power': False,\n", + " 'plz': False,\n", + " 'math': False,\n", + " 'white': False,\n", + " 'support': False,\n", + " 'nope': False,\n", + " 'sat': False,\n", + " 'moon': False,\n", + " 'hangover': False,\n", + " 'moving': False,\n", + " 'taylor': False,\n", + " 'touch': False,\n", + " 'order': False,\n", + " 'jonas': False,\n", + " 'pick': False,\n", + " 'forever': False,\n", + " 'tom': False,\n", + " 'dead': False,\n", + " 'shoot': False,\n", + " 'called': False,\n", + " 'bitch': False,\n", + " 'rather': False,\n", + " 'catch': False,\n", + " 'bet': False,\n", + " 'officially': False,\n", + " 'vip': False,\n", + " 'writing': False,\n", + " 'si': False,\n", + " 'lil': False,\n", + " 'worried': False,\n", + " 'xoxo': False,\n", + " 'write': False,\n", + " 'graduation': False,\n", + " 'liked': False,\n", + " 'except': False,\n", + " 'bday': False,\n", + " 'gay': False,\n", + " 'dang': False,\n", + " 'cousin': False,\n", + " 'load': False,\n", + " 'myspace': False,\n", + " 'sent': False,\n", + " 'fix': False,\n", + " 'ahhh': False,\n", + " 'fly': False,\n", + " 'blue': False,\n", + " 'college': False,\n", + " 'gorgeous': False,\n", + " 'special': False,\n", + " 'kill': False,\n", + " 'apple': False,\n", + " 'interview': False,\n", + " 'fight': False,\n", + " 'everybody': False,\n", + " 'dress': False,\n", + " 'box': False,\n", + " 'bro': False,\n", + " 'case': False,\n", + " 'july': False,\n", + " 'cake': False,\n", + " 'three': False,\n", + " 'hmm': False,\n", + " 'deal': False,\n", + " 'ouch': False,\n", + " 'inside': False,\n", + " 'yep': False,\n", + " 'random': False,\n", + " 'meant': False,\n", + " 'profile': False,\n", + " 'mtv': False,\n", + " 'wedding': False,\n", + " 'clothes': False,\n", + " 'band': False,\n", + " 'issue': False,\n", + " 'apparently': False,\n", + " 'lonely': False,\n", + " 'age': False,\n", + " 'shall': False,\n", + " 'flight': False,\n", + " 'yr': False,\n", + " 'supposed': False,\n", + " 'david': False,\n", + " 'living': False,\n", + " 'needed': False,\n", + " 'london': False,\n", + " 'laugh': False,\n", + " 'sign': False,\n", + " 'finger': False,\n", + " 'beer': False,\n", + " 'itunes': False,\n", + " 'played': False,\n", + " 'tear': False,\n", + " 'ppl': False,\n", + " 'body': False,\n", + " 'paper': False,\n", + " 'group': False,\n", + " 'looked': False,\n", + " 'sadly': False,\n", + " 'bloody': False,\n", + " 'club': False,\n", + " 'web': False,\n", + " 'vacation': False,\n", + " 'jonasbrothers': False,\n", + " 'uk': False,\n", + " 'version': False,\n", + " 'huge': False,\n", + " 'sooooo': False,\n", + " 'lakers': False,\n", + " 'germany': False,\n", + " 'save': False,\n", + " 'hanging': False,\n", + " 'shoe': False,\n", + " 'fantastic': False,\n", + " 'wine': False,\n", + " 'sort': False,\n", + " 'none': False,\n", + " 'twilight': False,\n", + " 'shot': False,\n", + " 'thursday': False,\n", + " 'confused': False,\n", + " 'sale': False,\n", + " 'goodbye': False,\n", + " 'round': False,\n", + " 'lame': False,\n", + " 'cheer': False,\n", + " 'singing': False,\n", + " 'promise': False,\n", + " 'drop': False,\n", + " 'nose': False,\n", + " 'whats': False,\n", + " 'babe': False,\n", + " 'xxx': False,\n", + " 'lesson': False,\n", + " 'camera': False,\n", + " 'download': False,\n", + " 'info': False,\n", + " 'fair': False,\n", + " 'yummy': False,\n", + " 'french': False,\n", + " 'miley': False,\n", + " 'light': False,\n", + " 'asked': False,\n", + " 'lazy': False,\n", + " 'indeed': False,\n", + " 'along': False,\n", + " 'quick': False,\n", + " 'ã': False,\n", + " 'door': False,\n", + " 'sit': False,\n", + " 'yup': False,\n", + " 'street': False,\n", + " 'mile': False,\n", + " 'dm': False,\n", + " 'jus': False,\n", + " 'giving': False,\n", + " 'ear': False,\n", + " 'service': False,\n", + " 'low': False,\n", + " 'relaxing': False,\n", + " 'radio': False,\n", + " 'arm': False,\n", + " 'future': False,\n", + " 'proud': False,\n", + " 'fact': False,\n", + " 'learn': False,\n", + " 'easy': False,\n", + " 'child': False,\n", + " 'kitty': False,\n", + " 'hold': False,\n", + " 'mark': False,\n", + " 'peep': False,\n", + " 'wearing': False,\n", + " 'luv': False,\n", + " 'south': False,\n", + " 'road': False,\n", + " 'smell': False,\n", + " 'exciting': False,\n", + " 'currently': False,\n", + " 'warm': False,\n", + " 'puppy': False,\n", + " 'packing': False,\n", + " 'google': False,\n", + " 'exactly': False,\n", + " 'share': False,\n", + " 'decided': False,\n", + " 'voice': False,\n", + " 'fat': False,\n", + " 'mommy': False,\n", + " 'realize': False,\n", + " 'join': False,\n", + " 'nearly': False,\n", + " 'bb': False,\n", + " 'whatever': False,\n", + " 'woo': False,\n", + " 'spend': False,\n", + " 'stand': False,\n", + " 'enjoyed': False,\n", + " 'tweeps': False,\n", + " 'sold': False,\n", + " 'gettin': False,\n", + " 'fever': False,\n", + " 'paid': False,\n", + " 'beat': False,\n", + " 'pink': False,\n", + " 'mall': False,\n", + " 'gunna': False,\n", + " 'ï': False,\n", + " 'freaking': False,\n", + " 'snow': False,\n", + " 'hospital': False,\n", + " 'cheese': False,\n", + " 'nobody': False,\n", + " 'wondering': False,\n", + " 'plane': False,\n", + " 'shame': False,\n", + " 'search': False,\n", + " 'ended': False,\n", + " 'helping': False,\n", + " 'safe': False,\n", + " 'staying': False,\n", + " 'storm': False,\n", + " 'fam': False,\n", + " 'z': False,\n", + " 'sing': False,\n", + " 'view': False,\n", + " 'yo': False,\n", + " 'card': False,\n", + " 'stick': False,\n", + " 'joe': False,\n", + " 'matter': False,\n", + " 'delicious': False,\n", + " 'cook': False,\n", + " 'country': False,\n", + " 'hill': False,\n", + " 'peace': False,\n", + " 'worked': False,\n", + " 'race': False,\n", + " 'boot': False,\n", + " 'figure': False,\n", + " 'vega': False,\n", + " 'france': False,\n", + " 'wit': False,\n", + " 'topic': False,\n", + " 'spot': False,\n", + " 'cover': False,\n", + " 'afford': False,\n", + " 'floor': False,\n", + " 'bike': False,\n", + " 'starbucks': False,\n", + " 'hahah': False,\n", + " 'tummy': False,\n", + " 'trouble': False,\n", + " 'mouth': False,\n", + " 'ran': False,\n", + " 'drunk': False,\n", + " 'lately': False,\n", + " 'joke': False,\n", + " 'bird': False,\n", + " 'flu': False,\n", + " 'showing': False,\n", + " 'mail': False,\n", + " 'turned': False,\n", + " 'men': False,\n", + " 'taste': False,\n", + " 'gosh': False,\n", + " 'bar': False,\n", + " 'changed': False,\n", + " 'fish': False,\n", + " 'stopped': False,\n", + " 'wife': False,\n", + " 'alot': False,\n", + " 'tweetdeck': False,\n", + " 'magic': False,\n", + " 'brilliant': False,\n", + " 'cooking': False,\n", + " 'state': False,\n", + " 'design': False,\n", + " 'film': False,\n", + " 'tan': False,\n", + " 'isnt': False,\n", + " 'nail': False,\n", + " 'bummed': False,\n", + " 'prob': False,\n", + " 'happens': False,\n", + " 'eh': False,\n", + " 'type': False,\n", + " 'young': False,\n", + " 'honey': False,\n", + " 'price': False,\n", + " 'disappointed': False,\n", + " 'sky': False,\n", + " 'ring': False,\n", + " 'surprise': False,\n", + " 'self': False,\n", + " 'depressing': False,\n", + " 'sexy': False,\n", + " 'cd': False,\n", + " 'woot': False,\n", + " 'pop': False,\n", + " 'piece': False,\n", + " 'degree': False,\n", + " 'chillin': False,\n", + " 'hilarious': False,\n", + " 'rip': False,\n", + " 'updated': False,\n", + " 'ohh': False,\n", + " 'boyfriend': False,\n", + " 'awwww': False,\n", + " 'hun': False,\n", + " 'crappy': False,\n", + " 'er': False,\n", + " 'although': False,\n", + " 'death': False,\n", + " 'lake': False,\n", + " 'ahead': False,\n", + " 'art': False,\n", + " 'watchin': False,\n", + " 'scary': False,\n", + " 'annoying': False,\n", + " 'event': False,\n", + " 'plus': False,\n", + " 'ff': False,\n", + " 'major': False,\n", + " 'afraid': False,\n", + " 'pissed': False,\n", + " 'wat': False,\n", + " 'huh': False,\n", + " 'secret': False,\n", + " 'others': False,\n", + " 'land': False,\n", + " 'caught': False,\n", + " 'closed': False,\n", + " 'kiss': False,\n", + " 'trek': False,\n", + " 'code': False,\n", + " 'mobile': False,\n", + " 'knee': False,\n", + " ...},\n", + " 0),\n", + " ({'wa': False,\n", + " 'day': False,\n", + " 'good': False,\n", + " 'get': False,\n", + " 'like': False,\n", + " 'go': False,\n", + " 'quot': False,\n", + " 'love': False,\n", + " 'work': False,\n", + " 'got': True,\n", + " 'today': False,\n", + " 'going': False,\n", + " 'u': False,\n", + " 'time': False,\n", + " 'one': False,\n", + " 'lol': False,\n", + " 'know': False,\n", + " 'im': False,\n", + " 'back': False,\n", + " 'really': False,\n", + " 'want': False,\n", + " 'night': False,\n", + " 'amp': False,\n", + " 'well': False,\n", + " 'new': False,\n", + " 'see': False,\n", + " 'think': False,\n", + " 'still': False,\n", + " 'oh': False,\n", + " 'thanks': False,\n", + " 'na': False,\n", + " 'ha': False,\n", + " 'need': False,\n", + " 'home': False,\n", + " 'much': False,\n", + " 'miss': False,\n", + " 'feel': False,\n", + " 'last': False,\n", + " 'morning': True,\n", + " 'great': False,\n", + " 'make': False,\n", + " 'tomorrow': True,\n", + " 'twitter': False,\n", + " 'haha': False,\n", + " 'wish': False,\n", + " 'hope': False,\n", + " 'bad': False,\n", + " 'sad': False,\n", + " 'fun': False,\n", + " 'come': False,\n", + " 'sleep': False,\n", + " 'would': False,\n", + " 'nice': False,\n", + " 'sorry': False,\n", + " 'right': False,\n", + " 'week': False,\n", + " 'tonight': False,\n", + " 'happy': False,\n", + " 'say': False,\n", + " 'thing': False,\n", + " 'getting': False,\n", + " 'look': False,\n", + " 'friend': False,\n", + " 'gon': False,\n", + " 'though': False,\n", + " 'hate': False,\n", + " 'wait': False,\n", + " 'better': False,\n", + " 'bed': True,\n", + " 'way': False,\n", + " 'watching': False,\n", + " 'lt': False,\n", + " 'people': False,\n", + " 'yeah': False,\n", + " 'hour': False,\n", + " 'show': False,\n", + " 'could': False,\n", + " 'thank': False,\n", + " 'take': False,\n", + " 'weekend': False,\n", + " 'next': False,\n", + " 'yes': False,\n", + " 'school': False,\n", + " 'even': False,\n", + " 'little': False,\n", + " 'life': False,\n", + " 'working': False,\n", + " 'everyone': False,\n", + " 'guy': False,\n", + " 'cant': False,\n", + " 'sick': False,\n", + " 'dont': False,\n", + " 'hey': False,\n", + " 'let': False,\n", + " 'awesome': False,\n", + " 'movie': False,\n", + " 'girl': False,\n", + " 'tweet': False,\n", + " 'always': False,\n", + " 'x': False,\n", + " 'never': False,\n", + " 'watch': False,\n", + " 'please': False,\n", + " 'soon': False,\n", + " 'year': False,\n", + " 'first': False,\n", + " 'long': False,\n", + " 'ok': False,\n", + " 'tired': False,\n", + " 'already': False,\n", + " 'feeling': False,\n", + " 'suck': False,\n", + " 'wan': False,\n", + " 'sure': False,\n", + " 'best': False,\n", + " 'looking': False,\n", + " 'n': False,\n", + " 'man': False,\n", + " 'another': False,\n", + " 'something': False,\n", + " 'find': False,\n", + " 'start': False,\n", + " 'cool': False,\n", + " 'done': False,\n", + " 'pretty': False,\n", + " 'omg': False,\n", + " 'yay': False,\n", + " 'yet': False,\n", + " 'phone': False,\n", + " 'damn': False,\n", + " 'lot': False,\n", + " 'away': False,\n", + " 'went': False,\n", + " 'old': False,\n", + " 'follow': False,\n", + " 'help': False,\n", + " 'guess': False,\n", + " 'keep': False,\n", + " 'house': False,\n", + " 'thought': False,\n", + " 'song': False,\n", + " 'made': False,\n", + " 'ever': False,\n", + " 'trying': False,\n", + " 'ya': False,\n", + " 'sun': False,\n", + " 'p': False,\n", + " 'bit': False,\n", + " 'hurt': False,\n", + " 'game': False,\n", + " 'finally': False,\n", + " 'maybe': False,\n", + " 'ready': False,\n", + " 'sound': False,\n", + " 'w': False,\n", + " 'big': False,\n", + " 'lost': False,\n", + " 'b': False,\n", + " 'nothing': False,\n", + " 'someone': False,\n", + " 'early': True,\n", + " 'summer': False,\n", + " 'tell': False,\n", + " 'hard': False,\n", + " 'also': False,\n", + " 'birthday': False,\n", + " 'r': False,\n", + " 'left': False,\n", + " 'mean': False,\n", + " 'missed': False,\n", + " 'ur': False,\n", + " 'th': False,\n", + " 'rain': False,\n", + " 'pic': False,\n", + " 'mom': False,\n", + " 'com': False,\n", + " 'party': False,\n", + " 'two': False,\n", + " 'glad': False,\n", + " 'world': False,\n", + " 'baby': False,\n", + " 'wow': False,\n", + " 'might': False,\n", + " 'bored': False,\n", + " 'call': False,\n", + " 'ta': True,\n", + " 'check': False,\n", + " 'late': False,\n", + " 'waiting': False,\n", + " 'car': False,\n", + " 'video': False,\n", + " 'stuff': False,\n", + " 'found': False,\n", + " 'yesterday': False,\n", + " 'said': False,\n", + " 'sunday': False,\n", + " 'luck': False,\n", + " 'saw': False,\n", + " 'amazing': False,\n", + " 'hot': False,\n", + " 'monday': False,\n", + " 'weather': False,\n", + " 'live': False,\n", + " 'many': False,\n", + " 'iphone': False,\n", + " 'doe': False,\n", + " 'exam': False,\n", + " 'thats': False,\n", + " 'play': False,\n", + " 'making': False,\n", + " 'may': False,\n", + " 'excited': False,\n", + " 'god': False,\n", + " 'dad': False,\n", + " 'family': False,\n", + " 'gone': False,\n", + " 'friday': False,\n", + " 'boy': False,\n", + " 'follower': False,\n", + " 'read': False,\n", + " 'funny': False,\n", + " 'hi': False,\n", + " 'since': False,\n", + " 'give': False,\n", + " 'poor': False,\n", + " 'enjoy': False,\n", + " 'job': False,\n", + " 'okay': False,\n", + " 'talk': False,\n", + " 'gt': False,\n", + " 'later': False,\n", + " 'head': False,\n", + " 'almost': False,\n", + " 'beautiful': False,\n", + " 'woke': False,\n", + " 'cold': False,\n", + " 'anything': False,\n", + " 'hear': False,\n", + " 'lunch': False,\n", + " 'missing': False,\n", + " 'put': False,\n", + " 'free': False,\n", + " 'try': False,\n", + " 'coming': False,\n", + " 'must': False,\n", + " 'end': False,\n", + " 'tho': False,\n", + " 'leave': False,\n", + " 'ugh': False,\n", + " 'till': False,\n", + " 'busy': False,\n", + " 'around': False,\n", + " 'food': False,\n", + " 'far': False,\n", + " 'book': False,\n", + " 'music': False,\n", + " 'cry': False,\n", + " 'headache': False,\n", + " 'cause': False,\n", + " 'use': False,\n", + " 'listening': False,\n", + " 'fan': False,\n", + " 'stop': False,\n", + " 'stay': False,\n", + " 'totally': False,\n", + " 'wanted': False,\n", + " 'place': False,\n", + " 'shit': False,\n", + " 'xx': False,\n", + " 'tv': False,\n", + " 'e': False,\n", + " 'picture': False,\n", + " 'update': False,\n", + " 'least': False,\n", + " 'sweet': False,\n", + " 'anyone': False,\n", + " 'lovely': False,\n", + " 'thinking': False,\n", + " 'forward': False,\n", + " 'aww': False,\n", + " 'g': False,\n", + " 'dog': False,\n", + " 'class': False,\n", + " 'actually': False,\n", + " 'everything': False,\n", + " 'eat': False,\n", + " 'mine': False,\n", + " 'playing': False,\n", + " 'cute': False,\n", + " 'hahaha': False,\n", + " 'kid': False,\n", + " 'dinner': False,\n", + " 'stupid': False,\n", + " 'sooo': False,\n", + " 'came': False,\n", + " 'word': False,\n", + " 'ill': False,\n", + " 'eating': False,\n", + " 'win': False,\n", + " 'hopefully': False,\n", + " 'finished': False,\n", + " 'welcome': False,\n", + " 'anymore': False,\n", + " 'minute': False,\n", + " 'every': False,\n", + " 'face': False,\n", + " 'idea': False,\n", + " 'hair': False,\n", + " 'without': False,\n", + " 'kinda': False,\n", + " 'month': False,\n", + " 'saturday': False,\n", + " 'c': False,\n", + " 'wrong': False,\n", + " 'super': False,\n", + " 'www': False,\n", + " 'final': False,\n", + " 'true': False,\n", + " 'reading': False,\n", + " 'hug': False,\n", + " 'probably': False,\n", + " 'photo': False,\n", + " 'buy': False,\n", + " 'hehe': False,\n", + " 'taking': False,\n", + " 'believe': False,\n", + " 'eye': False,\n", + " 'didnt': False,\n", + " 'name': False,\n", + " 'alone': False,\n", + " 'mind': False,\n", + " 'room': False,\n", + " 'either': False,\n", + " 'dream': False,\n", + " 'following': False,\n", + " 'able': False,\n", + " 'goodnight': False,\n", + " 'else': False,\n", + " 'lmao': False,\n", + " 'boo': False,\n", + " 'heard': False,\n", + " 'coffee': False,\n", + " 'outside': False,\n", + " 'real': False,\n", + " 'mileycyrus': False,\n", + " 'break': False,\n", + " 'part': False,\n", + " 'june': False,\n", + " 'computer': False,\n", + " 'forgot': False,\n", + " 'ticket': False,\n", + " 'blog': False,\n", + " 'post': False,\n", + " 'awww': False,\n", + " 'rest': False,\n", + " 'dude': False,\n", + " 'pm': False,\n", + " 'enough': False,\n", + " 'brother': False,\n", + " 'plan': False,\n", + " 'ð': False,\n", + " 'half': False,\n", + " 'using': False,\n", + " 'add': False,\n", + " 'stuck': False,\n", + " 'person': False,\n", + " 'text': False,\n", + " 'meet': False,\n", + " 'mother': False,\n", + " 'album': False,\n", + " 'study': False,\n", + " 'crazy': False,\n", + " 'hand': False,\n", + " 'send': False,\n", + " 'fine': False,\n", + " 'talking': False,\n", + " 'whole': False,\n", + " 'reply': False,\n", + " 'nite': False,\n", + " 'k': False,\n", + " 'seems': False,\n", + " 'run': False,\n", + " 'red': False,\n", + " 'took': False,\n", + " 'v': False,\n", + " 'seen': False,\n", + " 'trip': False,\n", + " 'full': False,\n", + " 'beach': False,\n", + " 'hello': False,\n", + " 'side': False,\n", + " 'loved': False,\n", + " 'news': False,\n", + " 'hit': False,\n", + " 'tried': False,\n", + " 'kind': False,\n", + " 'rock': False,\n", + " 'heart': False,\n", + " 'yea': False,\n", + " 'la': False,\n", + " 'shopping': False,\n", + " 'problem': False,\n", + " 'afternoon': False,\n", + " 'â': False,\n", + " 'pain': False,\n", + " 'change': False,\n", + " 'nap': False,\n", + " 'started': False,\n", + " 'used': False,\n", + " 'remember': False,\n", + " 'star': False,\n", + " 'boring': False,\n", + " 'course': False,\n", + " 'heading': False,\n", + " 'quite': False,\n", + " 'seeing': False,\n", + " 'hell': False,\n", + " 'train': False,\n", + " 'breakfast': False,\n", + " 'crap': False,\n", + " 'told': False,\n", + " 'english': False,\n", + " 'died': False,\n", + " 'sister': False,\n", + " 'fuck': False,\n", + " 'site': False,\n", + " 'instead': False,\n", + " 'leaving': False,\n", + " 'ipod': False,\n", + " 'money': False,\n", + " 'raining': False,\n", + " 'finish': False,\n", + " 'til': False,\n", + " 'anyway': False,\n", + " 'ah': False,\n", + " 'running': False,\n", + " 'concert': False,\n", + " 'soo': False,\n", + " 'facebook': False,\n", + " 'link': False,\n", + " 'sitting': False,\n", + " 'point': False,\n", + " 'jealous': False,\n", + " 'season': False,\n", + " 'bring': False,\n", + " 'ñ': False,\n", + " 'cuz': False,\n", + " 'awake': False,\n", + " 'mum': False,\n", + " 'reason': False,\n", + " 'studying': False,\n", + " 'pay': False,\n", + " 'cat': False,\n", + " 'wonder': False,\n", + " 'congrats': False,\n", + " 'ago': False,\n", + " 'xd': False,\n", + " 'page': False,\n", + " 'f': False,\n", + " 'bought': False,\n", + " 'store': False,\n", + " 'drink': False,\n", + " 'definitely': False,\n", + " 'couple': False,\n", + " 'evening': False,\n", + " 'chocolate': False,\n", + " 'btw': False,\n", + " 'soooo': False,\n", + " 'sunny': False,\n", + " 'sore': False,\n", + " 'drive': False,\n", + " 'shower': False,\n", + " 'st': False,\n", + " 'lucky': False,\n", + " 'aw': False,\n", + " 'test': False,\n", + " 'walk': False,\n", + " 'internet': False,\n", + " 'open': False,\n", + " 'water': False,\n", + " 'wake': True,\n", + " 'l': False,\n", + " 'office': False,\n", + " 'list': False,\n", + " 'watched': False,\n", + " 'team': False,\n", + " 'tommcfly': False,\n", + " 'clean': False,\n", + " 'enjoying': False,\n", + " 'hungry': False,\n", + " 'smile': False,\n", + " 'seriously': False,\n", + " 'move': False,\n", + " 'wont': False,\n", + " 'high': False,\n", + " 'asleep': False,\n", + " 'award': False,\n", + " 'fucking': False,\n", + " 'bout': False,\n", + " 'starting': False,\n", + " 'top': False,\n", + " 'moment': False,\n", + " 'dance': False,\n", + " 'project': False,\n", + " 'second': False,\n", + " 'park': False,\n", + " 'ask': False,\n", + " 'hr': False,\n", + " 'email': False,\n", + " 'church': False,\n", + " 'driving': False,\n", + " 'tea': False,\n", + " 'broke': False,\n", + " 'gym': False,\n", + " 'ride': False,\n", + " 'le': False,\n", + " 'loving': False,\n", + " 'meeting': False,\n", + " 'worth': False,\n", + " 'fail': False,\n", + " 'black': False,\n", + " 'close': False,\n", + " 'visit': False,\n", + " 'sigh': False,\n", + " 'ate': False,\n", + " 'online': False,\n", + " 'number': False,\n", + " 'vote': False,\n", + " 'bye': False,\n", + " 'hang': False,\n", + " 'wonderful': False,\n", + " 'youtube': False,\n", + " 'care': False,\n", + " 'co': False,\n", + " 'cut': False,\n", + " 'drinking': False,\n", + " 'horrible': False,\n", + " 'ddlovato': False,\n", + " 'shirt': False,\n", + " 'ice': False,\n", + " 'saying': False,\n", + " 'answer': False,\n", + " 'date': False,\n", + " 'dear': False,\n", + " 'agree': False,\n", + " 'j': False,\n", + " 'set': False,\n", + " 'town': False,\n", + " 'da': False,\n", + " 'wear': False,\n", + " 'happened': False,\n", + " 'line': False,\n", + " 'parent': False,\n", + " 'worse': False,\n", + " 'min': False,\n", + " 'longer': False,\n", + " 'lady': False,\n", + " 'together': False,\n", + " 'cream': False,\n", + " 'worry': False,\n", + " 'goin': False,\n", + " 'followfriday': False,\n", + " 'fast': False,\n", + " 'forget': False,\n", + " 'fb': False,\n", + " 'doesnt': False,\n", + " 'broken': False,\n", + " 'wtf': False,\n", + " 'turn': False,\n", + " 'unfortunately': False,\n", + " 'chance': False,\n", + " 'favorite': False,\n", + " 'spent': False,\n", + " 'fall': False,\n", + " 'air': False,\n", + " 'idk': False,\n", + " 'slept': False,\n", + " 'sometimes': False,\n", + " 'rainy': False,\n", + " 'question': False,\n", + " 'laptop': False,\n", + " 'tweeting': False,\n", + " 'tuesday': False,\n", + " 'earlier': False,\n", + " 'mood': False,\n", + " 'slow': False,\n", + " 'hoping': False,\n", + " 'thx': False,\n", + " 'h': False,\n", + " 'absolutely': False,\n", + " 'mr': False,\n", + " 'ahh': False,\n", + " 'cleaning': False,\n", + " 'via': False,\n", + " 'holiday': False,\n", + " 'happen': False,\n", + " 'wishing': False,\n", + " 'taken': False,\n", + " 'pool': False,\n", + " 'episode': False,\n", + " 'garden': False,\n", + " 'homework': False,\n", + " 'website': False,\n", + " 'sleeping': False,\n", + " 'airport': False,\n", + " 'especially': False,\n", + " 'business': False,\n", + " 'perfect': False,\n", + " 'em': False,\n", + " 'fell': False,\n", + " 'nd': False,\n", + " 'upset': False,\n", + " 'small': False,\n", + " 'city': False,\n", + " 'chat': False,\n", + " 'knew': False,\n", + " 'foot': False,\n", + " 'chicken': False,\n", + " '½': False,\n", + " 'father': False,\n", + " 'throat': False,\n", + " 'mac': False,\n", + " 'weird': False,\n", + " 'window': False,\n", + " 'passed': False,\n", + " 'son': False,\n", + " 'story': False,\n", + " 'tour': False,\n", + " 'message': False,\n", + " 'shop': False,\n", + " 'wednesday': False,\n", + " 'due': False,\n", + " 'bbq': False,\n", + " 'listen': False,\n", + " 'sleepy': False,\n", + " 'woman': False,\n", + " 'company': False,\n", + " 'sunshine': False,\n", + " 'met': False,\n", + " 'short': False,\n", + " 'comment': False,\n", + " 'mad': False,\n", + " 'understand': False,\n", + " 'gave': False,\n", + " 'pc': False,\n", + " 'hubby': False,\n", + " 'different': False,\n", + " 'havent': False,\n", + " 'account': False,\n", + " 'note': False,\n", + " 'seem': False,\n", + " 'scared': False,\n", + " 'bag': False,\n", + " 'alright': False,\n", + " 'cup': False,\n", + " 'ive': False,\n", + " 'leg': False,\n", + " 'green': False,\n", + " 'interesting': False,\n", + " 'bus': False,\n", + " 'past': False,\n", + " 'glass': False,\n", + " 'worst': False,\n", + " 'power': False,\n", + " 'plz': False,\n", + " 'math': False,\n", + " 'white': False,\n", + " 'support': False,\n", + " 'nope': False,\n", + " 'sat': False,\n", + " 'moon': False,\n", + " 'hangover': False,\n", + " 'moving': False,\n", + " 'taylor': False,\n", + " 'touch': False,\n", + " 'order': False,\n", + " 'jonas': False,\n", + " 'pick': False,\n", + " 'forever': False,\n", + " 'tom': False,\n", + " 'dead': False,\n", + " 'shoot': False,\n", + " 'called': False,\n", + " 'bitch': False,\n", + " 'rather': False,\n", + " 'catch': False,\n", + " 'bet': False,\n", + " 'officially': False,\n", + " 'vip': False,\n", + " 'writing': False,\n", + " 'si': False,\n", + " 'lil': False,\n", + " 'worried': False,\n", + " 'xoxo': False,\n", + " 'write': False,\n", + " 'graduation': False,\n", + " 'liked': False,\n", + " 'except': False,\n", + " 'bday': False,\n", + " 'gay': False,\n", + " 'dang': False,\n", + " 'cousin': False,\n", + " 'load': False,\n", + " 'myspace': False,\n", + " 'sent': False,\n", + " 'fix': False,\n", + " 'ahhh': False,\n", + " 'fly': False,\n", + " 'blue': False,\n", + " 'college': False,\n", + " 'gorgeous': False,\n", + " 'special': False,\n", + " 'kill': False,\n", + " 'apple': False,\n", + " 'interview': False,\n", + " 'fight': False,\n", + " 'everybody': False,\n", + " 'dress': False,\n", + " 'box': False,\n", + " 'bro': False,\n", + " 'case': False,\n", + " 'july': False,\n", + " 'cake': False,\n", + " 'three': False,\n", + " 'hmm': False,\n", + " 'deal': False,\n", + " 'ouch': False,\n", + " 'inside': False,\n", + " 'yep': False,\n", + " 'random': False,\n", + " 'meant': False,\n", + " 'profile': False,\n", + " 'mtv': False,\n", + " 'wedding': False,\n", + " 'clothes': False,\n", + " 'band': False,\n", + " 'issue': False,\n", + " 'apparently': False,\n", + " 'lonely': False,\n", + " 'age': False,\n", + " 'shall': False,\n", + " 'flight': False,\n", + " 'yr': False,\n", + " 'supposed': False,\n", + " 'david': False,\n", + " 'living': False,\n", + " 'needed': False,\n", + " 'london': False,\n", + " 'laugh': False,\n", + " 'sign': False,\n", + " 'finger': False,\n", + " 'beer': False,\n", + " 'itunes': False,\n", + " 'played': False,\n", + " 'tear': False,\n", + " 'ppl': False,\n", + " 'body': False,\n", + " 'paper': False,\n", + " 'group': False,\n", + " 'looked': False,\n", + " 'sadly': False,\n", + " 'bloody': False,\n", + " 'club': False,\n", + " 'web': False,\n", + " 'vacation': False,\n", + " 'jonasbrothers': False,\n", + " 'uk': False,\n", + " 'version': False,\n", + " 'huge': False,\n", + " 'sooooo': False,\n", + " 'lakers': False,\n", + " 'germany': False,\n", + " 'save': False,\n", + " 'hanging': False,\n", + " 'shoe': False,\n", + " 'fantastic': False,\n", + " 'wine': False,\n", + " 'sort': False,\n", + " 'none': False,\n", + " 'twilight': False,\n", + " 'shot': False,\n", + " 'thursday': False,\n", + " 'confused': False,\n", + " 'sale': False,\n", + " 'goodbye': False,\n", + " 'round': False,\n", + " 'lame': False,\n", + " 'cheer': False,\n", + " 'singing': False,\n", + " 'promise': False,\n", + " 'drop': False,\n", + " 'nose': False,\n", + " 'whats': False,\n", + " 'babe': False,\n", + " 'xxx': False,\n", + " 'lesson': False,\n", + " 'camera': False,\n", + " 'download': False,\n", + " 'info': False,\n", + " 'fair': False,\n", + " 'yummy': False,\n", + " 'french': False,\n", + " 'miley': False,\n", + " 'light': False,\n", + " 'asked': False,\n", + " 'lazy': False,\n", + " 'indeed': False,\n", + " 'along': False,\n", + " 'quick': False,\n", + " 'ã': False,\n", + " 'door': False,\n", + " 'sit': False,\n", + " 'yup': False,\n", + " 'street': False,\n", + " 'mile': False,\n", + " 'dm': False,\n", + " 'jus': False,\n", + " 'giving': False,\n", + " 'ear': False,\n", + " 'service': False,\n", + " 'low': False,\n", + " 'relaxing': False,\n", + " 'radio': False,\n", + " 'arm': False,\n", + " 'future': False,\n", + " 'proud': False,\n", + " 'fact': False,\n", + " 'learn': False,\n", + " 'easy': False,\n", + " 'child': False,\n", + " 'kitty': False,\n", + " 'hold': False,\n", + " 'mark': False,\n", + " 'peep': False,\n", + " 'wearing': False,\n", + " 'luv': False,\n", + " 'south': False,\n", + " 'road': False,\n", + " 'smell': False,\n", + " 'exciting': False,\n", + " 'currently': False,\n", + " 'warm': False,\n", + " 'puppy': False,\n", + " 'packing': False,\n", + " 'google': False,\n", + " 'exactly': False,\n", + " 'share': False,\n", + " 'decided': False,\n", + " 'voice': False,\n", + " 'fat': False,\n", + " 'mommy': False,\n", + " 'realize': False,\n", + " 'join': False,\n", + " 'nearly': False,\n", + " 'bb': False,\n", + " 'whatever': False,\n", + " 'woo': False,\n", + " 'spend': False,\n", + " 'stand': False,\n", + " 'enjoyed': False,\n", + " 'tweeps': False,\n", + " 'sold': False,\n", + " 'gettin': False,\n", + " 'fever': False,\n", + " 'paid': False,\n", + " 'beat': False,\n", + " 'pink': False,\n", + " 'mall': False,\n", + " 'gunna': False,\n", + " 'ï': False,\n", + " 'freaking': False,\n", + " 'snow': False,\n", + " 'hospital': False,\n", + " 'cheese': False,\n", + " 'nobody': False,\n", + " 'wondering': False,\n", + " 'plane': False,\n", + " 'shame': False,\n", + " 'search': False,\n", + " 'ended': False,\n", + " 'helping': False,\n", + " 'safe': False,\n", + " 'staying': False,\n", + " 'storm': False,\n", + " 'fam': False,\n", + " 'z': False,\n", + " 'sing': False,\n", + " 'view': False,\n", + " 'yo': False,\n", + " 'card': False,\n", + " 'stick': False,\n", + " 'joe': False,\n", + " 'matter': False,\n", + " 'delicious': False,\n", + " 'cook': False,\n", + " 'country': False,\n", + " 'hill': False,\n", + " 'peace': False,\n", + " 'worked': False,\n", + " 'race': False,\n", + " 'boot': False,\n", + " 'figure': False,\n", + " 'vega': False,\n", + " 'france': False,\n", + " 'wit': False,\n", + " 'topic': False,\n", + " 'spot': False,\n", + " 'cover': False,\n", + " 'afford': False,\n", + " 'floor': False,\n", + " 'bike': False,\n", + " 'starbucks': False,\n", + " 'hahah': False,\n", + " 'tummy': False,\n", + " 'trouble': False,\n", + " 'mouth': False,\n", + " 'ran': False,\n", + " 'drunk': False,\n", + " 'lately': False,\n", + " 'joke': False,\n", + " 'bird': False,\n", + " 'flu': False,\n", + " 'showing': False,\n", + " 'mail': False,\n", + " 'turned': False,\n", + " 'men': False,\n", + " 'taste': False,\n", + " 'gosh': False,\n", + " 'bar': False,\n", + " 'changed': False,\n", + " 'fish': False,\n", + " 'stopped': False,\n", + " 'wife': False,\n", + " 'alot': False,\n", + " 'tweetdeck': False,\n", + " 'magic': False,\n", + " 'brilliant': False,\n", + " 'cooking': False,\n", + " 'state': False,\n", + " 'design': False,\n", + " 'film': False,\n", + " 'tan': False,\n", + " 'isnt': False,\n", + " 'nail': False,\n", + " 'bummed': False,\n", + " 'prob': False,\n", + " 'happens': False,\n", + " 'eh': False,\n", + " 'type': False,\n", + " 'young': False,\n", + " 'honey': False,\n", + " 'price': False,\n", + " 'disappointed': False,\n", + " 'sky': False,\n", + " 'ring': False,\n", + " 'surprise': False,\n", + " 'self': False,\n", + " 'depressing': False,\n", + " 'sexy': False,\n", + " 'cd': False,\n", + " 'woot': False,\n", + " 'pop': False,\n", + " 'piece': False,\n", + " 'degree': False,\n", + " 'chillin': False,\n", + " 'hilarious': False,\n", + " 'rip': False,\n", + " 'updated': False,\n", + " 'ohh': False,\n", + " 'boyfriend': False,\n", + " 'awwww': False,\n", + " 'hun': False,\n", + " 'crappy': False,\n", + " 'er': False,\n", + " 'although': False,\n", + " 'death': False,\n", + " 'lake': False,\n", + " 'ahead': False,\n", + " 'art': False,\n", + " 'watchin': False,\n", + " 'scary': False,\n", + " 'annoying': False,\n", + " 'event': False,\n", + " 'plus': False,\n", + " 'ff': False,\n", + " 'major': False,\n", + " 'afraid': False,\n", + " 'pissed': False,\n", + " 'wat': False,\n", + " 'huh': False,\n", + " 'secret': False,\n", + " 'others': False,\n", + " 'land': False,\n", + " 'caught': False,\n", + " 'closed': False,\n", + " 'kiss': False,\n", + " 'trek': False,\n", + " 'code': False,\n", + " 'mobile': False,\n", + " 'knee': False,\n", + " ...},\n", + " 0),\n", + " ({'wa': False,\n", + " 'day': False,\n", + " 'good': False,\n", + " 'get': False,\n", + " 'like': False,\n", + " 'go': False,\n", + " 'quot': False,\n", + " 'love': False,\n", + " 'work': False,\n", + " 'got': False,\n", + " 'today': False,\n", + " 'going': False,\n", + " 'u': False,\n", + " 'time': False,\n", + " 'one': False,\n", + " 'lol': False,\n", + " 'know': False,\n", + " 'im': False,\n", + " 'back': False,\n", + " 'really': False,\n", + " 'want': False,\n", + " 'night': False,\n", + " 'amp': False,\n", + " 'well': False,\n", + " 'new': False,\n", + " 'see': False,\n", + " 'think': False,\n", + " 'still': False,\n", + " 'oh': False,\n", + " 'thanks': False,\n", + " 'na': False,\n", + " 'ha': False,\n", + " 'need': False,\n", + " 'home': False,\n", + " 'much': False,\n", + " 'miss': False,\n", + " 'feel': False,\n", + " 'last': False,\n", + " 'morning': False,\n", + " 'great': False,\n", + " 'make': False,\n", + " 'tomorrow': False,\n", + " 'twitter': False,\n", + " 'haha': False,\n", + " 'wish': False,\n", + " 'hope': False,\n", + " 'bad': False,\n", + " 'sad': False,\n", + " 'fun': False,\n", + " 'come': False,\n", + " 'sleep': False,\n", + " 'would': False,\n", + " 'nice': False,\n", + " 'sorry': False,\n", + " 'right': False,\n", + " 'week': False,\n", + " 'tonight': False,\n", + " 'happy': False,\n", + " 'say': False,\n", + " 'thing': False,\n", + " 'getting': False,\n", + " 'look': False,\n", + " 'friend': False,\n", + " 'gon': False,\n", + " 'though': False,\n", + " 'hate': False,\n", + " 'wait': False,\n", + " 'better': False,\n", + " 'bed': False,\n", + " 'way': False,\n", + " 'watching': False,\n", + " 'lt': False,\n", + " 'people': False,\n", + " 'yeah': False,\n", + " 'hour': False,\n", + " 'show': False,\n", + " 'could': False,\n", + " 'thank': False,\n", + " 'take': False,\n", + " 'weekend': False,\n", + " 'next': False,\n", + " 'yes': False,\n", + " 'school': False,\n", + " 'even': False,\n", + " 'little': False,\n", + " 'life': False,\n", + " 'working': False,\n", + " 'everyone': False,\n", + " 'guy': False,\n", + " 'cant': False,\n", + " 'sick': False,\n", + " 'dont': False,\n", + " 'hey': False,\n", + " 'let': False,\n", + " 'awesome': False,\n", + " 'movie': False,\n", + " 'girl': False,\n", + " 'tweet': False,\n", + " 'always': False,\n", + " 'x': False,\n", + " 'never': False,\n", + " 'watch': False,\n", + " 'please': False,\n", + " 'soon': False,\n", + " 'year': False,\n", + " 'first': False,\n", + " 'long': False,\n", + " 'ok': False,\n", + " 'tired': False,\n", + " 'already': False,\n", + " 'feeling': False,\n", + " 'suck': False,\n", + " 'wan': False,\n", + " 'sure': False,\n", + " 'best': False,\n", + " 'looking': False,\n", + " 'n': False,\n", + " 'man': False,\n", + " 'another': False,\n", + " 'something': False,\n", + " 'find': False,\n", + " 'start': False,\n", + " 'cool': False,\n", + " 'done': False,\n", + " 'pretty': False,\n", + " 'omg': False,\n", + " 'yay': False,\n", + " 'yet': True,\n", + " 'phone': False,\n", + " 'damn': False,\n", + " 'lot': False,\n", + " 'away': False,\n", + " 'went': False,\n", + " 'old': False,\n", + " 'follow': False,\n", + " 'help': False,\n", + " 'guess': False,\n", + " 'keep': False,\n", + " 'house': False,\n", + " 'thought': False,\n", + " 'song': False,\n", + " 'made': False,\n", + " 'ever': False,\n", + " 'trying': False,\n", + " 'ya': False,\n", + " 'sun': False,\n", + " 'p': False,\n", + " 'bit': False,\n", + " 'hurt': False,\n", + " 'game': False,\n", + " 'finally': False,\n", + " 'maybe': False,\n", + " 'ready': False,\n", + " 'sound': False,\n", + " 'w': False,\n", + " 'big': False,\n", + " 'lost': False,\n", + " 'b': False,\n", + " 'nothing': False,\n", + " 'someone': False,\n", + " 'early': False,\n", + " 'summer': False,\n", + " 'tell': False,\n", + " 'hard': False,\n", + " 'also': False,\n", + " 'birthday': False,\n", + " 'r': False,\n", + " 'left': False,\n", + " 'mean': False,\n", + " 'missed': False,\n", + " 'ur': False,\n", + " 'th': False,\n", + " 'rain': False,\n", + " 'pic': False,\n", + " 'mom': False,\n", + " 'com': False,\n", + " 'party': False,\n", + " 'two': False,\n", + " 'glad': False,\n", + " 'world': False,\n", + " 'baby': False,\n", + " 'wow': False,\n", + " 'might': False,\n", + " 'bored': False,\n", + " 'call': False,\n", + " 'ta': False,\n", + " 'check': False,\n", + " 'late': False,\n", + " 'waiting': False,\n", + " 'car': False,\n", + " 'video': False,\n", + " 'stuff': False,\n", + " 'found': False,\n", + " 'yesterday': False,\n", + " 'said': False,\n", + " 'sunday': False,\n", + " 'luck': False,\n", + " 'saw': False,\n", + " 'amazing': False,\n", + " 'hot': False,\n", + " 'monday': False,\n", + " 'weather': False,\n", + " 'live': False,\n", + " 'many': False,\n", + " 'iphone': False,\n", + " 'doe': False,\n", + " 'exam': False,\n", + " 'thats': False,\n", + " 'play': False,\n", + " 'making': False,\n", + " 'may': False,\n", + " 'excited': False,\n", + " 'god': False,\n", + " 'dad': False,\n", + " 'family': False,\n", + " 'gone': False,\n", + " 'friday': False,\n", + " 'boy': False,\n", + " 'follower': False,\n", + " 'read': False,\n", + " 'funny': False,\n", + " 'hi': False,\n", + " 'since': False,\n", + " 'give': False,\n", + " 'poor': False,\n", + " 'enjoy': False,\n", + " 'job': False,\n", + " 'okay': False,\n", + " 'talk': False,\n", + " 'gt': False,\n", + " 'later': False,\n", + " 'head': False,\n", + " 'almost': False,\n", + " 'beautiful': False,\n", + " 'woke': False,\n", + " 'cold': False,\n", + " 'anything': False,\n", + " 'hear': False,\n", + " 'lunch': False,\n", + " 'missing': False,\n", + " 'put': False,\n", + " 'free': False,\n", + " 'try': False,\n", + " 'coming': False,\n", + " 'must': False,\n", + " 'end': False,\n", + " 'tho': False,\n", + " 'leave': False,\n", + " 'ugh': False,\n", + " 'till': False,\n", + " 'busy': False,\n", + " 'around': False,\n", + " 'food': False,\n", + " 'far': False,\n", + " 'book': False,\n", + " 'music': False,\n", + " 'cry': False,\n", + " 'headache': False,\n", + " 'cause': False,\n", + " 'use': False,\n", + " 'listening': False,\n", + " 'fan': False,\n", + " 'stop': False,\n", + " 'stay': False,\n", + " 'totally': False,\n", + " 'wanted': False,\n", + " 'place': False,\n", + " 'shit': False,\n", + " 'xx': False,\n", + " 'tv': False,\n", + " 'e': False,\n", + " 'picture': False,\n", + " 'update': False,\n", + " 'least': False,\n", + " 'sweet': False,\n", + " 'anyone': False,\n", + " 'lovely': False,\n", + " 'thinking': False,\n", + " 'forward': False,\n", + " 'aww': False,\n", + " 'g': False,\n", + " 'dog': False,\n", + " 'class': False,\n", + " 'actually': False,\n", + " 'everything': False,\n", + " 'eat': False,\n", + " 'mine': False,\n", + " 'playing': False,\n", + " 'cute': False,\n", + " 'hahaha': False,\n", + " 'kid': False,\n", + " 'dinner': False,\n", + " 'stupid': False,\n", + " 'sooo': False,\n", + " 'came': False,\n", + " 'word': False,\n", + " 'ill': False,\n", + " 'eating': False,\n", + " 'win': False,\n", + " 'hopefully': False,\n", + " 'finished': False,\n", + " 'welcome': False,\n", + " 'anymore': False,\n", + " 'minute': False,\n", + " 'every': False,\n", + " 'face': False,\n", + " 'idea': False,\n", + " 'hair': False,\n", + " 'without': False,\n", + " 'kinda': False,\n", + " 'month': False,\n", + " 'saturday': False,\n", + " 'c': False,\n", + " 'wrong': False,\n", + " 'super': False,\n", + " 'www': False,\n", + " 'final': False,\n", + " 'true': False,\n", + " 'reading': False,\n", + " 'hug': False,\n", + " 'probably': False,\n", + " 'photo': False,\n", + " 'buy': False,\n", + " 'hehe': False,\n", + " 'taking': False,\n", + " 'believe': False,\n", + " 'eye': False,\n", + " 'didnt': False,\n", + " 'name': False,\n", + " 'alone': False,\n", + " 'mind': False,\n", + " 'room': False,\n", + " 'either': False,\n", + " 'dream': False,\n", + " 'following': False,\n", + " 'able': True,\n", + " 'goodnight': False,\n", + " 'else': False,\n", + " 'lmao': False,\n", + " 'boo': False,\n", + " 'heard': False,\n", + " 'coffee': False,\n", + " 'outside': False,\n", + " 'real': False,\n", + " 'mileycyrus': False,\n", + " 'break': False,\n", + " 'part': False,\n", + " 'june': False,\n", + " 'computer': False,\n", + " 'forgot': False,\n", + " 'ticket': False,\n", + " 'blog': False,\n", + " 'post': False,\n", + " 'awww': False,\n", + " 'rest': False,\n", + " 'dude': False,\n", + " 'pm': False,\n", + " 'enough': False,\n", + " 'brother': False,\n", + " 'plan': False,\n", + " 'ð': False,\n", + " 'half': False,\n", + " 'using': False,\n", + " 'add': False,\n", + " 'stuck': False,\n", + " 'person': False,\n", + " 'text': False,\n", + " 'meet': False,\n", + " 'mother': False,\n", + " 'album': False,\n", + " 'study': False,\n", + " 'crazy': False,\n", + " 'hand': False,\n", + " 'send': False,\n", + " 'fine': False,\n", + " 'talking': False,\n", + " 'whole': False,\n", + " 'reply': False,\n", + " 'nite': False,\n", + " 'k': False,\n", + " 'seems': False,\n", + " 'run': False,\n", + " 'red': False,\n", + " 'took': False,\n", + " 'v': False,\n", + " 'seen': False,\n", + " 'trip': False,\n", + " 'full': False,\n", + " 'beach': False,\n", + " 'hello': False,\n", + " 'side': False,\n", + " 'loved': False,\n", + " 'news': False,\n", + " 'hit': False,\n", + " 'tried': False,\n", + " 'kind': False,\n", + " 'rock': False,\n", + " 'heart': False,\n", + " 'yea': False,\n", + " 'la': False,\n", + " 'shopping': False,\n", + " 'problem': False,\n", + " 'afternoon': False,\n", + " 'â': False,\n", + " 'pain': False,\n", + " 'change': False,\n", + " 'nap': False,\n", + " 'started': False,\n", + " 'used': False,\n", + " 'remember': False,\n", + " 'star': False,\n", + " 'boring': False,\n", + " 'course': False,\n", + " 'heading': False,\n", + " 'quite': False,\n", + " 'seeing': False,\n", + " 'hell': False,\n", + " 'train': False,\n", + " 'breakfast': False,\n", + " 'crap': False,\n", + " 'told': False,\n", + " 'english': False,\n", + " 'died': False,\n", + " 'sister': False,\n", + " 'fuck': False,\n", + " 'site': False,\n", + " 'instead': False,\n", + " 'leaving': False,\n", + " 'ipod': False,\n", + " 'money': False,\n", + " 'raining': False,\n", + " 'finish': False,\n", + " 'til': False,\n", + " 'anyway': False,\n", + " 'ah': False,\n", + " 'running': False,\n", + " 'concert': False,\n", + " 'soo': False,\n", + " 'facebook': False,\n", + " 'link': False,\n", + " 'sitting': False,\n", + " 'point': False,\n", + " 'jealous': False,\n", + " 'season': False,\n", + " 'bring': False,\n", + " 'ñ': False,\n", + " 'cuz': False,\n", + " 'awake': False,\n", + " 'mum': False,\n", + " 'reason': False,\n", + " 'studying': False,\n", + " 'pay': False,\n", + " 'cat': False,\n", + " 'wonder': False,\n", + " 'congrats': False,\n", + " 'ago': False,\n", + " 'xd': False,\n", + " 'page': False,\n", + " 'f': False,\n", + " 'bought': False,\n", + " 'store': False,\n", + " 'drink': False,\n", + " 'definitely': False,\n", + " 'couple': False,\n", + " 'evening': False,\n", + " 'chocolate': False,\n", + " 'btw': False,\n", + " 'soooo': False,\n", + " 'sunny': False,\n", + " 'sore': False,\n", + " 'drive': False,\n", + " 'shower': False,\n", + " 'st': False,\n", + " 'lucky': False,\n", + " 'aw': False,\n", + " 'test': False,\n", + " 'walk': False,\n", + " 'internet': False,\n", + " 'open': False,\n", + " 'water': False,\n", + " 'wake': False,\n", + " 'l': False,\n", + " 'office': False,\n", + " 'list': False,\n", + " 'watched': False,\n", + " 'team': False,\n", + " 'tommcfly': False,\n", + " 'clean': False,\n", + " 'enjoying': False,\n", + " 'hungry': False,\n", + " 'smile': False,\n", + " 'seriously': False,\n", + " 'move': False,\n", + " 'wont': False,\n", + " 'high': False,\n", + " 'asleep': False,\n", + " 'award': False,\n", + " 'fucking': False,\n", + " 'bout': False,\n", + " 'starting': False,\n", + " 'top': False,\n", + " 'moment': False,\n", + " 'dance': False,\n", + " 'project': False,\n", + " 'second': False,\n", + " 'park': False,\n", + " 'ask': False,\n", + " 'hr': False,\n", + " 'email': False,\n", + " 'church': False,\n", + " 'driving': False,\n", + " 'tea': False,\n", + " 'broke': False,\n", + " 'gym': False,\n", + " 'ride': False,\n", + " 'le': False,\n", + " 'loving': False,\n", + " 'meeting': False,\n", + " 'worth': False,\n", + " 'fail': False,\n", + " 'black': False,\n", + " 'close': False,\n", + " 'visit': False,\n", + " 'sigh': False,\n", + " 'ate': False,\n", + " 'online': False,\n", + " 'number': False,\n", + " 'vote': False,\n", + " 'bye': False,\n", + " 'hang': False,\n", + " 'wonderful': False,\n", + " 'youtube': False,\n", + " 'care': False,\n", + " 'co': False,\n", + " 'cut': False,\n", + " 'drinking': False,\n", + " 'horrible': False,\n", + " 'ddlovato': False,\n", + " 'shirt': False,\n", + " 'ice': False,\n", + " 'saying': False,\n", + " 'answer': False,\n", + " 'date': False,\n", + " 'dear': False,\n", + " 'agree': False,\n", + " 'j': False,\n", + " 'set': False,\n", + " 'town': False,\n", + " 'da': False,\n", + " 'wear': False,\n", + " 'happened': False,\n", + " 'line': False,\n", + " 'parent': False,\n", + " 'worse': False,\n", + " 'min': False,\n", + " 'longer': False,\n", + " 'lady': False,\n", + " 'together': False,\n", + " 'cream': False,\n", + " 'worry': False,\n", + " 'goin': False,\n", + " 'followfriday': False,\n", + " 'fast': False,\n", + " 'forget': False,\n", + " 'fb': False,\n", + " 'doesnt': False,\n", + " 'broken': False,\n", + " 'wtf': False,\n", + " 'turn': False,\n", + " 'unfortunately': False,\n", + " 'chance': False,\n", + " 'favorite': False,\n", + " 'spent': False,\n", + " 'fall': False,\n", + " 'air': False,\n", + " 'idk': False,\n", + " 'slept': False,\n", + " 'sometimes': False,\n", + " 'rainy': False,\n", + " 'question': False,\n", + " 'laptop': False,\n", + " 'tweeting': False,\n", + " 'tuesday': False,\n", + " 'earlier': False,\n", + " 'mood': False,\n", + " 'slow': False,\n", + " 'hoping': False,\n", + " 'thx': False,\n", + " 'h': False,\n", + " 'absolutely': False,\n", + " 'mr': False,\n", + " 'ahh': False,\n", + " 'cleaning': False,\n", + " 'via': False,\n", + " 'holiday': False,\n", + " 'happen': False,\n", + " 'wishing': False,\n", + " 'taken': False,\n", + " 'pool': False,\n", + " 'episode': False,\n", + " 'garden': False,\n", + " 'homework': False,\n", + " 'website': False,\n", + " 'sleeping': False,\n", + " 'airport': False,\n", + " 'especially': False,\n", + " 'business': False,\n", + " 'perfect': False,\n", + " 'em': False,\n", + " 'fell': False,\n", + " 'nd': False,\n", + " 'upset': False,\n", + " 'small': False,\n", + " 'city': False,\n", + " 'chat': False,\n", + " 'knew': False,\n", + " 'foot': False,\n", + " 'chicken': False,\n", + " '½': False,\n", + " 'father': False,\n", + " 'throat': False,\n", + " 'mac': False,\n", + " 'weird': False,\n", + " 'window': False,\n", + " 'passed': False,\n", + " 'son': False,\n", + " 'story': False,\n", + " 'tour': False,\n", + " 'message': False,\n", + " 'shop': False,\n", + " 'wednesday': False,\n", + " 'due': False,\n", + " 'bbq': False,\n", + " 'listen': True,\n", + " 'sleepy': False,\n", + " 'woman': False,\n", + " 'company': False,\n", + " 'sunshine': False,\n", + " 'met': False,\n", + " 'short': False,\n", + " 'comment': False,\n", + " 'mad': False,\n", + " 'understand': False,\n", + " 'gave': False,\n", + " 'pc': False,\n", + " 'hubby': False,\n", + " 'different': False,\n", + " 'havent': True,\n", + " 'account': False,\n", + " 'note': False,\n", + " 'seem': False,\n", + " 'scared': False,\n", + " 'bag': False,\n", + " 'alright': False,\n", + " 'cup': False,\n", + " 'ive': False,\n", + " 'leg': False,\n", + " 'green': False,\n", + " 'interesting': False,\n", + " 'bus': False,\n", + " 'past': False,\n", + " 'glass': False,\n", + " 'worst': False,\n", + " 'power': False,\n", + " 'plz': False,\n", + " 'math': False,\n", + " 'white': False,\n", + " 'support': False,\n", + " 'nope': False,\n", + " 'sat': False,\n", + " 'moon': False,\n", + " 'hangover': False,\n", + " 'moving': False,\n", + " 'taylor': False,\n", + " 'touch': False,\n", + " 'order': False,\n", + " 'jonas': False,\n", + " 'pick': False,\n", + " 'forever': False,\n", + " 'tom': False,\n", + " 'dead': False,\n", + " 'shoot': False,\n", + " 'called': False,\n", + " 'bitch': False,\n", + " 'rather': False,\n", + " 'catch': False,\n", + " 'bet': False,\n", + " 'officially': False,\n", + " 'vip': False,\n", + " 'writing': False,\n", + " 'si': False,\n", + " 'lil': False,\n", + " 'worried': False,\n", + " 'xoxo': False,\n", + " 'write': False,\n", + " 'graduation': False,\n", + " 'liked': False,\n", + " 'except': False,\n", + " 'bday': False,\n", + " 'gay': False,\n", + " 'dang': False,\n", + " 'cousin': False,\n", + " 'load': False,\n", + " 'myspace': False,\n", + " 'sent': False,\n", + " 'fix': False,\n", + " 'ahhh': False,\n", + " 'fly': False,\n", + " 'blue': False,\n", + " 'college': False,\n", + " 'gorgeous': False,\n", + " 'special': False,\n", + " 'kill': False,\n", + " 'apple': False,\n", + " 'interview': False,\n", + " 'fight': False,\n", + " 'everybody': False,\n", + " 'dress': False,\n", + " 'box': False,\n", + " 'bro': False,\n", + " 'case': False,\n", + " 'july': False,\n", + " 'cake': False,\n", + " 'three': False,\n", + " 'hmm': False,\n", + " 'deal': False,\n", + " 'ouch': False,\n", + " 'inside': False,\n", + " 'yep': False,\n", + " 'random': False,\n", + " 'meant': False,\n", + " 'profile': False,\n", + " 'mtv': False,\n", + " 'wedding': False,\n", + " 'clothes': False,\n", + " 'band': False,\n", + " 'issue': False,\n", + " 'apparently': False,\n", + " 'lonely': False,\n", + " 'age': False,\n", + " 'shall': False,\n", + " 'flight': False,\n", + " 'yr': False,\n", + " 'supposed': False,\n", + " 'david': False,\n", + " 'living': False,\n", + " 'needed': False,\n", + " 'london': False,\n", + " 'laugh': False,\n", + " 'sign': False,\n", + " 'finger': False,\n", + " 'beer': False,\n", + " 'itunes': False,\n", + " 'played': False,\n", + " 'tear': False,\n", + " 'ppl': False,\n", + " 'body': False,\n", + " 'paper': False,\n", + " 'group': False,\n", + " 'looked': False,\n", + " 'sadly': False,\n", + " 'bloody': False,\n", + " 'club': False,\n", + " 'web': False,\n", + " 'vacation': False,\n", + " 'jonasbrothers': False,\n", + " 'uk': False,\n", + " 'version': False,\n", + " 'huge': False,\n", + " 'sooooo': False,\n", + " 'lakers': False,\n", + " 'germany': False,\n", + " 'save': False,\n", + " 'hanging': False,\n", + " 'shoe': False,\n", + " 'fantastic': False,\n", + " 'wine': False,\n", + " 'sort': False,\n", + " 'none': False,\n", + " 'twilight': False,\n", + " 'shot': False,\n", + " 'thursday': False,\n", + " 'confused': False,\n", + " 'sale': False,\n", + " 'goodbye': False,\n", + " 'round': False,\n", + " 'lame': False,\n", + " 'cheer': False,\n", + " 'singing': False,\n", + " 'promise': False,\n", + " 'drop': False,\n", + " 'nose': False,\n", + " 'whats': False,\n", + " 'babe': False,\n", + " 'xxx': False,\n", + " 'lesson': False,\n", + " 'camera': False,\n", + " 'download': False,\n", + " 'info': False,\n", + " 'fair': False,\n", + " 'yummy': False,\n", + " 'french': False,\n", + " 'miley': False,\n", + " 'light': False,\n", + " 'asked': False,\n", + " 'lazy': False,\n", + " 'indeed': False,\n", + " 'along': False,\n", + " 'quick': False,\n", + " 'ã': False,\n", + " 'door': False,\n", + " 'sit': False,\n", + " 'yup': False,\n", + " 'street': False,\n", + " 'mile': False,\n", + " 'dm': False,\n", + " 'jus': False,\n", + " 'giving': False,\n", + " 'ear': False,\n", + " 'service': False,\n", + " 'low': False,\n", + " 'relaxing': False,\n", + " 'radio': False,\n", + " 'arm': False,\n", + " 'future': False,\n", + " 'proud': False,\n", + " 'fact': False,\n", + " 'learn': False,\n", + " 'easy': False,\n", + " 'child': False,\n", + " 'kitty': False,\n", + " 'hold': False,\n", + " 'mark': False,\n", + " 'peep': False,\n", + " 'wearing': False,\n", + " 'luv': False,\n", + " 'south': False,\n", + " 'road': False,\n", + " 'smell': False,\n", + " 'exciting': False,\n", + " 'currently': False,\n", + " 'warm': False,\n", + " 'puppy': False,\n", + " 'packing': False,\n", + " 'google': False,\n", + " 'exactly': False,\n", + " 'share': False,\n", + " 'decided': False,\n", + " 'voice': False,\n", + " 'fat': False,\n", + " 'mommy': False,\n", + " 'realize': False,\n", + " 'join': False,\n", + " 'nearly': False,\n", + " 'bb': False,\n", + " 'whatever': False,\n", + " 'woo': False,\n", + " 'spend': False,\n", + " 'stand': False,\n", + " 'enjoyed': False,\n", + " 'tweeps': False,\n", + " 'sold': False,\n", + " 'gettin': False,\n", + " 'fever': False,\n", + " 'paid': False,\n", + " 'beat': False,\n", + " 'pink': False,\n", + " 'mall': False,\n", + " 'gunna': False,\n", + " 'ï': False,\n", + " 'freaking': False,\n", + " 'snow': False,\n", + " 'hospital': False,\n", + " 'cheese': False,\n", + " 'nobody': False,\n", + " 'wondering': False,\n", + " 'plane': False,\n", + " 'shame': False,\n", + " 'search': False,\n", + " 'ended': False,\n", + " 'helping': False,\n", + " 'safe': False,\n", + " 'staying': False,\n", + " 'storm': False,\n", + " 'fam': False,\n", + " 'z': False,\n", + " 'sing': False,\n", + " 'view': False,\n", + " 'yo': False,\n", + " 'card': False,\n", + " 'stick': False,\n", + " 'joe': False,\n", + " 'matter': False,\n", + " 'delicious': False,\n", + " 'cook': False,\n", + " 'country': False,\n", + " 'hill': False,\n", + " 'peace': False,\n", + " 'worked': False,\n", + " 'race': False,\n", + " 'boot': False,\n", + " 'figure': False,\n", + " 'vega': False,\n", + " 'france': False,\n", + " 'wit': False,\n", + " 'topic': False,\n", + " 'spot': False,\n", + " 'cover': False,\n", + " 'afford': False,\n", + " 'floor': False,\n", + " 'bike': False,\n", + " 'starbucks': False,\n", + " 'hahah': False,\n", + " 'tummy': False,\n", + " 'trouble': False,\n", + " 'mouth': False,\n", + " 'ran': False,\n", + " 'drunk': False,\n", + " 'lately': False,\n", + " 'joke': False,\n", + " 'bird': False,\n", + " 'flu': False,\n", + " 'showing': False,\n", + " 'mail': False,\n", + " 'turned': False,\n", + " 'men': False,\n", + " 'taste': False,\n", + " 'gosh': False,\n", + " 'bar': False,\n", + " 'changed': False,\n", + " 'fish': False,\n", + " 'stopped': False,\n", + " 'wife': False,\n", + " 'alot': False,\n", + " 'tweetdeck': False,\n", + " 'magic': False,\n", + " 'brilliant': False,\n", + " 'cooking': False,\n", + " 'state': False,\n", + " 'design': False,\n", + " 'film': False,\n", + " 'tan': False,\n", + " 'isnt': False,\n", + " 'nail': False,\n", + " 'bummed': False,\n", + " 'prob': False,\n", + " 'happens': False,\n", + " 'eh': False,\n", + " 'type': False,\n", + " 'young': False,\n", + " 'honey': False,\n", + " 'price': False,\n", + " 'disappointed': False,\n", + " 'sky': False,\n", + " 'ring': False,\n", + " 'surprise': False,\n", + " 'self': False,\n", + " 'depressing': False,\n", + " 'sexy': False,\n", + " 'cd': False,\n", + " 'woot': False,\n", + " 'pop': False,\n", + " 'piece': False,\n", + " 'degree': False,\n", + " 'chillin': False,\n", + " 'hilarious': False,\n", + " 'rip': False,\n", + " 'updated': False,\n", + " 'ohh': False,\n", + " 'boyfriend': False,\n", + " 'awwww': False,\n", + " 'hun': False,\n", + " 'crappy': False,\n", + " 'er': False,\n", + " 'although': False,\n", + " 'death': False,\n", + " 'lake': False,\n", + " 'ahead': False,\n", + " 'art': False,\n", + " 'watchin': False,\n", + " 'scary': False,\n", + " 'annoying': False,\n", + " 'event': False,\n", + " 'plus': False,\n", + " 'ff': False,\n", + " 'major': False,\n", + " 'afraid': False,\n", + " 'pissed': False,\n", + " 'wat': False,\n", + " 'huh': False,\n", + " 'secret': False,\n", + " 'others': False,\n", + " 'land': False,\n", + " 'caught': False,\n", + " 'closed': False,\n", + " 'kiss': False,\n", + " 'trek': False,\n", + " 'code': False,\n", + " 'mobile': False,\n", + " 'knee': False,\n", + " ...},\n", + " 0),\n", + " ({'wa': False,\n", + " 'day': False,\n", + " 'good': False,\n", + " 'get': False,\n", + " 'like': False,\n", + " 'go': False,\n", + " 'quot': False,\n", + " 'love': False,\n", + " 'work': False,\n", + " 'got': False,\n", + " 'today': False,\n", + " 'going': False,\n", + " 'u': False,\n", + " 'time': False,\n", + " 'one': False,\n", + " 'lol': False,\n", + " 'know': False,\n", + " 'im': False,\n", + " 'back': False,\n", + " 'really': False,\n", + " 'want': False,\n", + " 'night': False,\n", + " 'amp': False,\n", + " 'well': False,\n", + " 'new': False,\n", + " 'see': False,\n", + " 'think': False,\n", + " 'still': False,\n", + " 'oh': False,\n", + " 'thanks': False,\n", + " 'na': False,\n", + " 'ha': False,\n", + " 'need': False,\n", + " 'home': False,\n", + " 'much': False,\n", + " 'miss': False,\n", + " 'feel': False,\n", + " 'last': False,\n", + " 'morning': False,\n", + " 'great': False,\n", + " 'make': False,\n", + " 'tomorrow': False,\n", + " 'twitter': False,\n", + " 'haha': False,\n", + " 'wish': False,\n", + " 'hope': False,\n", + " 'bad': False,\n", + " 'sad': False,\n", + " 'fun': False,\n", + " 'come': False,\n", + " 'sleep': False,\n", + " 'would': False,\n", + " 'nice': False,\n", + " 'sorry': False,\n", + " 'right': False,\n", + " 'week': False,\n", + " 'tonight': False,\n", + " 'happy': False,\n", + " 'say': False,\n", + " 'thing': False,\n", + " 'getting': False,\n", + " 'look': False,\n", + " 'friend': False,\n", + " 'gon': False,\n", + " 'though': False,\n", + " 'hate': False,\n", + " 'wait': False,\n", + " 'better': False,\n", + " 'bed': False,\n", + " 'way': False,\n", + " 'watching': False,\n", + " 'lt': False,\n", + " 'people': False,\n", + " 'yeah': False,\n", + " 'hour': False,\n", + " 'show': False,\n", + " 'could': False,\n", + " 'thank': False,\n", + " 'take': False,\n", + " 'weekend': False,\n", + " 'next': False,\n", + " 'yes': False,\n", + " 'school': False,\n", + " 'even': False,\n", + " 'little': False,\n", + " 'life': False,\n", + " 'working': False,\n", + " 'everyone': False,\n", + " 'guy': False,\n", + " 'cant': False,\n", + " 'sick': False,\n", + " 'dont': False,\n", + " 'hey': False,\n", + " 'let': False,\n", + " 'awesome': False,\n", + " 'movie': False,\n", + " 'girl': False,\n", + " 'tweet': False,\n", + " 'always': False,\n", + " 'x': False,\n", + " 'never': False,\n", + " 'watch': False,\n", + " 'please': False,\n", + " 'soon': False,\n", + " 'year': False,\n", + " 'first': False,\n", + " 'long': False,\n", + " 'ok': False,\n", + " 'tired': False,\n", + " 'already': False,\n", + " 'feeling': False,\n", + " 'suck': False,\n", + " 'wan': False,\n", + " 'sure': False,\n", + " 'best': False,\n", + " 'looking': False,\n", + " 'n': False,\n", + " 'man': False,\n", + " 'another': False,\n", + " 'something': False,\n", + " 'find': False,\n", + " 'start': False,\n", + " 'cool': False,\n", + " 'done': False,\n", + " 'pretty': False,\n", + " 'omg': False,\n", + " 'yay': False,\n", + " 'yet': False,\n", + " 'phone': False,\n", + " 'damn': False,\n", + " 'lot': False,\n", + " 'away': False,\n", + " 'went': False,\n", + " 'old': False,\n", + " 'follow': False,\n", + " 'help': False,\n", + " 'guess': False,\n", + " 'keep': False,\n", + " 'house': False,\n", + " 'thought': False,\n", + " 'song': False,\n", + " 'made': False,\n", + " 'ever': False,\n", + " 'trying': False,\n", + " 'ya': False,\n", + " 'sun': False,\n", + " 'p': False,\n", + " 'bit': False,\n", + " 'hurt': False,\n", + " 'game': False,\n", + " 'finally': False,\n", + " 'maybe': False,\n", + " 'ready': False,\n", + " 'sound': False,\n", + " 'w': False,\n", + " 'big': True,\n", + " 'lost': False,\n", + " 'b': False,\n", + " 'nothing': False,\n", + " 'someone': False,\n", + " 'early': False,\n", + " 'summer': False,\n", + " 'tell': False,\n", + " 'hard': False,\n", + " 'also': False,\n", + " 'birthday': False,\n", + " 'r': False,\n", + " 'left': False,\n", + " 'mean': False,\n", + " 'missed': False,\n", + " 'ur': False,\n", + " 'th': False,\n", + " 'rain': False,\n", + " 'pic': False,\n", + " 'mom': False,\n", + " 'com': False,\n", + " 'party': False,\n", + " 'two': True,\n", + " 'glad': False,\n", + " 'world': False,\n", + " 'baby': False,\n", + " 'wow': False,\n", + " 'might': False,\n", + " 'bored': False,\n", + " 'call': False,\n", + " 'ta': False,\n", + " 'check': False,\n", + " 'late': False,\n", + " 'waiting': False,\n", + " 'car': False,\n", + " 'video': False,\n", + " 'stuff': False,\n", + " 'found': False,\n", + " 'yesterday': False,\n", + " 'said': False,\n", + " 'sunday': False,\n", + " 'luck': False,\n", + " 'saw': False,\n", + " 'amazing': False,\n", + " 'hot': False,\n", + " 'monday': False,\n", + " 'weather': False,\n", + " 'live': False,\n", + " 'many': False,\n", + " 'iphone': False,\n", + " 'doe': False,\n", + " 'exam': False,\n", + " 'thats': False,\n", + " 'play': False,\n", + " 'making': False,\n", + " 'may': False,\n", + " 'excited': False,\n", + " 'god': False,\n", + " 'dad': False,\n", + " 'family': False,\n", + " 'gone': False,\n", + " 'friday': False,\n", + " 'boy': False,\n", + " 'follower': False,\n", + " 'read': False,\n", + " 'funny': False,\n", + " 'hi': False,\n", + " 'since': False,\n", + " 'give': False,\n", + " 'poor': False,\n", + " 'enjoy': False,\n", + " 'job': False,\n", + " 'okay': False,\n", + " 'talk': False,\n", + " 'gt': False,\n", + " 'later': False,\n", + " 'head': False,\n", + " 'almost': False,\n", + " 'beautiful': False,\n", + " 'woke': False,\n", + " 'cold': False,\n", + " 'anything': False,\n", + " 'hear': False,\n", + " 'lunch': False,\n", + " 'missing': False,\n", + " 'put': False,\n", + " 'free': False,\n", + " 'try': False,\n", + " 'coming': False,\n", + " 'must': False,\n", + " 'end': False,\n", + " 'tho': False,\n", + " 'leave': False,\n", + " 'ugh': False,\n", + " 'till': False,\n", + " 'busy': False,\n", + " 'around': False,\n", + " 'food': False,\n", + " 'far': False,\n", + " 'book': False,\n", + " 'music': False,\n", + " 'cry': False,\n", + " 'headache': False,\n", + " 'cause': False,\n", + " 'use': False,\n", + " 'listening': False,\n", + " 'fan': False,\n", + " 'stop': False,\n", + " 'stay': False,\n", + " 'totally': False,\n", + " 'wanted': False,\n", + " 'place': False,\n", + " 'shit': False,\n", + " 'xx': False,\n", + " 'tv': False,\n", + " 'e': False,\n", + " 'picture': False,\n", + " 'update': False,\n", + " 'least': False,\n", + " 'sweet': False,\n", + " 'anyone': False,\n", + " 'lovely': False,\n", + " 'thinking': False,\n", + " 'forward': False,\n", + " 'aww': False,\n", + " 'g': False,\n", + " 'dog': False,\n", + " 'class': False,\n", + " 'actually': False,\n", + " 'everything': False,\n", + " 'eat': False,\n", + " 'mine': False,\n", + " 'playing': False,\n", + " 'cute': False,\n", + " 'hahaha': False,\n", + " 'kid': False,\n", + " 'dinner': False,\n", + " 'stupid': False,\n", + " 'sooo': False,\n", + " 'came': False,\n", + " 'word': False,\n", + " 'ill': False,\n", + " 'eating': False,\n", + " 'win': False,\n", + " 'hopefully': False,\n", + " 'finished': False,\n", + " 'welcome': False,\n", + " 'anymore': False,\n", + " 'minute': False,\n", + " 'every': False,\n", + " 'face': False,\n", + " 'idea': False,\n", + " 'hair': False,\n", + " 'without': False,\n", + " 'kinda': False,\n", + " 'month': False,\n", + " 'saturday': False,\n", + " 'c': False,\n", + " 'wrong': False,\n", + " 'super': False,\n", + " 'www': False,\n", + " 'final': False,\n", + " 'true': False,\n", + " 'reading': False,\n", + " 'hug': False,\n", + " 'probably': False,\n", + " 'photo': False,\n", + " 'buy': False,\n", + " 'hehe': False,\n", + " 'taking': False,\n", + " 'believe': False,\n", + " 'eye': False,\n", + " 'didnt': False,\n", + " 'name': False,\n", + " 'alone': False,\n", + " 'mind': False,\n", + " 'room': False,\n", + " 'either': False,\n", + " 'dream': False,\n", + " 'following': False,\n", + " 'able': False,\n", + " 'goodnight': False,\n", + " 'else': False,\n", + " 'lmao': False,\n", + " 'boo': False,\n", + " 'heard': False,\n", + " 'coffee': False,\n", + " 'outside': False,\n", + " 'real': False,\n", + " 'mileycyrus': False,\n", + " 'break': False,\n", + " 'part': False,\n", + " 'june': False,\n", + " 'computer': False,\n", + " 'forgot': False,\n", + " 'ticket': False,\n", + " 'blog': False,\n", + " 'post': False,\n", + " 'awww': False,\n", + " 'rest': False,\n", + " 'dude': False,\n", + " 'pm': False,\n", + " 'enough': False,\n", + " 'brother': False,\n", + " 'plan': False,\n", + " 'ð': False,\n", + " 'half': False,\n", + " 'using': False,\n", + " 'add': False,\n", + " 'stuck': False,\n", + " 'person': False,\n", + " 'text': False,\n", + " 'meet': False,\n", + " 'mother': False,\n", + " 'album': False,\n", + " 'study': False,\n", + " 'crazy': False,\n", + " 'hand': False,\n", + " 'send': False,\n", + " 'fine': False,\n", + " 'talking': False,\n", + " 'whole': False,\n", + " 'reply': False,\n", + " 'nite': False,\n", + " 'k': False,\n", + " 'seems': False,\n", + " 'run': False,\n", + " 'red': False,\n", + " 'took': False,\n", + " 'v': False,\n", + " 'seen': False,\n", + " 'trip': False,\n", + " 'full': False,\n", + " 'beach': False,\n", + " 'hello': False,\n", + " 'side': False,\n", + " 'loved': False,\n", + " 'news': False,\n", + " 'hit': False,\n", + " 'tried': False,\n", + " 'kind': False,\n", + " 'rock': False,\n", + " 'heart': False,\n", + " 'yea': False,\n", + " 'la': False,\n", + " 'shopping': False,\n", + " 'problem': False,\n", + " 'afternoon': False,\n", + " 'â': False,\n", + " 'pain': True,\n", + " 'change': False,\n", + " 'nap': False,\n", + " 'started': False,\n", + " 'used': False,\n", + " 'remember': False,\n", + " 'star': False,\n", + " 'boring': False,\n", + " 'course': False,\n", + " 'heading': False,\n", + " 'quite': False,\n", + " 'seeing': False,\n", + " 'hell': False,\n", + " 'train': False,\n", + " 'breakfast': False,\n", + " 'crap': False,\n", + " 'told': False,\n", + " 'english': False,\n", + " 'died': False,\n", + " 'sister': False,\n", + " 'fuck': False,\n", + " 'site': False,\n", + " 'instead': False,\n", + " 'leaving': False,\n", + " 'ipod': False,\n", + " 'money': False,\n", + " 'raining': False,\n", + " 'finish': False,\n", + " 'til': False,\n", + " 'anyway': False,\n", + " 'ah': False,\n", + " 'running': False,\n", + " 'concert': False,\n", + " 'soo': False,\n", + " 'facebook': False,\n", + " 'link': False,\n", + " 'sitting': False,\n", + " 'point': False,\n", + " 'jealous': False,\n", + " 'season': False,\n", + " 'bring': False,\n", + " 'ñ': False,\n", + " 'cuz': False,\n", + " 'awake': False,\n", + " 'mum': False,\n", + " 'reason': False,\n", + " 'studying': False,\n", + " 'pay': False,\n", + " 'cat': False,\n", + " 'wonder': False,\n", + " 'congrats': False,\n", + " 'ago': False,\n", + " 'xd': False,\n", + " 'page': False,\n", + " 'f': False,\n", + " 'bought': False,\n", + " 'store': False,\n", + " 'drink': False,\n", + " 'definitely': False,\n", + " 'couple': False,\n", + " 'evening': False,\n", + " 'chocolate': False,\n", + " 'btw': False,\n", + " 'soooo': False,\n", + " 'sunny': False,\n", + " 'sore': False,\n", + " 'drive': False,\n", + " 'shower': False,\n", + " 'st': False,\n", + " 'lucky': False,\n", + " 'aw': False,\n", + " 'test': False,\n", + " 'walk': False,\n", + " 'internet': False,\n", + " 'open': False,\n", + " 'water': False,\n", + " 'wake': False,\n", + " 'l': False,\n", + " 'office': False,\n", + " 'list': False,\n", + " 'watched': False,\n", + " 'team': False,\n", + " 'tommcfly': False,\n", + " 'clean': False,\n", + " 'enjoying': False,\n", + " 'hungry': False,\n", + " 'smile': False,\n", + " 'seriously': False,\n", + " 'move': False,\n", + " 'wont': False,\n", + " 'high': False,\n", + " 'asleep': False,\n", + " 'award': False,\n", + " 'fucking': False,\n", + " 'bout': False,\n", + " 'starting': False,\n", + " 'top': False,\n", + " 'moment': False,\n", + " 'dance': False,\n", + " 'project': False,\n", + " 'second': False,\n", + " 'park': False,\n", + " 'ask': False,\n", + " 'hr': False,\n", + " 'email': False,\n", + " 'church': False,\n", + " 'driving': False,\n", + " 'tea': False,\n", + " 'broke': False,\n", + " 'gym': False,\n", + " 'ride': False,\n", + " 'le': False,\n", + " 'loving': False,\n", + " 'meeting': False,\n", + " 'worth': False,\n", + " 'fail': False,\n", + " 'black': False,\n", + " 'close': False,\n", + " 'visit': False,\n", + " 'sigh': False,\n", + " 'ate': False,\n", + " 'online': False,\n", + " 'number': False,\n", + " 'vote': False,\n", + " 'bye': False,\n", + " 'hang': False,\n", + " 'wonderful': False,\n", + " 'youtube': False,\n", + " 'care': False,\n", + " 'co': False,\n", + " 'cut': False,\n", + " 'drinking': False,\n", + " 'horrible': False,\n", + " 'ddlovato': False,\n", + " 'shirt': False,\n", + " 'ice': False,\n", + " 'saying': False,\n", + " 'answer': False,\n", + " 'date': False,\n", + " 'dear': False,\n", + " 'agree': False,\n", + " 'j': False,\n", + " 'set': False,\n", + " 'town': False,\n", + " 'da': False,\n", + " 'wear': False,\n", + " 'happened': False,\n", + " 'line': False,\n", + " 'parent': False,\n", + " 'worse': False,\n", + " 'min': False,\n", + " 'longer': False,\n", + " 'lady': False,\n", + " 'together': False,\n", + " 'cream': False,\n", + " 'worry': False,\n", + " 'goin': False,\n", + " 'followfriday': False,\n", + " 'fast': False,\n", + " 'forget': False,\n", + " 'fb': False,\n", + " 'doesnt': False,\n", + " 'broken': False,\n", + " 'wtf': False,\n", + " 'turn': False,\n", + " 'unfortunately': False,\n", + " 'chance': False,\n", + " 'favorite': False,\n", + " 'spent': False,\n", + " 'fall': False,\n", + " 'air': False,\n", + " 'idk': False,\n", + " 'slept': False,\n", + " 'sometimes': False,\n", + " 'rainy': False,\n", + " 'question': False,\n", + " 'laptop': False,\n", + " 'tweeting': False,\n", + " 'tuesday': False,\n", + " 'earlier': False,\n", + " 'mood': False,\n", + " 'slow': False,\n", + " 'hoping': False,\n", + " 'thx': False,\n", + " 'h': False,\n", + " 'absolutely': False,\n", + " 'mr': False,\n", + " 'ahh': False,\n", + " 'cleaning': False,\n", + " 'via': False,\n", + " 'holiday': False,\n", + " 'happen': False,\n", + " 'wishing': False,\n", + " 'taken': False,\n", + " 'pool': False,\n", + " 'episode': False,\n", + " 'garden': False,\n", + " 'homework': False,\n", + " 'website': False,\n", + " 'sleeping': False,\n", + " 'airport': False,\n", + " 'especially': False,\n", + " 'business': False,\n", + " 'perfect': False,\n", + " 'em': False,\n", + " 'fell': False,\n", + " 'nd': False,\n", + " 'upset': False,\n", + " 'small': False,\n", + " 'city': False,\n", + " 'chat': False,\n", + " 'knew': False,\n", + " 'foot': False,\n", + " 'chicken': False,\n", + " '½': False,\n", + " 'father': False,\n", + " 'throat': False,\n", + " 'mac': False,\n", + " 'weird': False,\n", + " 'window': False,\n", + " 'passed': False,\n", + " 'son': False,\n", + " 'story': False,\n", + " 'tour': False,\n", + " 'message': False,\n", + " 'shop': False,\n", + " 'wednesday': False,\n", + " 'due': False,\n", + " 'bbq': False,\n", + " 'listen': False,\n", + " 'sleepy': False,\n", + " 'woman': False,\n", + " 'company': False,\n", + " 'sunshine': False,\n", + " 'met': False,\n", + " 'short': False,\n", + " 'comment': False,\n", + " 'mad': False,\n", + " 'understand': False,\n", + " 'gave': False,\n", + " 'pc': False,\n", + " 'hubby': False,\n", + " 'different': False,\n", + " 'havent': False,\n", + " 'account': False,\n", + " 'note': False,\n", + " 'seem': False,\n", + " 'scared': False,\n", + " 'bag': False,\n", + " 'alright': False,\n", + " 'cup': False,\n", + " 'ive': False,\n", + " 'leg': False,\n", + " 'green': False,\n", + " 'interesting': False,\n", + " 'bus': False,\n", + " 'past': False,\n", + " 'glass': False,\n", + " 'worst': False,\n", + " 'power': False,\n", + " 'plz': False,\n", + " 'math': False,\n", + " 'white': False,\n", + " 'support': False,\n", + " 'nope': False,\n", + " 'sat': False,\n", + " 'moon': False,\n", + " 'hangover': False,\n", + " 'moving': False,\n", + " 'taylor': False,\n", + " 'touch': False,\n", + " 'order': False,\n", + " 'jonas': False,\n", + " 'pick': False,\n", + " 'forever': False,\n", + " 'tom': False,\n", + " 'dead': False,\n", + " 'shoot': False,\n", + " 'called': False,\n", + " 'bitch': False,\n", + " 'rather': False,\n", + " 'catch': False,\n", + " 'bet': False,\n", + " 'officially': False,\n", + " 'vip': False,\n", + " 'writing': False,\n", + " 'si': False,\n", + " 'lil': False,\n", + " 'worried': False,\n", + " 'xoxo': False,\n", + " 'write': False,\n", + " 'graduation': False,\n", + " 'liked': False,\n", + " 'except': False,\n", + " 'bday': False,\n", + " 'gay': False,\n", + " 'dang': False,\n", + " 'cousin': False,\n", + " 'load': False,\n", + " 'myspace': False,\n", + " 'sent': False,\n", + " 'fix': False,\n", + " 'ahhh': False,\n", + " 'fly': False,\n", + " 'blue': False,\n", + " 'college': False,\n", + " 'gorgeous': False,\n", + " 'special': False,\n", + " 'kill': False,\n", + " 'apple': False,\n", + " 'interview': False,\n", + " 'fight': False,\n", + " 'everybody': False,\n", + " 'dress': False,\n", + " 'box': False,\n", + " 'bro': False,\n", + " 'case': False,\n", + " 'july': False,\n", + " 'cake': False,\n", + " 'three': False,\n", + " 'hmm': False,\n", + " 'deal': False,\n", + " 'ouch': False,\n", + " 'inside': False,\n", + " 'yep': False,\n", + " 'random': False,\n", + " 'meant': False,\n", + " 'profile': False,\n", + " 'mtv': False,\n", + " 'wedding': False,\n", + " 'clothes': False,\n", + " 'band': False,\n", + " 'issue': False,\n", + " 'apparently': False,\n", + " 'lonely': False,\n", + " 'age': False,\n", + " 'shall': False,\n", + " 'flight': False,\n", + " 'yr': False,\n", + " 'supposed': False,\n", + " 'david': False,\n", + " 'living': False,\n", + " 'needed': False,\n", + " 'london': False,\n", + " 'laugh': False,\n", + " 'sign': False,\n", + " 'finger': False,\n", + " 'beer': False,\n", + " 'itunes': False,\n", + " 'played': False,\n", + " 'tear': False,\n", + " 'ppl': False,\n", + " 'body': False,\n", + " 'paper': False,\n", + " 'group': False,\n", + " 'looked': False,\n", + " 'sadly': False,\n", + " 'bloody': False,\n", + " 'club': False,\n", + " 'web': False,\n", + " 'vacation': False,\n", + " 'jonasbrothers': False,\n", + " 'uk': False,\n", + " 'version': False,\n", + " 'huge': False,\n", + " 'sooooo': False,\n", + " 'lakers': False,\n", + " 'germany': False,\n", + " 'save': False,\n", + " 'hanging': False,\n", + " 'shoe': False,\n", + " 'fantastic': False,\n", + " 'wine': False,\n", + " 'sort': False,\n", + " 'none': False,\n", + " 'twilight': False,\n", + " 'shot': False,\n", + " 'thursday': False,\n", + " 'confused': False,\n", + " 'sale': False,\n", + " 'goodbye': False,\n", + " 'round': False,\n", + " 'lame': False,\n", + " 'cheer': False,\n", + " 'singing': False,\n", + " 'promise': False,\n", + " 'drop': False,\n", + " 'nose': False,\n", + " 'whats': False,\n", + " 'babe': False,\n", + " 'xxx': False,\n", + " 'lesson': False,\n", + " 'camera': False,\n", + " 'download': False,\n", + " 'info': False,\n", + " 'fair': False,\n", + " 'yummy': False,\n", + " 'french': False,\n", + " 'miley': False,\n", + " 'light': False,\n", + " 'asked': False,\n", + " 'lazy': False,\n", + " 'indeed': False,\n", + " 'along': False,\n", + " 'quick': False,\n", + " 'ã': False,\n", + " 'door': False,\n", + " 'sit': False,\n", + " 'yup': False,\n", + " 'street': False,\n", + " 'mile': False,\n", + " 'dm': False,\n", + " 'jus': False,\n", + " 'giving': False,\n", + " 'ear': False,\n", + " 'service': False,\n", + " 'low': False,\n", + " 'relaxing': False,\n", + " 'radio': False,\n", + " 'arm': False,\n", + " 'future': False,\n", + " 'proud': False,\n", + " 'fact': False,\n", + " 'learn': False,\n", + " 'easy': False,\n", + " 'child': False,\n", + " 'kitty': False,\n", + " 'hold': False,\n", + " 'mark': False,\n", + " 'peep': False,\n", + " 'wearing': False,\n", + " 'luv': False,\n", + " 'south': False,\n", + " 'road': False,\n", + " 'smell': False,\n", + " 'exciting': False,\n", + " 'currently': False,\n", + " 'warm': False,\n", + " 'puppy': False,\n", + " 'packing': False,\n", + " 'google': False,\n", + " 'exactly': False,\n", + " 'share': False,\n", + " 'decided': False,\n", + " 'voice': False,\n", + " 'fat': False,\n", + " 'mommy': False,\n", + " 'realize': False,\n", + " 'join': False,\n", + " 'nearly': False,\n", + " 'bb': False,\n", + " 'whatever': False,\n", + " 'woo': False,\n", + " 'spend': False,\n", + " 'stand': False,\n", + " 'enjoyed': False,\n", + " 'tweeps': False,\n", + " 'sold': False,\n", + " 'gettin': False,\n", + " 'fever': False,\n", + " 'paid': False,\n", + " 'beat': False,\n", + " 'pink': False,\n", + " 'mall': False,\n", + " 'gunna': False,\n", + " 'ï': False,\n", + " 'freaking': False,\n", + " 'snow': False,\n", + " 'hospital': False,\n", + " 'cheese': False,\n", + " 'nobody': False,\n", + " 'wondering': False,\n", + " 'plane': False,\n", + " 'shame': False,\n", + " 'search': False,\n", + " 'ended': False,\n", + " 'helping': False,\n", + " 'safe': False,\n", + " 'staying': False,\n", + " 'storm': False,\n", + " 'fam': False,\n", + " 'z': False,\n", + " 'sing': False,\n", + " 'view': False,\n", + " 'yo': False,\n", + " 'card': False,\n", + " 'stick': False,\n", + " 'joe': False,\n", + " 'matter': False,\n", + " 'delicious': False,\n", + " 'cook': False,\n", + " 'country': False,\n", + " 'hill': False,\n", + " 'peace': False,\n", + " 'worked': False,\n", + " 'race': False,\n", + " 'boot': False,\n", + " 'figure': False,\n", + " 'vega': False,\n", + " 'france': False,\n", + " 'wit': False,\n", + " 'topic': False,\n", + " 'spot': False,\n", + " 'cover': False,\n", + " 'afford': False,\n", + " 'floor': False,\n", + " 'bike': False,\n", + " 'starbucks': False,\n", + " 'hahah': False,\n", + " 'tummy': False,\n", + " 'trouble': False,\n", + " 'mouth': False,\n", + " 'ran': False,\n", + " 'drunk': False,\n", + " 'lately': False,\n", + " 'joke': False,\n", + " 'bird': False,\n", + " 'flu': False,\n", + " 'showing': False,\n", + " 'mail': False,\n", + " 'turned': False,\n", + " 'men': False,\n", + " 'taste': False,\n", + " 'gosh': False,\n", + " 'bar': False,\n", + " 'changed': False,\n", + " 'fish': False,\n", + " 'stopped': False,\n", + " 'wife': False,\n", + " 'alot': False,\n", + " 'tweetdeck': False,\n", + " 'magic': False,\n", + " 'brilliant': False,\n", + " 'cooking': False,\n", + " 'state': False,\n", + " 'design': False,\n", + " 'film': False,\n", + " 'tan': False,\n", + " 'isnt': False,\n", + " 'nail': False,\n", + " 'bummed': False,\n", + " 'prob': False,\n", + " 'happens': False,\n", + " 'eh': False,\n", + " 'type': False,\n", + " 'young': False,\n", + " 'honey': False,\n", + " 'price': False,\n", + " 'disappointed': False,\n", + " 'sky': False,\n", + " 'ring': False,\n", + " 'surprise': False,\n", + " 'self': False,\n", + " 'depressing': False,\n", + " 'sexy': False,\n", + " 'cd': False,\n", + " 'woot': False,\n", + " 'pop': False,\n", + " 'piece': False,\n", + " 'degree': False,\n", + " 'chillin': False,\n", + " 'hilarious': False,\n", + " 'rip': False,\n", + " 'updated': False,\n", + " 'ohh': False,\n", + " 'boyfriend': False,\n", + " 'awwww': False,\n", + " 'hun': False,\n", + " 'crappy': False,\n", + " 'er': False,\n", + " 'although': False,\n", + " 'death': False,\n", + " 'lake': False,\n", + " 'ahead': False,\n", + " 'art': False,\n", + " 'watchin': False,\n", + " 'scary': False,\n", + " 'annoying': False,\n", + " 'event': False,\n", + " 'plus': False,\n", + " 'ff': False,\n", + " 'major': False,\n", + " 'afraid': False,\n", + " 'pissed': False,\n", + " 'wat': False,\n", + " 'huh': False,\n", + " 'secret': False,\n", + " 'others': False,\n", + " 'land': False,\n", + " 'caught': False,\n", + " 'closed': False,\n", + " 'kiss': False,\n", + " 'trek': False,\n", + " 'code': False,\n", + " 'mobile': False,\n", + " 'knee': False,\n", + " ...},\n", + " 0)]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a list to hold the features in the required format\n", + "feature_list = []\n", + "\n", + "# Iterate through each document (row) in the matrix\n", + "for i in range(dtm_dense.shape[0]):\n", + "\n", + " # Create a dictionary for the document's word features\n", + " document_dict = {word: (count > 0) for word, count in zip(top_words, dtm_dense[i, :])}\n", + " \n", + " # Add the document's feature dictionary and sentiment label as a tuple to the feature_list\n", + " feature_list.append((document_dict, sentiment_labels[i]))\n", + "\n", + "feature_list" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Explanation\n", + "\n", + "DTM: Document-Term Matrix\n", + "\n", + "DTM stands for Document-Term Matrix. It's a fundamental data structure used in text analysis and natural language processing (NLP). Here's how it works:\n", + "\n", + "Documents: Each row in the matrix represents a single document (in your case, a tweet).\n", + "Terms: Each column represents a unique word (term) from your vocabulary (the top 5000 words in your case).\n", + "Values: The values in the matrix are the frequencies of each word in each document.\n", + "In simpler terms, a DTM shows you how many times each word appears in each document.\n", + "\n", + "True/False in the feature_list:\n", + "\n", + "The True/False values you see in the feature_list are a slightly different representation of the DTM. Instead of raw word counts, they indicate the presence or absence of each word in a document:\n", + "\n", + "True: The word is present in the document.\n", + "False: The word is absent from the document.\n", + "This binary representation is often used for text classification tasks because it focuses on whether a word appears at all, rather than how many times it appears. It makes the features more suitable for algorithms like Naive Bayes that often work well with categorical data." ] }, { @@ -210,11 +5047,62 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 49, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Most Informative Features:\n", + "Most Informative Features\n", + " ugh = True 0 : 1 = 23.3 : 1.0\n", + " upset = True 0 : 1 = 22.6 : 1.0\n", + " sadly = True 0 : 1 = 17.2 : 1.0\n", + " broke = True 0 : 1 = 15.6 : 1.0\n", + " poor = True 0 : 1 = 15.5 : 1.0\n", + " congrats = True 1 : 0 = 14.8 : 1.0\n", + " died = True 0 : 1 = 14.0 : 1.0\n", + " sad = True 0 : 1 = 12.9 : 1.0\n", + " burnt = True 0 : 1 = 11.8 : 1.0\n", + " depressing = True 0 : 1 = 11.1 : 1.0\n" + ] + } + ], + "source": [ + "from nltk.classify import NaiveBayesClassifier\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Splitting the data into training and test sets\n", + "train_set, test_set = train_test_split(feature_list, test_size=0.2, random_state=42)\n", + "\n", + "# Training the Naive Bayes classifier\n", + "classifier = NaiveBayesClassifier.train(train_set)\n", + "\n", + "# Displaying the most informative features\n", + "classifier.show_most_informative_features(10)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# your code here" + "#### Explanation\n", + "The output above provides insights into which words are most helpful in classifying your tweets as positive (1) or negative (0).\n", + "\n", + "Here's a breakdown of what each part means:\n", + "\n", + "E.g: ugh = True 0 : 1 = 23.3 : 1.0\n", + "ugh = True: This means the presence of the word \"ugh\" in a tweet.\n", + "0 : 1: This indicates the sentiment categories being compared:\n", + "0: Negative sentiment\n", + "1: Positive sentiment\n", + "23.3 : 1.0: This is the likelihood ratio. It tells you how much more likely the word \"ugh\" is to appear in a negative tweet compared to a positive tweet.\n", + "Interpreting the Likelihood Ratio:\n", + "\n", + "A likelihood ratio of 23.3 : 1.0 means that the word \"ugh\" is 23.3 times more likely to appear in a negative tweet than in a positive tweet. This suggests that \"ugh\" is a strong indicator of negative sentiment." ] }, { @@ -230,11 +5118,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.726" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "accuracy = nltk.classify.accuracy(classifier, test_set)\n", + "\n", + "accuracy\n", + "\n", + "#Yey!\n", + "# An accuracy of 0.726 means that the classifier correctly predicted the sentiment of 72.6% of the tweets in the test set. \n", + "# In other words, for every 100 tweets the model was given, it got about ~73 of them right." ] }, { @@ -298,7 +5203,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -312,7 +5217,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.5" } }, "nbformat": 4,