diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb
index 0808166..cdc145b 100644
--- a/your-code/challenge-1.ipynb
+++ b/your-code/challenge-1.ipynb
@@ -14,6 +14,28 @@
"* Write the functions you will use in Challenge 3 for cleaning, tokenizing, stemming, and lemmatizing data."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import re\n",
+ "\n",
+ "import nltk \n",
+ "from nltk.stem import WordNetLemmatizer\n",
+ "from nltk.corpus import stopwords\n",
+ "from nltk.tokenize import word_tokenize\n",
+ "from nltk.tokenize import sent_tokenize\n",
+ "from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer\n",
+ "from nltk.stem import WordNetLemmatizer\n",
+ "from nltk.corpus import wordnet\n",
+ "\n",
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+ "from sklearn.feature_extraction.text import CountVectorizer"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -66,11 +88,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'ironhack s q website is'"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "def clean_up(s):\n",
+ "def clean_up(string):\n",
" \"\"\"\n",
" Cleans up numbers, URLs, and special characters from a string.\n",
"\n",
@@ -78,8 +111,22 @@
" s: The string to be cleaned up.\n",
"\n",
" Returns:\n",
- " A string that has been cleaned up.\n",
- " \"\"\""
+ " A cleaned-up string.\n",
+ " \"\"\"\n",
+ " text = string.lower() # Lowercase the `string` argument\n",
+ " text = re.sub(r'http\\S+', ' ', text) # Remove URLs first, while they are still in one piece\n",
+ " text = re.sub(r'\\d+', ' ', text) # Remove numbers\n",
+ " text = re.sub(r'[^\\w\\s]', ' ', text) # Remove special characters (except spaces)\n",
+ "\n",
+ " # Collapse the runs of whitespace left behind by the substitutions above\n",
+ " text = re.sub(r'\\s+', ' ', text).strip()\n",
+ "\n",
+ " return text\n",
+ "\n",
+ "string = r\"\"\"@Ironhack's-#Q website 776-is http://ironhack.com [(2018)]\")\"\"\"\n",
+ "\n",
+ "cleaned_string = clean_up(string)\n",
+ "cleaned_string"
]
},
{
@@ -101,11 +148,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['ironhack', 's', 'q', 'website', 'is']"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "def tokenize(s):\n",
+ "def tokenize(cleaned_string):\n",
" \"\"\"\n",
" Tokenize a string.\n",
"\n",
@@ -114,7 +172,14 @@
"\n",
" Returns:\n",
" A list of words as the result of tokenization.\n",
- " \"\"\""
+ " \"\"\"\n",
+ " # Split with NLTK, then keep only the purely alphanumeric tokens.\n",
+ " raw_tokens = word_tokenize(cleaned_string)\n",
+ " alnum_tokens = list(filter(str.isalnum, raw_tokens))\n",
+ " return alnum_tokens\n",
+ "\n",
+ "string_tokens = tokenize(cleaned_string)\n",
+ "string_tokens"
]
},
{
@@ -145,11 +210,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['ironhack---->ironhack',\n",
+ " 's---->s',\n",
+ " 'q---->q',\n",
+ " 'website---->website',\n",
+ " 'is---->is']"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "def stem_and_lemmatize(l):\n",
+ "def stem_and_lemmatize(string_tokens):\n",
" \"\"\"\n",
" Perform stemming and lemmatization on a list of words.\n",
"\n",
@@ -158,7 +238,14 @@
"\n",
" Returns:\n",
" A list of strings after being stemmed and lemmatized.\n",
- " \"\"\""
+ " \"\"\"\n",
+ " lemmatizer = WordNetLemmatizer()\n",
+ " # Emit the bare lemma per token (not a \"word---->lemma\" debug string) so stop-word filtering can match.\n",
+ " # NOTE(review): despite the function name, no stemmer is applied here - confirm that is intended.\n",
+ " return [lemmatizer.lemmatize(word) for word in string_tokens]\n",
+ "\n",
+ "lemmatized = stem_and_lemmatize(string_tokens)\n",
+ "lemmatized"
]
},
{
@@ -176,11 +263,26 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['ironhack---->ironhack',\n",
+ " 's---->s',\n",
+ " 'q---->q',\n",
+ " 'website---->website',\n",
+ " 'is---->is']"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "def remove_stopwords(l):\n",
+ "def remove_stopwords(lemmatized):\n",
" \"\"\"\n",
" Remove English stopwords from a list of strings.\n",
"\n",
@@ -189,7 +291,14 @@
"\n",
" Returns:\n",
" A list of strings after stop words are removed.\n",
- " \"\"\""
+ " \"\"\"\n",
+ " # Look up NLTK's English stop words as a set for fast membership tests.\n",
+ " english_stops = frozenset(stopwords.words('english'))\n",
+ " kept = [token for token in lemmatized if token not in english_stops]\n",
+ " return kept\n",
+ "\n",
+ "filtered_tokens = remove_stopwords(lemmatized)\n",
+ "filtered_tokens"
]
},
{
@@ -204,7 +313,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -218,7 +327,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.11.5"
}
},
"nbformat": 4,
diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb
index 6b0e116..be339d7 100644
--- a/your-code/challenge-2.ipynb
+++ b/your-code/challenge-2.ipynb
@@ -46,11 +46,193 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 26,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " target | \n",
+ " id | \n",
+ " date | \n",
+ " flag | \n",
+ " user | \n",
+ " text | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 541200 | \n",
+ " 0 | \n",
+ " 2200003313 | \n",
+ " Tue Jun 16 18:18:13 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " DEWGetMeTho77 | \n",
+ " @Nkluvr4eva My poor little dumpling In Holmde... | \n",
+ "
\n",
+ " \n",
+ " | 750 | \n",
+ " 0 | \n",
+ " 1467998601 | \n",
+ " Mon Apr 06 23:11:18 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " Young_J | \n",
+ " I'm off too bed. I gotta wake up hella early t... | \n",
+ "
\n",
+ " \n",
+ " | 766711 | \n",
+ " 0 | \n",
+ " 2300049112 | \n",
+ " Tue Jun 23 13:40:12 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " dougnawoschik | \n",
+ " I havent been able to listen to it yet My spe... | \n",
+ "
\n",
+ " \n",
+ " | 285055 | \n",
+ " 0 | \n",
+ " 1993474319 | \n",
+ " Mon Jun 01 10:26:09 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " thireven | \n",
+ " now remembers why solving a relatively big equ... | \n",
+ "
\n",
+ " \n",
+ " | 705995 | \n",
+ " 0 | \n",
+ " 2256551006 | \n",
+ " Sat Jun 20 12:56:51 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " taracollins086 | \n",
+ " Ate too much, feel sick | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1374482 | \n",
+ " 4 | \n",
+ " 2051447103 | \n",
+ " Fri Jun 05 22:02:36 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " _Jaska | \n",
+ " @girlwonder24 Thanks. | \n",
+ "
\n",
+ " \n",
+ " | 667014 | \n",
+ " 0 | \n",
+ " 2245469948 | \n",
+ " Fri Jun 19 16:10:39 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " julianicolao | \n",
+ " trying to study for the biggest test, next wee... | \n",
+ "
\n",
+ " \n",
+ " | 1451234 | \n",
+ " 4 | \n",
+ " 2063022808 | \n",
+ " Sun Jun 07 01:05:46 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " ElaineToni | \n",
+ " Just finished watching Your Song Presents: Boy... | \n",
+ "
\n",
+ " \n",
+ " | 1181412 | \n",
+ " 4 | \n",
+ " 1982082859 | \n",
+ " Sun May 31 10:29:36 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " lindseyrd20 | \n",
+ " @janfran813 awww i can't wait to get one | \n",
+ "
\n",
+ " \n",
+ " | 517910 | \n",
+ " 0 | \n",
+ " 2191411932 | \n",
+ " Tue Jun 16 05:13:13 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " serraannisa | \n",
+ " doing nothing | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
20000 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " target id date flag \\\n",
+ "541200 0 2200003313 Tue Jun 16 18:18:13 PDT 2009 NO_QUERY \n",
+ "750 0 1467998601 Mon Apr 06 23:11:18 PDT 2009 NO_QUERY \n",
+ "766711 0 2300049112 Tue Jun 23 13:40:12 PDT 2009 NO_QUERY \n",
+ "285055 0 1993474319 Mon Jun 01 10:26:09 PDT 2009 NO_QUERY \n",
+ "705995 0 2256551006 Sat Jun 20 12:56:51 PDT 2009 NO_QUERY \n",
+ "... ... ... ... ... \n",
+ "1374482 4 2051447103 Fri Jun 05 22:02:36 PDT 2009 NO_QUERY \n",
+ "667014 0 2245469948 Fri Jun 19 16:10:39 PDT 2009 NO_QUERY \n",
+ "1451234 4 2063022808 Sun Jun 07 01:05:46 PDT 2009 NO_QUERY \n",
+ "1181412 4 1982082859 Sun May 31 10:29:36 PDT 2009 NO_QUERY \n",
+ "517910 0 2191411932 Tue Jun 16 05:13:13 PDT 2009 NO_QUERY \n",
+ "\n",
+ " user text \n",
+ "541200 DEWGetMeTho77 @Nkluvr4eva My poor little dumpling In Holmde... \n",
+ "750 Young_J I'm off too bed. I gotta wake up hella early t... \n",
+ "766711 dougnawoschik I havent been able to listen to it yet My spe... \n",
+ "285055 thireven now remembers why solving a relatively big equ... \n",
+ "705995 taracollins086 Ate too much, feel sick \n",
+ "... ... ... \n",
+ "1374482 _Jaska @girlwonder24 Thanks. \n",
+ "667014 julianicolao trying to study for the biggest test, next wee... \n",
+ "1451234 ElaineToni Just finished watching Your Song Presents: Boy... \n",
+ "1181412 lindseyrd20 @janfran813 awww i can't wait to get one \n",
+ "517910 serraannisa doing nothing \n",
+ "\n",
+ "[20000 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# your code here"
+ "import pandas as pd\n",
+ "\n",
+ "# target: the polarity of the tweet (0 = negative, 2 = neutral, 4 = positive)\n",
+ "# NOTE(review): read_csv consumes the first CSV row as a header here - confirm the file has one, otherwise that row is lost.\n",
+ "df = pd.read_csv(\"sentiment140.csv\", encoding='ISO-8859-1')\n",
+ "df.columns = ['target', 'id', 'date', 'flag', 'user', 'text']\n",
+ "\n",
+ "# Work on a reproducible 20000-row subset to keep development fast\n",
+ "sample_size = 20000\n",
+ "df_sample = df.sample(n=sample_size, random_state=42)\n",
+ "\n",
+ "df_sample"
]
},
{
@@ -76,11 +258,404 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " index | \n",
+ " target | \n",
+ " id | \n",
+ " date | \n",
+ " flag | \n",
+ " user | \n",
+ " text | \n",
+ " text_cleaned | \n",
+ " text_token | \n",
+ " text_lemma | \n",
+ " text_processed | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 541200 | \n",
+ " 0 | \n",
+ " 2200003313 | \n",
+ " Tue Jun 16 18:18:13 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " DEWGetMeTho77 | \n",
+ " @Nkluvr4eva My poor little dumpling In Holmde... | \n",
+ " nkluvr eva my poor little dumpling in holmdel ... | \n",
+ " [nkluvr, eva, my, poor, little, dumpling, in, ... | \n",
+ " [nkluvr, eva, my, poor, little, dumpling, in, ... | \n",
+ " [nkluvr, eva, poor, little, dumpling, holmdel,... | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 750 | \n",
+ " 0 | \n",
+ " 1467998601 | \n",
+ " Mon Apr 06 23:11:18 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " Young_J | \n",
+ " I'm off too bed. I gotta wake up hella early t... | \n",
+ " i m off too bed i gotta wake up hella early to... | \n",
+ " [i, m, off, too, bed, i, got, ta, wake, up, he... | \n",
+ " [i, m, off, too, bed, i, got, ta, wake, up, he... | \n",
+ " [bed, got, ta, wake, hella, early, tomorrow, m... | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 766711 | \n",
+ " 0 | \n",
+ " 2300049112 | \n",
+ " Tue Jun 23 13:40:12 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " dougnawoschik | \n",
+ " I havent been able to listen to it yet My spe... | \n",
+ " i havent been able to listen to it yet my spea... | \n",
+ " [i, havent, been, able, to, listen, to, it, ye... | \n",
+ " [i, havent, been, able, to, listen, to, it, ye... | \n",
+ " [havent, able, listen, yet, speaker, busted] | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 285055 | \n",
+ " 0 | \n",
+ " 1993474319 | \n",
+ " Mon Jun 01 10:26:09 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " thireven | \n",
+ " now remembers why solving a relatively big equ... | \n",
+ " now remembers why solving a relatively big equ... | \n",
+ " [now, remembers, why, solving, a, relatively, ... | \n",
+ " [now, remembers, why, solving, a, relatively, ... | \n",
+ " [remembers, solving, relatively, big, equation... | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 705995 | \n",
+ " 0 | \n",
+ " 2256551006 | \n",
+ " Sat Jun 20 12:56:51 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " taracollins086 | \n",
+ " Ate too much, feel sick | \n",
+ " ate too much feel sick | \n",
+ " [ate, too, much, feel, sick] | \n",
+ " [ate, too, much, feel, sick] | \n",
+ " [ate, much, feel, sick] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 19995 | \n",
+ " 1374482 | \n",
+ " 4 | \n",
+ " 2051447103 | \n",
+ " Fri Jun 05 22:02:36 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " _Jaska | \n",
+ " @girlwonder24 Thanks. | \n",
+ " girlwonder thanks | \n",
+ " [girlwonder, thanks] | \n",
+ " [girlwonder, thanks] | \n",
+ " [girlwonder, thanks] | \n",
+ "
\n",
+ " \n",
+ " | 19996 | \n",
+ " 667014 | \n",
+ " 0 | \n",
+ " 2245469948 | \n",
+ " Fri Jun 19 16:10:39 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " julianicolao | \n",
+ " trying to study for the biggest test, next wee... | \n",
+ " trying to study for the biggest test next week... | \n",
+ " [trying, to, study, for, the, biggest, test, n... | \n",
+ " [trying, to, study, for, the, biggest, test, n... | \n",
+ " [trying, study, biggest, test, next, week, wor... | \n",
+ "
\n",
+ " \n",
+ " | 19997 | \n",
+ " 1451234 | \n",
+ " 4 | \n",
+ " 2063022808 | \n",
+ " Sun Jun 07 01:05:46 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " ElaineToni | \n",
+ " Just finished watching Your Song Presents: Boy... | \n",
+ " just finished watching your song presents boys... | \n",
+ " [just, finished, watching, your, song, present... | \n",
+ " [just, finished, watching, your, song, present... | \n",
+ " [finished, watching, song, present, boystown] | \n",
+ "
\n",
+ " \n",
+ " | 19998 | \n",
+ " 1181412 | \n",
+ " 4 | \n",
+ " 1982082859 | \n",
+ " Sun May 31 10:29:36 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " lindseyrd20 | \n",
+ " @janfran813 awww i can't wait to get one | \n",
+ " janfran awww i can t wait to get one | \n",
+ " [janfran, awww, i, can, t, wait, to, get, one] | \n",
+ " [janfran, awww, i, can, t, wait, to, get, one] | \n",
+ " [janfran, awww, wait, get, one] | \n",
+ "
\n",
+ " \n",
+ " | 19999 | \n",
+ " 517910 | \n",
+ " 0 | \n",
+ " 2191411932 | \n",
+ " Tue Jun 16 05:13:13 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " serraannisa | \n",
+ " doing nothing | \n",
+ " doing nothing | \n",
+ " [doing, nothing] | \n",
+ " [doing, nothing] | \n",
+ " [nothing] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
20000 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " index target id date flag \\\n",
+ "0 541200 0 2200003313 Tue Jun 16 18:18:13 PDT 2009 NO_QUERY \n",
+ "1 750 0 1467998601 Mon Apr 06 23:11:18 PDT 2009 NO_QUERY \n",
+ "2 766711 0 2300049112 Tue Jun 23 13:40:12 PDT 2009 NO_QUERY \n",
+ "3 285055 0 1993474319 Mon Jun 01 10:26:09 PDT 2009 NO_QUERY \n",
+ "4 705995 0 2256551006 Sat Jun 20 12:56:51 PDT 2009 NO_QUERY \n",
+ "... ... ... ... ... ... \n",
+ "19995 1374482 4 2051447103 Fri Jun 05 22:02:36 PDT 2009 NO_QUERY \n",
+ "19996 667014 0 2245469948 Fri Jun 19 16:10:39 PDT 2009 NO_QUERY \n",
+ "19997 1451234 4 2063022808 Sun Jun 07 01:05:46 PDT 2009 NO_QUERY \n",
+ "19998 1181412 4 1982082859 Sun May 31 10:29:36 PDT 2009 NO_QUERY \n",
+ "19999 517910 0 2191411932 Tue Jun 16 05:13:13 PDT 2009 NO_QUERY \n",
+ "\n",
+ " user text \\\n",
+ "0 DEWGetMeTho77 @Nkluvr4eva My poor little dumpling In Holmde... \n",
+ "1 Young_J I'm off too bed. I gotta wake up hella early t... \n",
+ "2 dougnawoschik I havent been able to listen to it yet My spe... \n",
+ "3 thireven now remembers why solving a relatively big equ... \n",
+ "4 taracollins086 Ate too much, feel sick \n",
+ "... ... ... \n",
+ "19995 _Jaska @girlwonder24 Thanks. \n",
+ "19996 julianicolao trying to study for the biggest test, next wee... \n",
+ "19997 ElaineToni Just finished watching Your Song Presents: Boy... \n",
+ "19998 lindseyrd20 @janfran813 awww i can't wait to get one \n",
+ "19999 serraannisa doing nothing \n",
+ "\n",
+ " text_cleaned \\\n",
+ "0 nkluvr eva my poor little dumpling in holmdel ... \n",
+ "1 i m off too bed i gotta wake up hella early to... \n",
+ "2 i havent been able to listen to it yet my spea... \n",
+ "3 now remembers why solving a relatively big equ... \n",
+ "4 ate too much feel sick \n",
+ "... ... \n",
+ "19995 girlwonder thanks \n",
+ "19996 trying to study for the biggest test next week... \n",
+ "19997 just finished watching your song presents boys... \n",
+ "19998 janfran awww i can t wait to get one \n",
+ "19999 doing nothing \n",
+ "\n",
+ " text_token \\\n",
+ "0 [nkluvr, eva, my, poor, little, dumpling, in, ... \n",
+ "1 [i, m, off, too, bed, i, got, ta, wake, up, he... \n",
+ "2 [i, havent, been, able, to, listen, to, it, ye... \n",
+ "3 [now, remembers, why, solving, a, relatively, ... \n",
+ "4 [ate, too, much, feel, sick] \n",
+ "... ... \n",
+ "19995 [girlwonder, thanks] \n",
+ "19996 [trying, to, study, for, the, biggest, test, n... \n",
+ "19997 [just, finished, watching, your, song, present... \n",
+ "19998 [janfran, awww, i, can, t, wait, to, get, one] \n",
+ "19999 [doing, nothing] \n",
+ "\n",
+ " text_lemma \\\n",
+ "0 [nkluvr, eva, my, poor, little, dumpling, in, ... \n",
+ "1 [i, m, off, too, bed, i, got, ta, wake, up, he... \n",
+ "2 [i, havent, been, able, to, listen, to, it, ye... \n",
+ "3 [now, remembers, why, solving, a, relatively, ... \n",
+ "4 [ate, too, much, feel, sick] \n",
+ "... ... \n",
+ "19995 [girlwonder, thanks] \n",
+ "19996 [trying, to, study, for, the, biggest, test, n... \n",
+ "19997 [just, finished, watching, your, song, present... \n",
+ "19998 [janfran, awww, i, can, t, wait, to, get, one] \n",
+ "19999 [doing, nothing] \n",
+ "\n",
+ " text_processed \n",
+ "0 [nkluvr, eva, poor, little, dumpling, holmdel,... \n",
+ "1 [bed, got, ta, wake, hella, early, tomorrow, m... \n",
+ "2 [havent, able, listen, yet, speaker, busted] \n",
+ "3 [remembers, solving, relatively, big, equation... \n",
+ "4 [ate, much, feel, sick] \n",
+ "... ... \n",
+ "19995 [girlwonder, thanks] \n",
+ "19996 [trying, study, biggest, test, next, week, wor... \n",
+ "19997 [finished, watching, song, present, boystown] \n",
+ "19998 [janfran, awww, wait, get, one] \n",
+ "19999 [nothing] \n",
+ "\n",
+ "[20000 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import re\n",
+ "\n",
+ "import nltk \n",
+ "from nltk.stem import WordNetLemmatizer\n",
+ "from nltk.corpus import stopwords\n",
+ "from nltk.tokenize import word_tokenize\n",
+ "from nltk.tokenize import sent_tokenize\n",
+ "from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer\n",
+ "from nltk.stem import WordNetLemmatizer\n",
+ "from nltk.corpus import wordnet\n",
+ "\n",
+ "def clean_up(column):\n",
+ "    \"\"\"Lowercase one text value and strip URLs, digits, punctuation and extra whitespace.\"\"\"\n",
+ "    text = column.lower()\n",
+ "    text = re.sub(r'http\\S+', ' ', text)  # URLs first: stripping digits first splits them and leaks fragments\n",
+ "    text = re.sub(r'\\d+', ' ', text)  # numbers\n",
+ "    text = re.sub(r'[^\\w\\s]', ' ', text)  # special characters (except spaces)\n",
+ "    text = re.sub(r'\\s+', ' ', text).strip()  # collapse extra spaces\n",
+ "    return text\n",
+ "\n",
+ "def tokenize(column):\n",
+ "    \"\"\"Split cleaned text with NLTK and keep only the alphanumeric tokens.\"\"\"\n",
+ "    raw_tokens = word_tokenize(column)\n",
+ "    alnum_tokens = list(filter(str.isalnum, raw_tokens))\n",
+ "    return alnum_tokens\n",
+ "\n",
+ "def stem_and_lemmatize(column):\n",
+ "    \"\"\"Map every token in the list to its WordNet lemma (no stemmer is applied).\"\"\"\n",
+ "    lemmatize = WordNetLemmatizer().lemmatize\n",
+ "    lemmas = list(map(lemmatize, column))\n",
+ "    return lemmas\n",
+ "\n",
+ "def remove_stopwords(column):\n",
+ "    \"\"\"Drop NLTK English stop words from a list of tokens.\"\"\"\n",
+ "    english_stops = frozenset(stopwords.words('english'))\n",
+ "    kept = [token for token in column if token not in english_stops]\n",
+ "    return kept\n",
+ "\n",
+ "# Run the pipeline stage by stage, keeping every intermediate column for inspection\n",
+ "# NOTE(review): lemmatizing before stop-word removal appears to let mangled stop words ('wa', 'ha') through - confirm.\n",
+ "df_sample['text_cleaned'] = df_sample['text'].apply(clean_up)\n",
+ "df_sample['text_token'] = df_sample['text_cleaned'].apply(tokenize)\n",
+ "df_sample['text_lemma'] = df_sample['text_token'].apply(stem_and_lemmatize)\n",
+ "df_sample['text_processed'] = df_sample['text_lemma'].apply(remove_stopwords)\n",
+ "df_sample = df_sample if 'index' in df_sample.columns else df_sample.reset_index()  # idempotent on re-run\n",
+ "df_sample"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(9942, 11)"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# your code here"
+ "neg_tweets = df_sample.loc[df_sample['target'] == 0]  # rows whose target is 0\n",
+ "neg_tweets.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0, 11)"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "neutral_tweets = df_sample.loc[df_sample['target'] == 2]  # rows whose target is 2\n",
+ "neutral_tweets.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(10058, 11)"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pos_tweets = df_sample.loc[df_sample['target'] == 4]  # rows whose target is 4\n",
+ "pos_tweets.shape"
]
},
{
@@ -98,11 +673,126 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " wa | \n",
+ " 1400 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " day | \n",
+ " 1323 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " good | \n",
+ " 1175 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " get | \n",
+ " 1102 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " like | \n",
+ " 977 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 4995 | \n",
+ " regularly | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 4996 | \n",
+ " petewentz | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 4997 | \n",
+ " inspired | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 4998 | \n",
+ " nun | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 4999 | \n",
+ " influence | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5000 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1\n",
+ "0 wa 1400\n",
+ "1 day 1323\n",
+ "2 good 1175\n",
+ "3 get 1102\n",
+ "4 like 977\n",
+ "... ... ...\n",
+ "4995 regularly 3\n",
+ "4996 petewentz 3\n",
+ "4997 inspired 3\n",
+ "4998 nun 3\n",
+ "4999 influence 3\n",
+ "\n",
+ "[5000 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# your code here"
+ "from nltk.probability import FreqDist\n",
+ "\n",
+ "# Flatten the per-tweet token lists, count each token, and keep the 5000 most frequent\n",
+ "all_words = [token for tweet_tokens in df_sample['text_processed'] for token in tweet_tokens]\n",
+ "freq_dist = FreqDist(all_words)\n",
+ "top_5000_words = freq_dist.most_common(5000)\n",
+ "top_5000_df = pd.DataFrame(top_5000_words)\n",
+ "top_5000_df"
]
},
{
@@ -167,11 +857,4158 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
- "# your code here"
+ "import numpy as np\n",
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
+ "\n",
+ "# Extract only the words from top_5000_words\n",
+ "top_words = [word for word, _ in top_5000_words]\n",
+ "\n",
+ "# Restrict the vocabulary to the top words. The default token_pattern only matches tokens of\n",
+ "# 2+ characters, so one-letter vocabulary entries such as 'u' or 'x' would always count as 0.\n",
+ "vectorizer = CountVectorizer(vocabulary=top_words, token_pattern=r'(?u)\\b\\w+\\b')\n",
+ "\n",
+ "# Build the document-term matrix from the space-joined processed tokens\n",
+ "dtm = vectorizer.fit_transform([' '.join(words) for words in df_sample['text_processed']])\n",
+ "\n",
+ "# Convert sparse matrix to dense numpy array for easier manipulation\n",
+ "dtm_dense = dtm.toarray()\n",
+ "# Binary labels: a 'target' value of 4 is mapped to 1; other values are left unchanged\n",
+ "sentiment_labels = df_sample['target'].replace(4,1).to_numpy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[1, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " ...,\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0]], dtype=int64)"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dtm_dense"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(20000, 5000)"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dtm_dense.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 0, 0, ..., 1, 1, 0], dtype=int64)"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sentiment_labels"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[({'wa': True,\n",
+ " 'day': False,\n",
+ " 'good': False,\n",
+ " 'get': False,\n",
+ " 'like': False,\n",
+ " 'go': False,\n",
+ " 'quot': False,\n",
+ " 'love': False,\n",
+ " 'work': False,\n",
+ " 'got': False,\n",
+ " 'today': False,\n",
+ " 'going': False,\n",
+ " 'u': False,\n",
+ " 'time': False,\n",
+ " 'one': False,\n",
+ " 'lol': False,\n",
+ " 'know': False,\n",
+ " 'im': False,\n",
+ " 'back': False,\n",
+ " 'really': True,\n",
+ " 'want': False,\n",
+ " 'night': False,\n",
+ " 'amp': False,\n",
+ " 'well': False,\n",
+ " 'new': False,\n",
+ " 'see': False,\n",
+ " 'think': False,\n",
+ " 'still': False,\n",
+ " 'oh': False,\n",
+ " 'thanks': False,\n",
+ " 'na': False,\n",
+ " 'ha': False,\n",
+ " 'need': False,\n",
+ " 'home': False,\n",
+ " 'much': False,\n",
+ " 'miss': False,\n",
+ " 'feel': False,\n",
+ " 'last': False,\n",
+ " 'morning': False,\n",
+ " 'great': False,\n",
+ " 'make': False,\n",
+ " 'tomorrow': False,\n",
+ " 'twitter': False,\n",
+ " 'haha': False,\n",
+ " 'wish': False,\n",
+ " 'hope': True,\n",
+ " 'bad': False,\n",
+ " 'sad': False,\n",
+ " 'fun': False,\n",
+ " 'come': False,\n",
+ " 'sleep': False,\n",
+ " 'would': False,\n",
+ " 'nice': False,\n",
+ " 'sorry': False,\n",
+ " 'right': False,\n",
+ " 'week': False,\n",
+ " 'tonight': True,\n",
+ " 'happy': False,\n",
+ " 'say': False,\n",
+ " 'thing': False,\n",
+ " 'getting': False,\n",
+ " 'look': False,\n",
+ " 'friend': False,\n",
+ " 'gon': False,\n",
+ " 'though': False,\n",
+ " 'hate': False,\n",
+ " 'wait': False,\n",
+ " 'better': False,\n",
+ " 'bed': False,\n",
+ " 'way': False,\n",
+ " 'watching': False,\n",
+ " 'lt': False,\n",
+ " 'people': False,\n",
+ " 'yeah': False,\n",
+ " 'hour': False,\n",
+ " 'show': False,\n",
+ " 'could': False,\n",
+ " 'thank': False,\n",
+ " 'take': False,\n",
+ " 'weekend': False,\n",
+ " 'next': False,\n",
+ " 'yes': False,\n",
+ " 'school': False,\n",
+ " 'even': False,\n",
+ " 'little': True,\n",
+ " 'life': False,\n",
+ " 'working': False,\n",
+ " 'everyone': False,\n",
+ " 'guy': False,\n",
+ " 'cant': False,\n",
+ " 'sick': False,\n",
+ " 'dont': True,\n",
+ " 'hey': False,\n",
+ " 'let': False,\n",
+ " 'awesome': False,\n",
+ " 'movie': False,\n",
+ " 'girl': False,\n",
+ " 'tweet': False,\n",
+ " 'always': False,\n",
+ " 'x': False,\n",
+ " 'never': False,\n",
+ " 'watch': False,\n",
+ " 'please': False,\n",
+ " 'soon': False,\n",
+ " 'year': False,\n",
+ " 'first': False,\n",
+ " 'long': False,\n",
+ " 'ok': False,\n",
+ " 'tired': False,\n",
+ " 'already': False,\n",
+ " 'feeling': False,\n",
+ " 'suck': False,\n",
+ " 'wan': False,\n",
+ " 'sure': False,\n",
+ " 'best': False,\n",
+ " 'looking': False,\n",
+ " 'n': False,\n",
+ " 'man': False,\n",
+ " 'another': False,\n",
+ " 'something': False,\n",
+ " 'find': False,\n",
+ " 'start': False,\n",
+ " 'cool': False,\n",
+ " 'done': False,\n",
+ " 'pretty': False,\n",
+ " 'omg': False,\n",
+ " 'yay': False,\n",
+ " 'yet': False,\n",
+ " 'phone': False,\n",
+ " 'damn': False,\n",
+ " 'lot': False,\n",
+ " 'away': False,\n",
+ " 'went': False,\n",
+ " 'old': False,\n",
+ " 'follow': False,\n",
+ " 'help': False,\n",
+ " 'guess': False,\n",
+ " 'keep': False,\n",
+ " 'house': False,\n",
+ " 'thought': False,\n",
+ " 'song': False,\n",
+ " 'made': False,\n",
+ " 'ever': False,\n",
+ " 'trying': True,\n",
+ " 'ya': False,\n",
+ " 'sun': False,\n",
+ " 'p': False,\n",
+ " 'bit': False,\n",
+ " 'hurt': False,\n",
+ " 'game': False,\n",
+ " 'finally': False,\n",
+ " 'maybe': False,\n",
+ " 'ready': False,\n",
+ " 'sound': False,\n",
+ " 'w': False,\n",
+ " 'big': False,\n",
+ " 'lost': False,\n",
+ " 'b': False,\n",
+ " 'nothing': False,\n",
+ " 'someone': False,\n",
+ " 'early': False,\n",
+ " 'summer': False,\n",
+ " 'tell': False,\n",
+ " 'hard': True,\n",
+ " 'also': False,\n",
+ " 'birthday': False,\n",
+ " 'r': False,\n",
+ " 'left': False,\n",
+ " 'mean': False,\n",
+ " 'missed': False,\n",
+ " 'ur': False,\n",
+ " 'th': False,\n",
+ " 'rain': False,\n",
+ " 'pic': False,\n",
+ " 'mom': False,\n",
+ " 'com': False,\n",
+ " 'party': False,\n",
+ " 'two': False,\n",
+ " 'glad': False,\n",
+ " 'world': False,\n",
+ " 'baby': False,\n",
+ " 'wow': False,\n",
+ " 'might': False,\n",
+ " 'bored': False,\n",
+ " 'call': False,\n",
+ " 'ta': False,\n",
+ " 'check': False,\n",
+ " 'late': False,\n",
+ " 'waiting': False,\n",
+ " 'car': False,\n",
+ " 'video': False,\n",
+ " 'stuff': False,\n",
+ " 'found': False,\n",
+ " 'yesterday': False,\n",
+ " 'said': False,\n",
+ " 'sunday': False,\n",
+ " 'luck': False,\n",
+ " 'saw': False,\n",
+ " 'amazing': False,\n",
+ " 'hot': False,\n",
+ " 'monday': False,\n",
+ " 'weather': False,\n",
+ " 'live': False,\n",
+ " 'many': False,\n",
+ " 'iphone': False,\n",
+ " 'doe': False,\n",
+ " 'exam': False,\n",
+ " 'thats': False,\n",
+ " 'play': False,\n",
+ " 'making': False,\n",
+ " 'may': False,\n",
+ " 'excited': False,\n",
+ " 'god': False,\n",
+ " 'dad': False,\n",
+ " 'family': False,\n",
+ " 'gone': False,\n",
+ " 'friday': False,\n",
+ " 'boy': False,\n",
+ " 'follower': False,\n",
+ " 'read': False,\n",
+ " 'funny': False,\n",
+ " 'hi': False,\n",
+ " 'since': False,\n",
+ " 'give': False,\n",
+ " 'poor': True,\n",
+ " 'enjoy': False,\n",
+ " 'job': False,\n",
+ " 'okay': False,\n",
+ " 'talk': False,\n",
+ " 'gt': False,\n",
+ " 'later': False,\n",
+ " 'head': False,\n",
+ " 'almost': False,\n",
+ " 'beautiful': False,\n",
+ " 'woke': False,\n",
+ " 'cold': False,\n",
+ " 'anything': False,\n",
+ " 'hear': False,\n",
+ " 'lunch': False,\n",
+ " 'missing': False,\n",
+ " 'put': False,\n",
+ " 'free': False,\n",
+ " 'try': True,\n",
+ " 'coming': False,\n",
+ " 'must': False,\n",
+ " 'end': False,\n",
+ " 'tho': False,\n",
+ " 'leave': False,\n",
+ " 'ugh': False,\n",
+ " 'till': False,\n",
+ " 'busy': False,\n",
+ " 'around': False,\n",
+ " 'food': False,\n",
+ " 'far': False,\n",
+ " 'book': False,\n",
+ " 'music': False,\n",
+ " 'cry': False,\n",
+ " 'headache': False,\n",
+ " 'cause': False,\n",
+ " 'use': False,\n",
+ " 'listening': False,\n",
+ " 'fan': False,\n",
+ " 'stop': False,\n",
+ " 'stay': False,\n",
+ " 'totally': False,\n",
+ " 'wanted': False,\n",
+ " 'place': False,\n",
+ " 'shit': False,\n",
+ " 'xx': True,\n",
+ " 'tv': False,\n",
+ " 'e': False,\n",
+ " 'picture': False,\n",
+ " 'update': False,\n",
+ " 'least': False,\n",
+ " 'sweet': False,\n",
+ " 'anyone': False,\n",
+ " 'lovely': False,\n",
+ " 'thinking': False,\n",
+ " 'forward': False,\n",
+ " 'aww': False,\n",
+ " 'g': False,\n",
+ " 'dog': False,\n",
+ " 'class': False,\n",
+ " 'actually': False,\n",
+ " 'everything': False,\n",
+ " 'eat': False,\n",
+ " 'mine': False,\n",
+ " 'playing': False,\n",
+ " 'cute': False,\n",
+ " 'hahaha': False,\n",
+ " 'kid': False,\n",
+ " 'dinner': False,\n",
+ " 'stupid': False,\n",
+ " 'sooo': False,\n",
+ " 'came': False,\n",
+ " 'word': False,\n",
+ " 'ill': False,\n",
+ " 'eating': False,\n",
+ " 'win': False,\n",
+ " 'hopefully': False,\n",
+ " 'finished': False,\n",
+ " 'welcome': False,\n",
+ " 'anymore': False,\n",
+ " 'minute': False,\n",
+ " 'every': False,\n",
+ " 'face': False,\n",
+ " 'idea': False,\n",
+ " 'hair': False,\n",
+ " 'without': False,\n",
+ " 'kinda': False,\n",
+ " 'month': False,\n",
+ " 'saturday': False,\n",
+ " 'c': False,\n",
+ " 'wrong': False,\n",
+ " 'super': False,\n",
+ " 'www': False,\n",
+ " 'final': False,\n",
+ " 'true': False,\n",
+ " 'reading': False,\n",
+ " 'hug': False,\n",
+ " 'probably': False,\n",
+ " 'photo': False,\n",
+ " 'buy': False,\n",
+ " 'hehe': False,\n",
+ " 'taking': False,\n",
+ " 'believe': False,\n",
+ " 'eye': False,\n",
+ " 'didnt': False,\n",
+ " 'name': False,\n",
+ " 'alone': False,\n",
+ " 'mind': False,\n",
+ " 'room': False,\n",
+ " 'either': False,\n",
+ " 'dream': False,\n",
+ " 'following': False,\n",
+ " 'able': False,\n",
+ " 'goodnight': False,\n",
+ " 'else': False,\n",
+ " 'lmao': False,\n",
+ " 'boo': False,\n",
+ " 'heard': False,\n",
+ " 'coffee': False,\n",
+ " 'outside': False,\n",
+ " 'real': False,\n",
+ " 'mileycyrus': False,\n",
+ " 'break': False,\n",
+ " 'part': False,\n",
+ " 'june': False,\n",
+ " 'computer': False,\n",
+ " 'forgot': False,\n",
+ " 'ticket': False,\n",
+ " 'blog': False,\n",
+ " 'post': False,\n",
+ " 'awww': False,\n",
+ " 'rest': False,\n",
+ " 'dude': False,\n",
+ " 'pm': False,\n",
+ " 'enough': False,\n",
+ " 'brother': False,\n",
+ " 'plan': False,\n",
+ " 'ð': False,\n",
+ " 'half': False,\n",
+ " 'using': False,\n",
+ " 'add': False,\n",
+ " 'stuck': False,\n",
+ " 'person': False,\n",
+ " 'text': False,\n",
+ " 'meet': False,\n",
+ " 'mother': False,\n",
+ " 'album': False,\n",
+ " 'study': False,\n",
+ " 'crazy': False,\n",
+ " 'hand': False,\n",
+ " 'send': False,\n",
+ " 'fine': False,\n",
+ " 'talking': False,\n",
+ " 'whole': False,\n",
+ " 'reply': False,\n",
+ " 'nite': False,\n",
+ " 'k': False,\n",
+ " 'seems': False,\n",
+ " 'run': False,\n",
+ " 'red': False,\n",
+ " 'took': False,\n",
+ " 'v': False,\n",
+ " 'seen': False,\n",
+ " 'trip': False,\n",
+ " 'full': False,\n",
+ " 'beach': False,\n",
+ " 'hello': False,\n",
+ " 'side': False,\n",
+ " 'loved': False,\n",
+ " 'news': False,\n",
+ " 'hit': False,\n",
+ " 'tried': False,\n",
+ " 'kind': False,\n",
+ " 'rock': False,\n",
+ " 'heart': False,\n",
+ " 'yea': False,\n",
+ " 'la': False,\n",
+ " 'shopping': False,\n",
+ " 'problem': False,\n",
+ " 'afternoon': False,\n",
+ " 'â': False,\n",
+ " 'pain': False,\n",
+ " 'change': False,\n",
+ " 'nap': False,\n",
+ " 'started': False,\n",
+ " 'used': False,\n",
+ " 'remember': False,\n",
+ " 'star': False,\n",
+ " 'boring': False,\n",
+ " 'course': False,\n",
+ " 'heading': False,\n",
+ " 'quite': False,\n",
+ " 'seeing': False,\n",
+ " 'hell': False,\n",
+ " 'train': False,\n",
+ " 'breakfast': False,\n",
+ " 'crap': False,\n",
+ " 'told': False,\n",
+ " 'english': False,\n",
+ " 'died': False,\n",
+ " 'sister': False,\n",
+ " 'fuck': False,\n",
+ " 'site': False,\n",
+ " 'instead': False,\n",
+ " 'leaving': False,\n",
+ " 'ipod': False,\n",
+ " 'money': False,\n",
+ " 'raining': False,\n",
+ " 'finish': False,\n",
+ " 'til': False,\n",
+ " 'anyway': False,\n",
+ " 'ah': False,\n",
+ " 'running': False,\n",
+ " 'concert': False,\n",
+ " 'soo': False,\n",
+ " 'facebook': False,\n",
+ " 'link': False,\n",
+ " 'sitting': False,\n",
+ " 'point': False,\n",
+ " 'jealous': False,\n",
+ " 'season': False,\n",
+ " 'bring': False,\n",
+ " 'ñ': False,\n",
+ " 'cuz': False,\n",
+ " 'awake': False,\n",
+ " 'mum': False,\n",
+ " 'reason': False,\n",
+ " 'studying': False,\n",
+ " 'pay': False,\n",
+ " 'cat': False,\n",
+ " 'wonder': False,\n",
+ " 'congrats': False,\n",
+ " 'ago': False,\n",
+ " 'xd': False,\n",
+ " 'page': False,\n",
+ " 'f': False,\n",
+ " 'bought': False,\n",
+ " 'store': False,\n",
+ " 'drink': False,\n",
+ " 'definitely': False,\n",
+ " 'couple': False,\n",
+ " 'evening': False,\n",
+ " 'chocolate': False,\n",
+ " 'btw': False,\n",
+ " 'soooo': False,\n",
+ " 'sunny': False,\n",
+ " 'sore': False,\n",
+ " 'drive': False,\n",
+ " 'shower': False,\n",
+ " 'st': False,\n",
+ " 'lucky': False,\n",
+ " 'aw': False,\n",
+ " 'test': False,\n",
+ " 'walk': False,\n",
+ " 'internet': False,\n",
+ " 'open': False,\n",
+ " 'water': False,\n",
+ " 'wake': False,\n",
+ " 'l': False,\n",
+ " 'office': False,\n",
+ " 'list': False,\n",
+ " 'watched': False,\n",
+ " 'team': False,\n",
+ " 'tommcfly': False,\n",
+ " 'clean': False,\n",
+ " 'enjoying': False,\n",
+ " 'hungry': False,\n",
+ " 'smile': False,\n",
+ " 'seriously': False,\n",
+ " 'move': False,\n",
+ " 'wont': False,\n",
+ " 'high': False,\n",
+ " 'asleep': False,\n",
+ " 'award': False,\n",
+ " 'fucking': False,\n",
+ " 'bout': False,\n",
+ " 'starting': False,\n",
+ " 'top': False,\n",
+ " 'moment': False,\n",
+ " 'dance': False,\n",
+ " 'project': False,\n",
+ " 'second': False,\n",
+ " 'park': False,\n",
+ " 'ask': False,\n",
+ " 'hr': False,\n",
+ " 'email': False,\n",
+ " 'church': False,\n",
+ " 'driving': False,\n",
+ " 'tea': False,\n",
+ " 'broke': False,\n",
+ " 'gym': False,\n",
+ " 'ride': False,\n",
+ " 'le': False,\n",
+ " 'loving': False,\n",
+ " 'meeting': False,\n",
+ " 'worth': False,\n",
+ " 'fail': False,\n",
+ " 'black': False,\n",
+ " 'close': False,\n",
+ " 'visit': False,\n",
+ " 'sigh': False,\n",
+ " 'ate': False,\n",
+ " 'online': False,\n",
+ " 'number': False,\n",
+ " 'vote': False,\n",
+ " 'bye': False,\n",
+ " 'hang': False,\n",
+ " 'wonderful': False,\n",
+ " 'youtube': False,\n",
+ " 'care': False,\n",
+ " 'co': False,\n",
+ " 'cut': False,\n",
+ " 'drinking': False,\n",
+ " 'horrible': False,\n",
+ " 'ddlovato': False,\n",
+ " 'shirt': False,\n",
+ " 'ice': False,\n",
+ " 'saying': False,\n",
+ " 'answer': False,\n",
+ " 'date': False,\n",
+ " 'dear': False,\n",
+ " 'agree': False,\n",
+ " 'j': False,\n",
+ " 'set': False,\n",
+ " 'town': False,\n",
+ " 'da': False,\n",
+ " 'wear': False,\n",
+ " 'happened': False,\n",
+ " 'line': False,\n",
+ " 'parent': False,\n",
+ " 'worse': False,\n",
+ " 'min': False,\n",
+ " 'longer': False,\n",
+ " 'lady': False,\n",
+ " 'together': False,\n",
+ " 'cream': False,\n",
+ " 'worry': False,\n",
+ " 'goin': False,\n",
+ " 'followfriday': False,\n",
+ " 'fast': False,\n",
+ " 'forget': False,\n",
+ " 'fb': False,\n",
+ " 'doesnt': False,\n",
+ " 'broken': False,\n",
+ " 'wtf': False,\n",
+ " 'turn': False,\n",
+ " 'unfortunately': False,\n",
+ " 'chance': False,\n",
+ " 'favorite': False,\n",
+ " 'spent': False,\n",
+ " 'fall': False,\n",
+ " 'air': False,\n",
+ " 'idk': False,\n",
+ " 'slept': False,\n",
+ " 'sometimes': False,\n",
+ " 'rainy': False,\n",
+ " 'question': False,\n",
+ " 'laptop': False,\n",
+ " 'tweeting': False,\n",
+ " 'tuesday': False,\n",
+ " 'earlier': False,\n",
+ " 'mood': False,\n",
+ " 'slow': False,\n",
+ " 'hoping': False,\n",
+ " 'thx': False,\n",
+ " 'h': False,\n",
+ " 'absolutely': False,\n",
+ " 'mr': False,\n",
+ " 'ahh': False,\n",
+ " 'cleaning': False,\n",
+ " 'via': False,\n",
+ " 'holiday': False,\n",
+ " 'happen': False,\n",
+ " 'wishing': False,\n",
+ " 'taken': False,\n",
+ " 'pool': False,\n",
+ " 'episode': False,\n",
+ " 'garden': False,\n",
+ " 'homework': False,\n",
+ " 'website': False,\n",
+ " 'sleeping': False,\n",
+ " 'airport': False,\n",
+ " 'especially': False,\n",
+ " 'business': False,\n",
+ " 'perfect': False,\n",
+ " 'em': False,\n",
+ " 'fell': False,\n",
+ " 'nd': False,\n",
+ " 'upset': False,\n",
+ " 'small': False,\n",
+ " 'city': False,\n",
+ " 'chat': False,\n",
+ " 'knew': False,\n",
+ " 'foot': False,\n",
+ " 'chicken': False,\n",
+ " '½': False,\n",
+ " 'father': False,\n",
+ " 'throat': False,\n",
+ " 'mac': False,\n",
+ " 'weird': False,\n",
+ " 'window': False,\n",
+ " 'passed': False,\n",
+ " 'son': False,\n",
+ " 'story': False,\n",
+ " 'tour': False,\n",
+ " 'message': False,\n",
+ " 'shop': False,\n",
+ " 'wednesday': False,\n",
+ " 'due': False,\n",
+ " 'bbq': False,\n",
+ " 'listen': False,\n",
+ " 'sleepy': False,\n",
+ " 'woman': False,\n",
+ " 'company': False,\n",
+ " 'sunshine': False,\n",
+ " 'met': False,\n",
+ " 'short': False,\n",
+ " 'comment': False,\n",
+ " 'mad': False,\n",
+ " 'understand': False,\n",
+ " 'gave': False,\n",
+ " 'pc': False,\n",
+ " 'hubby': False,\n",
+ " 'different': False,\n",
+ " 'havent': False,\n",
+ " 'account': False,\n",
+ " 'note': False,\n",
+ " 'seem': False,\n",
+ " 'scared': False,\n",
+ " 'bag': False,\n",
+ " 'alright': False,\n",
+ " 'cup': False,\n",
+ " 'ive': False,\n",
+ " 'leg': False,\n",
+ " 'green': False,\n",
+ " 'interesting': False,\n",
+ " 'bus': False,\n",
+ " 'past': False,\n",
+ " 'glass': False,\n",
+ " 'worst': False,\n",
+ " 'power': False,\n",
+ " 'plz': False,\n",
+ " 'math': False,\n",
+ " 'white': False,\n",
+ " 'support': False,\n",
+ " 'nope': False,\n",
+ " 'sat': False,\n",
+ " 'moon': False,\n",
+ " 'hangover': False,\n",
+ " 'moving': False,\n",
+ " 'taylor': False,\n",
+ " 'touch': False,\n",
+ " 'order': False,\n",
+ " 'jonas': False,\n",
+ " 'pick': False,\n",
+ " 'forever': False,\n",
+ " 'tom': False,\n",
+ " 'dead': False,\n",
+ " 'shoot': False,\n",
+ " 'called': False,\n",
+ " 'bitch': False,\n",
+ " 'rather': False,\n",
+ " 'catch': False,\n",
+ " 'bet': False,\n",
+ " 'officially': False,\n",
+ " 'vip': False,\n",
+ " 'writing': False,\n",
+ " 'si': False,\n",
+ " 'lil': False,\n",
+ " 'worried': False,\n",
+ " 'xoxo': False,\n",
+ " 'write': False,\n",
+ " 'graduation': False,\n",
+ " 'liked': False,\n",
+ " 'except': False,\n",
+ " 'bday': False,\n",
+ " 'gay': False,\n",
+ " 'dang': False,\n",
+ " 'cousin': False,\n",
+ " 'load': False,\n",
+ " 'myspace': False,\n",
+ " 'sent': False,\n",
+ " 'fix': False,\n",
+ " 'ahhh': False,\n",
+ " 'fly': False,\n",
+ " 'blue': False,\n",
+ " 'college': False,\n",
+ " 'gorgeous': False,\n",
+ " 'special': False,\n",
+ " 'kill': False,\n",
+ " 'apple': False,\n",
+ " 'interview': False,\n",
+ " 'fight': False,\n",
+ " 'everybody': False,\n",
+ " 'dress': False,\n",
+ " 'box': False,\n",
+ " 'bro': False,\n",
+ " 'case': False,\n",
+ " 'july': False,\n",
+ " 'cake': False,\n",
+ " 'three': False,\n",
+ " 'hmm': False,\n",
+ " 'deal': False,\n",
+ " 'ouch': False,\n",
+ " 'inside': False,\n",
+ " 'yep': False,\n",
+ " 'random': False,\n",
+ " 'meant': False,\n",
+ " 'profile': False,\n",
+ " 'mtv': False,\n",
+ " 'wedding': False,\n",
+ " 'clothes': False,\n",
+ " 'band': False,\n",
+ " 'issue': False,\n",
+ " 'apparently': False,\n",
+ " 'lonely': False,\n",
+ " 'age': False,\n",
+ " 'shall': False,\n",
+ " 'flight': False,\n",
+ " 'yr': False,\n",
+ " 'supposed': False,\n",
+ " 'david': False,\n",
+ " 'living': False,\n",
+ " 'needed': False,\n",
+ " 'london': False,\n",
+ " 'laugh': False,\n",
+ " 'sign': False,\n",
+ " 'finger': False,\n",
+ " 'beer': False,\n",
+ " 'itunes': False,\n",
+ " 'played': False,\n",
+ " 'tear': False,\n",
+ " 'ppl': False,\n",
+ " 'body': False,\n",
+ " 'paper': False,\n",
+ " 'group': False,\n",
+ " 'looked': False,\n",
+ " 'sadly': False,\n",
+ " 'bloody': False,\n",
+ " 'club': False,\n",
+ " 'web': False,\n",
+ " 'vacation': False,\n",
+ " 'jonasbrothers': False,\n",
+ " 'uk': False,\n",
+ " 'version': False,\n",
+ " 'huge': False,\n",
+ " 'sooooo': False,\n",
+ " 'lakers': False,\n",
+ " 'germany': False,\n",
+ " 'save': False,\n",
+ " 'hanging': False,\n",
+ " 'shoe': False,\n",
+ " 'fantastic': False,\n",
+ " 'wine': False,\n",
+ " 'sort': False,\n",
+ " 'none': False,\n",
+ " 'twilight': False,\n",
+ " 'shot': False,\n",
+ " 'thursday': False,\n",
+ " 'confused': False,\n",
+ " 'sale': False,\n",
+ " 'goodbye': False,\n",
+ " 'round': False,\n",
+ " 'lame': False,\n",
+ " 'cheer': False,\n",
+ " 'singing': False,\n",
+ " 'promise': False,\n",
+ " 'drop': False,\n",
+ " 'nose': False,\n",
+ " 'whats': False,\n",
+ " 'babe': False,\n",
+ " 'xxx': False,\n",
+ " 'lesson': False,\n",
+ " 'camera': False,\n",
+ " 'download': False,\n",
+ " 'info': False,\n",
+ " 'fair': False,\n",
+ " 'yummy': False,\n",
+ " 'french': False,\n",
+ " 'miley': False,\n",
+ " 'light': False,\n",
+ " 'asked': False,\n",
+ " 'lazy': False,\n",
+ " 'indeed': False,\n",
+ " 'along': False,\n",
+ " 'quick': False,\n",
+ " 'ã': False,\n",
+ " 'door': False,\n",
+ " 'sit': False,\n",
+ " 'yup': False,\n",
+ " 'street': False,\n",
+ " 'mile': False,\n",
+ " 'dm': False,\n",
+ " 'jus': False,\n",
+ " 'giving': False,\n",
+ " 'ear': False,\n",
+ " 'service': False,\n",
+ " 'low': False,\n",
+ " 'relaxing': False,\n",
+ " 'radio': False,\n",
+ " 'arm': False,\n",
+ " 'future': False,\n",
+ " 'proud': False,\n",
+ " 'fact': False,\n",
+ " 'learn': False,\n",
+ " 'easy': False,\n",
+ " 'child': False,\n",
+ " 'kitty': False,\n",
+ " 'hold': False,\n",
+ " 'mark': False,\n",
+ " 'peep': False,\n",
+ " 'wearing': False,\n",
+ " 'luv': False,\n",
+ " 'south': False,\n",
+ " 'road': False,\n",
+ " 'smell': False,\n",
+ " 'exciting': False,\n",
+ " 'currently': False,\n",
+ " 'warm': False,\n",
+ " 'puppy': False,\n",
+ " 'packing': False,\n",
+ " 'google': False,\n",
+ " 'exactly': False,\n",
+ " 'share': False,\n",
+ " 'decided': False,\n",
+ " 'voice': False,\n",
+ " 'fat': False,\n",
+ " 'mommy': False,\n",
+ " 'realize': False,\n",
+ " 'join': False,\n",
+ " 'nearly': False,\n",
+ " 'bb': False,\n",
+ " 'whatever': False,\n",
+ " 'woo': False,\n",
+ " 'spend': False,\n",
+ " 'stand': False,\n",
+ " 'enjoyed': False,\n",
+ " 'tweeps': False,\n",
+ " 'sold': False,\n",
+ " 'gettin': False,\n",
+ " 'fever': False,\n",
+ " 'paid': False,\n",
+ " 'beat': False,\n",
+ " 'pink': False,\n",
+ " 'mall': False,\n",
+ " 'gunna': False,\n",
+ " 'ï': False,\n",
+ " 'freaking': False,\n",
+ " 'snow': False,\n",
+ " 'hospital': False,\n",
+ " 'cheese': False,\n",
+ " 'nobody': False,\n",
+ " 'wondering': False,\n",
+ " 'plane': False,\n",
+ " 'shame': False,\n",
+ " 'search': False,\n",
+ " 'ended': False,\n",
+ " 'helping': False,\n",
+ " 'safe': False,\n",
+ " 'staying': False,\n",
+ " 'storm': False,\n",
+ " 'fam': False,\n",
+ " 'z': False,\n",
+ " 'sing': False,\n",
+ " 'view': False,\n",
+ " 'yo': False,\n",
+ " 'card': False,\n",
+ " 'stick': False,\n",
+ " 'joe': False,\n",
+ " 'matter': False,\n",
+ " 'delicious': False,\n",
+ " 'cook': False,\n",
+ " 'country': False,\n",
+ " 'hill': False,\n",
+ " 'peace': False,\n",
+ " 'worked': False,\n",
+ " 'race': False,\n",
+ " 'boot': False,\n",
+ " 'figure': False,\n",
+ " 'vega': False,\n",
+ " 'france': False,\n",
+ " 'wit': False,\n",
+ " 'topic': False,\n",
+ " 'spot': False,\n",
+ " 'cover': False,\n",
+ " 'afford': False,\n",
+ " 'floor': False,\n",
+ " 'bike': False,\n",
+ " 'starbucks': False,\n",
+ " 'hahah': False,\n",
+ " 'tummy': False,\n",
+ " 'trouble': False,\n",
+ " 'mouth': False,\n",
+ " 'ran': False,\n",
+ " 'drunk': False,\n",
+ " 'lately': False,\n",
+ " 'joke': False,\n",
+ " 'bird': False,\n",
+ " 'flu': False,\n",
+ " 'showing': False,\n",
+ " 'mail': False,\n",
+ " 'turned': False,\n",
+ " 'men': False,\n",
+ " 'taste': False,\n",
+ " 'gosh': False,\n",
+ " 'bar': False,\n",
+ " 'changed': False,\n",
+ " 'fish': False,\n",
+ " 'stopped': False,\n",
+ " 'wife': False,\n",
+ " 'alot': False,\n",
+ " 'tweetdeck': False,\n",
+ " 'magic': False,\n",
+ " 'brilliant': False,\n",
+ " 'cooking': False,\n",
+ " 'state': False,\n",
+ " 'design': False,\n",
+ " 'film': False,\n",
+ " 'tan': False,\n",
+ " 'isnt': False,\n",
+ " 'nail': False,\n",
+ " 'bummed': False,\n",
+ " 'prob': False,\n",
+ " 'happens': False,\n",
+ " 'eh': False,\n",
+ " 'type': False,\n",
+ " 'young': False,\n",
+ " 'honey': False,\n",
+ " 'price': False,\n",
+ " 'disappointed': False,\n",
+ " 'sky': False,\n",
+ " 'ring': False,\n",
+ " 'surprise': False,\n",
+ " 'self': False,\n",
+ " 'depressing': False,\n",
+ " 'sexy': False,\n",
+ " 'cd': False,\n",
+ " 'woot': False,\n",
+ " 'pop': False,\n",
+ " 'piece': False,\n",
+ " 'degree': False,\n",
+ " 'chillin': False,\n",
+ " 'hilarious': False,\n",
+ " 'rip': False,\n",
+ " 'updated': False,\n",
+ " 'ohh': False,\n",
+ " 'boyfriend': False,\n",
+ " 'awwww': False,\n",
+ " 'hun': False,\n",
+ " 'crappy': False,\n",
+ " 'er': False,\n",
+ " 'although': False,\n",
+ " 'death': False,\n",
+ " 'lake': False,\n",
+ " 'ahead': False,\n",
+ " 'art': False,\n",
+ " 'watchin': False,\n",
+ " 'scary': False,\n",
+ " 'annoying': False,\n",
+ " 'event': False,\n",
+ " 'plus': False,\n",
+ " 'ff': False,\n",
+ " 'major': False,\n",
+ " 'afraid': False,\n",
+ " 'pissed': False,\n",
+ " 'wat': False,\n",
+ " 'huh': False,\n",
+ " 'secret': False,\n",
+ " 'others': False,\n",
+ " 'land': False,\n",
+ " 'caught': False,\n",
+ " 'closed': False,\n",
+ " 'kiss': False,\n",
+ " 'trek': False,\n",
+ " 'code': False,\n",
+ " 'mobile': False,\n",
+ " 'knee': False,\n",
+ " ...},\n",
+ " 0),\n",
+ " ({'wa': False,\n",
+ " 'day': False,\n",
+ " 'good': False,\n",
+ " 'get': False,\n",
+ " 'like': False,\n",
+ " 'go': False,\n",
+ " 'quot': False,\n",
+ " 'love': False,\n",
+ " 'work': False,\n",
+ " 'got': True,\n",
+ " 'today': False,\n",
+ " 'going': False,\n",
+ " 'u': False,\n",
+ " 'time': False,\n",
+ " 'one': False,\n",
+ " 'lol': False,\n",
+ " 'know': False,\n",
+ " 'im': False,\n",
+ " 'back': False,\n",
+ " 'really': False,\n",
+ " 'want': False,\n",
+ " 'night': False,\n",
+ " 'amp': False,\n",
+ " 'well': False,\n",
+ " 'new': False,\n",
+ " 'see': False,\n",
+ " 'think': False,\n",
+ " 'still': False,\n",
+ " 'oh': False,\n",
+ " 'thanks': False,\n",
+ " 'na': False,\n",
+ " 'ha': False,\n",
+ " 'need': False,\n",
+ " 'home': False,\n",
+ " 'much': False,\n",
+ " 'miss': False,\n",
+ " 'feel': False,\n",
+ " 'last': False,\n",
+ " 'morning': True,\n",
+ " 'great': False,\n",
+ " 'make': False,\n",
+ " 'tomorrow': True,\n",
+ " 'twitter': False,\n",
+ " 'haha': False,\n",
+ " 'wish': False,\n",
+ " 'hope': False,\n",
+ " 'bad': False,\n",
+ " 'sad': False,\n",
+ " 'fun': False,\n",
+ " 'come': False,\n",
+ " 'sleep': False,\n",
+ " 'would': False,\n",
+ " 'nice': False,\n",
+ " 'sorry': False,\n",
+ " 'right': False,\n",
+ " 'week': False,\n",
+ " 'tonight': False,\n",
+ " 'happy': False,\n",
+ " 'say': False,\n",
+ " 'thing': False,\n",
+ " 'getting': False,\n",
+ " 'look': False,\n",
+ " 'friend': False,\n",
+ " 'gon': False,\n",
+ " 'though': False,\n",
+ " 'hate': False,\n",
+ " 'wait': False,\n",
+ " 'better': False,\n",
+ " 'bed': True,\n",
+ " 'way': False,\n",
+ " 'watching': False,\n",
+ " 'lt': False,\n",
+ " 'people': False,\n",
+ " 'yeah': False,\n",
+ " 'hour': False,\n",
+ " 'show': False,\n",
+ " 'could': False,\n",
+ " 'thank': False,\n",
+ " 'take': False,\n",
+ " 'weekend': False,\n",
+ " 'next': False,\n",
+ " 'yes': False,\n",
+ " 'school': False,\n",
+ " 'even': False,\n",
+ " 'little': False,\n",
+ " 'life': False,\n",
+ " 'working': False,\n",
+ " 'everyone': False,\n",
+ " 'guy': False,\n",
+ " 'cant': False,\n",
+ " 'sick': False,\n",
+ " 'dont': False,\n",
+ " 'hey': False,\n",
+ " 'let': False,\n",
+ " 'awesome': False,\n",
+ " 'movie': False,\n",
+ " 'girl': False,\n",
+ " 'tweet': False,\n",
+ " 'always': False,\n",
+ " 'x': False,\n",
+ " 'never': False,\n",
+ " 'watch': False,\n",
+ " 'please': False,\n",
+ " 'soon': False,\n",
+ " 'year': False,\n",
+ " 'first': False,\n",
+ " 'long': False,\n",
+ " 'ok': False,\n",
+ " 'tired': False,\n",
+ " 'already': False,\n",
+ " 'feeling': False,\n",
+ " 'suck': False,\n",
+ " 'wan': False,\n",
+ " 'sure': False,\n",
+ " 'best': False,\n",
+ " 'looking': False,\n",
+ " 'n': False,\n",
+ " 'man': False,\n",
+ " 'another': False,\n",
+ " 'something': False,\n",
+ " 'find': False,\n",
+ " 'start': False,\n",
+ " 'cool': False,\n",
+ " 'done': False,\n",
+ " 'pretty': False,\n",
+ " 'omg': False,\n",
+ " 'yay': False,\n",
+ " 'yet': False,\n",
+ " 'phone': False,\n",
+ " 'damn': False,\n",
+ " 'lot': False,\n",
+ " 'away': False,\n",
+ " 'went': False,\n",
+ " 'old': False,\n",
+ " 'follow': False,\n",
+ " 'help': False,\n",
+ " 'guess': False,\n",
+ " 'keep': False,\n",
+ " 'house': False,\n",
+ " 'thought': False,\n",
+ " 'song': False,\n",
+ " 'made': False,\n",
+ " 'ever': False,\n",
+ " 'trying': False,\n",
+ " 'ya': False,\n",
+ " 'sun': False,\n",
+ " 'p': False,\n",
+ " 'bit': False,\n",
+ " 'hurt': False,\n",
+ " 'game': False,\n",
+ " 'finally': False,\n",
+ " 'maybe': False,\n",
+ " 'ready': False,\n",
+ " 'sound': False,\n",
+ " 'w': False,\n",
+ " 'big': False,\n",
+ " 'lost': False,\n",
+ " 'b': False,\n",
+ " 'nothing': False,\n",
+ " 'someone': False,\n",
+ " 'early': True,\n",
+ " 'summer': False,\n",
+ " 'tell': False,\n",
+ " 'hard': False,\n",
+ " 'also': False,\n",
+ " 'birthday': False,\n",
+ " 'r': False,\n",
+ " 'left': False,\n",
+ " 'mean': False,\n",
+ " 'missed': False,\n",
+ " 'ur': False,\n",
+ " 'th': False,\n",
+ " 'rain': False,\n",
+ " 'pic': False,\n",
+ " 'mom': False,\n",
+ " 'com': False,\n",
+ " 'party': False,\n",
+ " 'two': False,\n",
+ " 'glad': False,\n",
+ " 'world': False,\n",
+ " 'baby': False,\n",
+ " 'wow': False,\n",
+ " 'might': False,\n",
+ " 'bored': False,\n",
+ " 'call': False,\n",
+ " 'ta': True,\n",
+ " 'check': False,\n",
+ " 'late': False,\n",
+ " 'waiting': False,\n",
+ " 'car': False,\n",
+ " 'video': False,\n",
+ " 'stuff': False,\n",
+ " 'found': False,\n",
+ " 'yesterday': False,\n",
+ " 'said': False,\n",
+ " 'sunday': False,\n",
+ " 'luck': False,\n",
+ " 'saw': False,\n",
+ " 'amazing': False,\n",
+ " 'hot': False,\n",
+ " 'monday': False,\n",
+ " 'weather': False,\n",
+ " 'live': False,\n",
+ " 'many': False,\n",
+ " 'iphone': False,\n",
+ " 'doe': False,\n",
+ " 'exam': False,\n",
+ " 'thats': False,\n",
+ " 'play': False,\n",
+ " 'making': False,\n",
+ " 'may': False,\n",
+ " 'excited': False,\n",
+ " 'god': False,\n",
+ " 'dad': False,\n",
+ " 'family': False,\n",
+ " 'gone': False,\n",
+ " 'friday': False,\n",
+ " 'boy': False,\n",
+ " 'follower': False,\n",
+ " 'read': False,\n",
+ " 'funny': False,\n",
+ " 'hi': False,\n",
+ " 'since': False,\n",
+ " 'give': False,\n",
+ " 'poor': False,\n",
+ " 'enjoy': False,\n",
+ " 'job': False,\n",
+ " 'okay': False,\n",
+ " 'talk': False,\n",
+ " 'gt': False,\n",
+ " 'later': False,\n",
+ " 'head': False,\n",
+ " 'almost': False,\n",
+ " 'beautiful': False,\n",
+ " 'woke': False,\n",
+ " 'cold': False,\n",
+ " 'anything': False,\n",
+ " 'hear': False,\n",
+ " 'lunch': False,\n",
+ " 'missing': False,\n",
+ " 'put': False,\n",
+ " 'free': False,\n",
+ " 'try': False,\n",
+ " 'coming': False,\n",
+ " 'must': False,\n",
+ " 'end': False,\n",
+ " 'tho': False,\n",
+ " 'leave': False,\n",
+ " 'ugh': False,\n",
+ " 'till': False,\n",
+ " 'busy': False,\n",
+ " 'around': False,\n",
+ " 'food': False,\n",
+ " 'far': False,\n",
+ " 'book': False,\n",
+ " 'music': False,\n",
+ " 'cry': False,\n",
+ " 'headache': False,\n",
+ " 'cause': False,\n",
+ " 'use': False,\n",
+ " 'listening': False,\n",
+ " 'fan': False,\n",
+ " 'stop': False,\n",
+ " 'stay': False,\n",
+ " 'totally': False,\n",
+ " 'wanted': False,\n",
+ " 'place': False,\n",
+ " 'shit': False,\n",
+ " 'xx': False,\n",
+ " 'tv': False,\n",
+ " 'e': False,\n",
+ " 'picture': False,\n",
+ " 'update': False,\n",
+ " 'least': False,\n",
+ " 'sweet': False,\n",
+ " 'anyone': False,\n",
+ " 'lovely': False,\n",
+ " 'thinking': False,\n",
+ " 'forward': False,\n",
+ " 'aww': False,\n",
+ " 'g': False,\n",
+ " 'dog': False,\n",
+ " 'class': False,\n",
+ " 'actually': False,\n",
+ " 'everything': False,\n",
+ " 'eat': False,\n",
+ " 'mine': False,\n",
+ " 'playing': False,\n",
+ " 'cute': False,\n",
+ " 'hahaha': False,\n",
+ " 'kid': False,\n",
+ " 'dinner': False,\n",
+ " 'stupid': False,\n",
+ " 'sooo': False,\n",
+ " 'came': False,\n",
+ " 'word': False,\n",
+ " 'ill': False,\n",
+ " 'eating': False,\n",
+ " 'win': False,\n",
+ " 'hopefully': False,\n",
+ " 'finished': False,\n",
+ " 'welcome': False,\n",
+ " 'anymore': False,\n",
+ " 'minute': False,\n",
+ " 'every': False,\n",
+ " 'face': False,\n",
+ " 'idea': False,\n",
+ " 'hair': False,\n",
+ " 'without': False,\n",
+ " 'kinda': False,\n",
+ " 'month': False,\n",
+ " 'saturday': False,\n",
+ " 'c': False,\n",
+ " 'wrong': False,\n",
+ " 'super': False,\n",
+ " 'www': False,\n",
+ " 'final': False,\n",
+ " 'true': False,\n",
+ " 'reading': False,\n",
+ " 'hug': False,\n",
+ " 'probably': False,\n",
+ " 'photo': False,\n",
+ " 'buy': False,\n",
+ " 'hehe': False,\n",
+ " 'taking': False,\n",
+ " 'believe': False,\n",
+ " 'eye': False,\n",
+ " 'didnt': False,\n",
+ " 'name': False,\n",
+ " 'alone': False,\n",
+ " 'mind': False,\n",
+ " 'room': False,\n",
+ " 'either': False,\n",
+ " 'dream': False,\n",
+ " 'following': False,\n",
+ " 'able': False,\n",
+ " 'goodnight': False,\n",
+ " 'else': False,\n",
+ " 'lmao': False,\n",
+ " 'boo': False,\n",
+ " 'heard': False,\n",
+ " 'coffee': False,\n",
+ " 'outside': False,\n",
+ " 'real': False,\n",
+ " 'mileycyrus': False,\n",
+ " 'break': False,\n",
+ " 'part': False,\n",
+ " 'june': False,\n",
+ " 'computer': False,\n",
+ " 'forgot': False,\n",
+ " 'ticket': False,\n",
+ " 'blog': False,\n",
+ " 'post': False,\n",
+ " 'awww': False,\n",
+ " 'rest': False,\n",
+ " 'dude': False,\n",
+ " 'pm': False,\n",
+ " 'enough': False,\n",
+ " 'brother': False,\n",
+ " 'plan': False,\n",
+ " 'ð': False,\n",
+ " 'half': False,\n",
+ " 'using': False,\n",
+ " 'add': False,\n",
+ " 'stuck': False,\n",
+ " 'person': False,\n",
+ " 'text': False,\n",
+ " 'meet': False,\n",
+ " 'mother': False,\n",
+ " 'album': False,\n",
+ " 'study': False,\n",
+ " 'crazy': False,\n",
+ " 'hand': False,\n",
+ " 'send': False,\n",
+ " 'fine': False,\n",
+ " 'talking': False,\n",
+ " 'whole': False,\n",
+ " 'reply': False,\n",
+ " 'nite': False,\n",
+ " 'k': False,\n",
+ " 'seems': False,\n",
+ " 'run': False,\n",
+ " 'red': False,\n",
+ " 'took': False,\n",
+ " 'v': False,\n",
+ " 'seen': False,\n",
+ " 'trip': False,\n",
+ " 'full': False,\n",
+ " 'beach': False,\n",
+ " 'hello': False,\n",
+ " 'side': False,\n",
+ " 'loved': False,\n",
+ " 'news': False,\n",
+ " 'hit': False,\n",
+ " 'tried': False,\n",
+ " 'kind': False,\n",
+ " 'rock': False,\n",
+ " 'heart': False,\n",
+ " 'yea': False,\n",
+ " 'la': False,\n",
+ " 'shopping': False,\n",
+ " 'problem': False,\n",
+ " 'afternoon': False,\n",
+ " 'â': False,\n",
+ " 'pain': False,\n",
+ " 'change': False,\n",
+ " 'nap': False,\n",
+ " 'started': False,\n",
+ " 'used': False,\n",
+ " 'remember': False,\n",
+ " 'star': False,\n",
+ " 'boring': False,\n",
+ " 'course': False,\n",
+ " 'heading': False,\n",
+ " 'quite': False,\n",
+ " 'seeing': False,\n",
+ " 'hell': False,\n",
+ " 'train': False,\n",
+ " 'breakfast': False,\n",
+ " 'crap': False,\n",
+ " 'told': False,\n",
+ " 'english': False,\n",
+ " 'died': False,\n",
+ " 'sister': False,\n",
+ " 'fuck': False,\n",
+ " 'site': False,\n",
+ " 'instead': False,\n",
+ " 'leaving': False,\n",
+ " 'ipod': False,\n",
+ " 'money': False,\n",
+ " 'raining': False,\n",
+ " 'finish': False,\n",
+ " 'til': False,\n",
+ " 'anyway': False,\n",
+ " 'ah': False,\n",
+ " 'running': False,\n",
+ " 'concert': False,\n",
+ " 'soo': False,\n",
+ " 'facebook': False,\n",
+ " 'link': False,\n",
+ " 'sitting': False,\n",
+ " 'point': False,\n",
+ " 'jealous': False,\n",
+ " 'season': False,\n",
+ " 'bring': False,\n",
+ " 'ñ': False,\n",
+ " 'cuz': False,\n",
+ " 'awake': False,\n",
+ " 'mum': False,\n",
+ " 'reason': False,\n",
+ " 'studying': False,\n",
+ " 'pay': False,\n",
+ " 'cat': False,\n",
+ " 'wonder': False,\n",
+ " 'congrats': False,\n",
+ " 'ago': False,\n",
+ " 'xd': False,\n",
+ " 'page': False,\n",
+ " 'f': False,\n",
+ " 'bought': False,\n",
+ " 'store': False,\n",
+ " 'drink': False,\n",
+ " 'definitely': False,\n",
+ " 'couple': False,\n",
+ " 'evening': False,\n",
+ " 'chocolate': False,\n",
+ " 'btw': False,\n",
+ " 'soooo': False,\n",
+ " 'sunny': False,\n",
+ " 'sore': False,\n",
+ " 'drive': False,\n",
+ " 'shower': False,\n",
+ " 'st': False,\n",
+ " 'lucky': False,\n",
+ " 'aw': False,\n",
+ " 'test': False,\n",
+ " 'walk': False,\n",
+ " 'internet': False,\n",
+ " 'open': False,\n",
+ " 'water': False,\n",
+ " 'wake': True,\n",
+ " 'l': False,\n",
+ " 'office': False,\n",
+ " 'list': False,\n",
+ " 'watched': False,\n",
+ " 'team': False,\n",
+ " 'tommcfly': False,\n",
+ " 'clean': False,\n",
+ " 'enjoying': False,\n",
+ " 'hungry': False,\n",
+ " 'smile': False,\n",
+ " 'seriously': False,\n",
+ " 'move': False,\n",
+ " 'wont': False,\n",
+ " 'high': False,\n",
+ " 'asleep': False,\n",
+ " 'award': False,\n",
+ " 'fucking': False,\n",
+ " 'bout': False,\n",
+ " 'starting': False,\n",
+ " 'top': False,\n",
+ " 'moment': False,\n",
+ " 'dance': False,\n",
+ " 'project': False,\n",
+ " 'second': False,\n",
+ " 'park': False,\n",
+ " 'ask': False,\n",
+ " 'hr': False,\n",
+ " 'email': False,\n",
+ " 'church': False,\n",
+ " 'driving': False,\n",
+ " 'tea': False,\n",
+ " 'broke': False,\n",
+ " 'gym': False,\n",
+ " 'ride': False,\n",
+ " 'le': False,\n",
+ " 'loving': False,\n",
+ " 'meeting': False,\n",
+ " 'worth': False,\n",
+ " 'fail': False,\n",
+ " 'black': False,\n",
+ " 'close': False,\n",
+ " 'visit': False,\n",
+ " 'sigh': False,\n",
+ " 'ate': False,\n",
+ " 'online': False,\n",
+ " 'number': False,\n",
+ " 'vote': False,\n",
+ " 'bye': False,\n",
+ " 'hang': False,\n",
+ " 'wonderful': False,\n",
+ " 'youtube': False,\n",
+ " 'care': False,\n",
+ " 'co': False,\n",
+ " 'cut': False,\n",
+ " 'drinking': False,\n",
+ " 'horrible': False,\n",
+ " 'ddlovato': False,\n",
+ " 'shirt': False,\n",
+ " 'ice': False,\n",
+ " 'saying': False,\n",
+ " 'answer': False,\n",
+ " 'date': False,\n",
+ " 'dear': False,\n",
+ " 'agree': False,\n",
+ " 'j': False,\n",
+ " 'set': False,\n",
+ " 'town': False,\n",
+ " 'da': False,\n",
+ " 'wear': False,\n",
+ " 'happened': False,\n",
+ " 'line': False,\n",
+ " 'parent': False,\n",
+ " 'worse': False,\n",
+ " 'min': False,\n",
+ " 'longer': False,\n",
+ " 'lady': False,\n",
+ " 'together': False,\n",
+ " 'cream': False,\n",
+ " 'worry': False,\n",
+ " 'goin': False,\n",
+ " 'followfriday': False,\n",
+ " 'fast': False,\n",
+ " 'forget': False,\n",
+ " 'fb': False,\n",
+ " 'doesnt': False,\n",
+ " 'broken': False,\n",
+ " 'wtf': False,\n",
+ " 'turn': False,\n",
+ " 'unfortunately': False,\n",
+ " 'chance': False,\n",
+ " 'favorite': False,\n",
+ " 'spent': False,\n",
+ " 'fall': False,\n",
+ " 'air': False,\n",
+ " 'idk': False,\n",
+ " 'slept': False,\n",
+ " 'sometimes': False,\n",
+ " 'rainy': False,\n",
+ " 'question': False,\n",
+ " 'laptop': False,\n",
+ " 'tweeting': False,\n",
+ " 'tuesday': False,\n",
+ " 'earlier': False,\n",
+ " 'mood': False,\n",
+ " 'slow': False,\n",
+ " 'hoping': False,\n",
+ " 'thx': False,\n",
+ " 'h': False,\n",
+ " 'absolutely': False,\n",
+ " 'mr': False,\n",
+ " 'ahh': False,\n",
+ " 'cleaning': False,\n",
+ " 'via': False,\n",
+ " 'holiday': False,\n",
+ " 'happen': False,\n",
+ " 'wishing': False,\n",
+ " 'taken': False,\n",
+ " 'pool': False,\n",
+ " 'episode': False,\n",
+ " 'garden': False,\n",
+ " 'homework': False,\n",
+ " 'website': False,\n",
+ " 'sleeping': False,\n",
+ " 'airport': False,\n",
+ " 'especially': False,\n",
+ " 'business': False,\n",
+ " 'perfect': False,\n",
+ " 'em': False,\n",
+ " 'fell': False,\n",
+ " 'nd': False,\n",
+ " 'upset': False,\n",
+ " 'small': False,\n",
+ " 'city': False,\n",
+ " 'chat': False,\n",
+ " 'knew': False,\n",
+ " 'foot': False,\n",
+ " 'chicken': False,\n",
+ " '½': False,\n",
+ " 'father': False,\n",
+ " 'throat': False,\n",
+ " 'mac': False,\n",
+ " 'weird': False,\n",
+ " 'window': False,\n",
+ " 'passed': False,\n",
+ " 'son': False,\n",
+ " 'story': False,\n",
+ " 'tour': False,\n",
+ " 'message': False,\n",
+ " 'shop': False,\n",
+ " 'wednesday': False,\n",
+ " 'due': False,\n",
+ " 'bbq': False,\n",
+ " 'listen': False,\n",
+ " 'sleepy': False,\n",
+ " 'woman': False,\n",
+ " 'company': False,\n",
+ " 'sunshine': False,\n",
+ " 'met': False,\n",
+ " 'short': False,\n",
+ " 'comment': False,\n",
+ " 'mad': False,\n",
+ " 'understand': False,\n",
+ " 'gave': False,\n",
+ " 'pc': False,\n",
+ " 'hubby': False,\n",
+ " 'different': False,\n",
+ " 'havent': False,\n",
+ " 'account': False,\n",
+ " 'note': False,\n",
+ " 'seem': False,\n",
+ " 'scared': False,\n",
+ " 'bag': False,\n",
+ " 'alright': False,\n",
+ " 'cup': False,\n",
+ " 'ive': False,\n",
+ " 'leg': False,\n",
+ " 'green': False,\n",
+ " 'interesting': False,\n",
+ " 'bus': False,\n",
+ " 'past': False,\n",
+ " 'glass': False,\n",
+ " 'worst': False,\n",
+ " 'power': False,\n",
+ " 'plz': False,\n",
+ " 'math': False,\n",
+ " 'white': False,\n",
+ " 'support': False,\n",
+ " 'nope': False,\n",
+ " 'sat': False,\n",
+ " 'moon': False,\n",
+ " 'hangover': False,\n",
+ " 'moving': False,\n",
+ " 'taylor': False,\n",
+ " 'touch': False,\n",
+ " 'order': False,\n",
+ " 'jonas': False,\n",
+ " 'pick': False,\n",
+ " 'forever': False,\n",
+ " 'tom': False,\n",
+ " 'dead': False,\n",
+ " 'shoot': False,\n",
+ " 'called': False,\n",
+ " 'bitch': False,\n",
+ " 'rather': False,\n",
+ " 'catch': False,\n",
+ " 'bet': False,\n",
+ " 'officially': False,\n",
+ " 'vip': False,\n",
+ " 'writing': False,\n",
+ " 'si': False,\n",
+ " 'lil': False,\n",
+ " 'worried': False,\n",
+ " 'xoxo': False,\n",
+ " 'write': False,\n",
+ " 'graduation': False,\n",
+ " 'liked': False,\n",
+ " 'except': False,\n",
+ " 'bday': False,\n",
+ " 'gay': False,\n",
+ " 'dang': False,\n",
+ " 'cousin': False,\n",
+ " 'load': False,\n",
+ " 'myspace': False,\n",
+ " 'sent': False,\n",
+ " 'fix': False,\n",
+ " 'ahhh': False,\n",
+ " 'fly': False,\n",
+ " 'blue': False,\n",
+ " 'college': False,\n",
+ " 'gorgeous': False,\n",
+ " 'special': False,\n",
+ " 'kill': False,\n",
+ " 'apple': False,\n",
+ " 'interview': False,\n",
+ " 'fight': False,\n",
+ " 'everybody': False,\n",
+ " 'dress': False,\n",
+ " 'box': False,\n",
+ " 'bro': False,\n",
+ " 'case': False,\n",
+ " 'july': False,\n",
+ " 'cake': False,\n",
+ " 'three': False,\n",
+ " 'hmm': False,\n",
+ " 'deal': False,\n",
+ " 'ouch': False,\n",
+ " 'inside': False,\n",
+ " 'yep': False,\n",
+ " 'random': False,\n",
+ " 'meant': False,\n",
+ " 'profile': False,\n",
+ " 'mtv': False,\n",
+ " 'wedding': False,\n",
+ " 'clothes': False,\n",
+ " 'band': False,\n",
+ " 'issue': False,\n",
+ " 'apparently': False,\n",
+ " 'lonely': False,\n",
+ " 'age': False,\n",
+ " 'shall': False,\n",
+ " 'flight': False,\n",
+ " 'yr': False,\n",
+ " 'supposed': False,\n",
+ " 'david': False,\n",
+ " 'living': False,\n",
+ " 'needed': False,\n",
+ " 'london': False,\n",
+ " 'laugh': False,\n",
+ " 'sign': False,\n",
+ " 'finger': False,\n",
+ " 'beer': False,\n",
+ " 'itunes': False,\n",
+ " 'played': False,\n",
+ " 'tear': False,\n",
+ " 'ppl': False,\n",
+ " 'body': False,\n",
+ " 'paper': False,\n",
+ " 'group': False,\n",
+ " 'looked': False,\n",
+ " 'sadly': False,\n",
+ " 'bloody': False,\n",
+ " 'club': False,\n",
+ " 'web': False,\n",
+ " 'vacation': False,\n",
+ " 'jonasbrothers': False,\n",
+ " 'uk': False,\n",
+ " 'version': False,\n",
+ " 'huge': False,\n",
+ " 'sooooo': False,\n",
+ " 'lakers': False,\n",
+ " 'germany': False,\n",
+ " 'save': False,\n",
+ " 'hanging': False,\n",
+ " 'shoe': False,\n",
+ " 'fantastic': False,\n",
+ " 'wine': False,\n",
+ " 'sort': False,\n",
+ " 'none': False,\n",
+ " 'twilight': False,\n",
+ " 'shot': False,\n",
+ " 'thursday': False,\n",
+ " 'confused': False,\n",
+ " 'sale': False,\n",
+ " 'goodbye': False,\n",
+ " 'round': False,\n",
+ " 'lame': False,\n",
+ " 'cheer': False,\n",
+ " 'singing': False,\n",
+ " 'promise': False,\n",
+ " 'drop': False,\n",
+ " 'nose': False,\n",
+ " 'whats': False,\n",
+ " 'babe': False,\n",
+ " 'xxx': False,\n",
+ " 'lesson': False,\n",
+ " 'camera': False,\n",
+ " 'download': False,\n",
+ " 'info': False,\n",
+ " 'fair': False,\n",
+ " 'yummy': False,\n",
+ " 'french': False,\n",
+ " 'miley': False,\n",
+ " 'light': False,\n",
+ " 'asked': False,\n",
+ " 'lazy': False,\n",
+ " 'indeed': False,\n",
+ " 'along': False,\n",
+ " 'quick': False,\n",
+ " 'ã': False,\n",
+ " 'door': False,\n",
+ " 'sit': False,\n",
+ " 'yup': False,\n",
+ " 'street': False,\n",
+ " 'mile': False,\n",
+ " 'dm': False,\n",
+ " 'jus': False,\n",
+ " 'giving': False,\n",
+ " 'ear': False,\n",
+ " 'service': False,\n",
+ " 'low': False,\n",
+ " 'relaxing': False,\n",
+ " 'radio': False,\n",
+ " 'arm': False,\n",
+ " 'future': False,\n",
+ " 'proud': False,\n",
+ " 'fact': False,\n",
+ " 'learn': False,\n",
+ " 'easy': False,\n",
+ " 'child': False,\n",
+ " 'kitty': False,\n",
+ " 'hold': False,\n",
+ " 'mark': False,\n",
+ " 'peep': False,\n",
+ " 'wearing': False,\n",
+ " 'luv': False,\n",
+ " 'south': False,\n",
+ " 'road': False,\n",
+ " 'smell': False,\n",
+ " 'exciting': False,\n",
+ " 'currently': False,\n",
+ " 'warm': False,\n",
+ " 'puppy': False,\n",
+ " 'packing': False,\n",
+ " 'google': False,\n",
+ " 'exactly': False,\n",
+ " 'share': False,\n",
+ " 'decided': False,\n",
+ " 'voice': False,\n",
+ " 'fat': False,\n",
+ " 'mommy': False,\n",
+ " 'realize': False,\n",
+ " 'join': False,\n",
+ " 'nearly': False,\n",
+ " 'bb': False,\n",
+ " 'whatever': False,\n",
+ " 'woo': False,\n",
+ " 'spend': False,\n",
+ " 'stand': False,\n",
+ " 'enjoyed': False,\n",
+ " 'tweeps': False,\n",
+ " 'sold': False,\n",
+ " 'gettin': False,\n",
+ " 'fever': False,\n",
+ " 'paid': False,\n",
+ " 'beat': False,\n",
+ " 'pink': False,\n",
+ " 'mall': False,\n",
+ " 'gunna': False,\n",
+ " 'ï': False,\n",
+ " 'freaking': False,\n",
+ " 'snow': False,\n",
+ " 'hospital': False,\n",
+ " 'cheese': False,\n",
+ " 'nobody': False,\n",
+ " 'wondering': False,\n",
+ " 'plane': False,\n",
+ " 'shame': False,\n",
+ " 'search': False,\n",
+ " 'ended': False,\n",
+ " 'helping': False,\n",
+ " 'safe': False,\n",
+ " 'staying': False,\n",
+ " 'storm': False,\n",
+ " 'fam': False,\n",
+ " 'z': False,\n",
+ " 'sing': False,\n",
+ " 'view': False,\n",
+ " 'yo': False,\n",
+ " 'card': False,\n",
+ " 'stick': False,\n",
+ " 'joe': False,\n",
+ " 'matter': False,\n",
+ " 'delicious': False,\n",
+ " 'cook': False,\n",
+ " 'country': False,\n",
+ " 'hill': False,\n",
+ " 'peace': False,\n",
+ " 'worked': False,\n",
+ " 'race': False,\n",
+ " 'boot': False,\n",
+ " 'figure': False,\n",
+ " 'vega': False,\n",
+ " 'france': False,\n",
+ " 'wit': False,\n",
+ " 'topic': False,\n",
+ " 'spot': False,\n",
+ " 'cover': False,\n",
+ " 'afford': False,\n",
+ " 'floor': False,\n",
+ " 'bike': False,\n",
+ " 'starbucks': False,\n",
+ " 'hahah': False,\n",
+ " 'tummy': False,\n",
+ " 'trouble': False,\n",
+ " 'mouth': False,\n",
+ " 'ran': False,\n",
+ " 'drunk': False,\n",
+ " 'lately': False,\n",
+ " 'joke': False,\n",
+ " 'bird': False,\n",
+ " 'flu': False,\n",
+ " 'showing': False,\n",
+ " 'mail': False,\n",
+ " 'turned': False,\n",
+ " 'men': False,\n",
+ " 'taste': False,\n",
+ " 'gosh': False,\n",
+ " 'bar': False,\n",
+ " 'changed': False,\n",
+ " 'fish': False,\n",
+ " 'stopped': False,\n",
+ " 'wife': False,\n",
+ " 'alot': False,\n",
+ " 'tweetdeck': False,\n",
+ " 'magic': False,\n",
+ " 'brilliant': False,\n",
+ " 'cooking': False,\n",
+ " 'state': False,\n",
+ " 'design': False,\n",
+ " 'film': False,\n",
+ " 'tan': False,\n",
+ " 'isnt': False,\n",
+ " 'nail': False,\n",
+ " 'bummed': False,\n",
+ " 'prob': False,\n",
+ " 'happens': False,\n",
+ " 'eh': False,\n",
+ " 'type': False,\n",
+ " 'young': False,\n",
+ " 'honey': False,\n",
+ " 'price': False,\n",
+ " 'disappointed': False,\n",
+ " 'sky': False,\n",
+ " 'ring': False,\n",
+ " 'surprise': False,\n",
+ " 'self': False,\n",
+ " 'depressing': False,\n",
+ " 'sexy': False,\n",
+ " 'cd': False,\n",
+ " 'woot': False,\n",
+ " 'pop': False,\n",
+ " 'piece': False,\n",
+ " 'degree': False,\n",
+ " 'chillin': False,\n",
+ " 'hilarious': False,\n",
+ " 'rip': False,\n",
+ " 'updated': False,\n",
+ " 'ohh': False,\n",
+ " 'boyfriend': False,\n",
+ " 'awwww': False,\n",
+ " 'hun': False,\n",
+ " 'crappy': False,\n",
+ " 'er': False,\n",
+ " 'although': False,\n",
+ " 'death': False,\n",
+ " 'lake': False,\n",
+ " 'ahead': False,\n",
+ " 'art': False,\n",
+ " 'watchin': False,\n",
+ " 'scary': False,\n",
+ " 'annoying': False,\n",
+ " 'event': False,\n",
+ " 'plus': False,\n",
+ " 'ff': False,\n",
+ " 'major': False,\n",
+ " 'afraid': False,\n",
+ " 'pissed': False,\n",
+ " 'wat': False,\n",
+ " 'huh': False,\n",
+ " 'secret': False,\n",
+ " 'others': False,\n",
+ " 'land': False,\n",
+ " 'caught': False,\n",
+ " 'closed': False,\n",
+ " 'kiss': False,\n",
+ " 'trek': False,\n",
+ " 'code': False,\n",
+ " 'mobile': False,\n",
+ " 'knee': False,\n",
+ " ...},\n",
+ " 0),\n",
+ " ({'wa': False,\n",
+ " 'day': False,\n",
+ " 'good': False,\n",
+ " 'get': False,\n",
+ " 'like': False,\n",
+ " 'go': False,\n",
+ " 'quot': False,\n",
+ " 'love': False,\n",
+ " 'work': False,\n",
+ " 'got': False,\n",
+ " 'today': False,\n",
+ " 'going': False,\n",
+ " 'u': False,\n",
+ " 'time': False,\n",
+ " 'one': False,\n",
+ " 'lol': False,\n",
+ " 'know': False,\n",
+ " 'im': False,\n",
+ " 'back': False,\n",
+ " 'really': False,\n",
+ " 'want': False,\n",
+ " 'night': False,\n",
+ " 'amp': False,\n",
+ " 'well': False,\n",
+ " 'new': False,\n",
+ " 'see': False,\n",
+ " 'think': False,\n",
+ " 'still': False,\n",
+ " 'oh': False,\n",
+ " 'thanks': False,\n",
+ " 'na': False,\n",
+ " 'ha': False,\n",
+ " 'need': False,\n",
+ " 'home': False,\n",
+ " 'much': False,\n",
+ " 'miss': False,\n",
+ " 'feel': False,\n",
+ " 'last': False,\n",
+ " 'morning': False,\n",
+ " 'great': False,\n",
+ " 'make': False,\n",
+ " 'tomorrow': False,\n",
+ " 'twitter': False,\n",
+ " 'haha': False,\n",
+ " 'wish': False,\n",
+ " 'hope': False,\n",
+ " 'bad': False,\n",
+ " 'sad': False,\n",
+ " 'fun': False,\n",
+ " 'come': False,\n",
+ " 'sleep': False,\n",
+ " 'would': False,\n",
+ " 'nice': False,\n",
+ " 'sorry': False,\n",
+ " 'right': False,\n",
+ " 'week': False,\n",
+ " 'tonight': False,\n",
+ " 'happy': False,\n",
+ " 'say': False,\n",
+ " 'thing': False,\n",
+ " 'getting': False,\n",
+ " 'look': False,\n",
+ " 'friend': False,\n",
+ " 'gon': False,\n",
+ " 'though': False,\n",
+ " 'hate': False,\n",
+ " 'wait': False,\n",
+ " 'better': False,\n",
+ " 'bed': False,\n",
+ " 'way': False,\n",
+ " 'watching': False,\n",
+ " 'lt': False,\n",
+ " 'people': False,\n",
+ " 'yeah': False,\n",
+ " 'hour': False,\n",
+ " 'show': False,\n",
+ " 'could': False,\n",
+ " 'thank': False,\n",
+ " 'take': False,\n",
+ " 'weekend': False,\n",
+ " 'next': False,\n",
+ " 'yes': False,\n",
+ " 'school': False,\n",
+ " 'even': False,\n",
+ " 'little': False,\n",
+ " 'life': False,\n",
+ " 'working': False,\n",
+ " 'everyone': False,\n",
+ " 'guy': False,\n",
+ " 'cant': False,\n",
+ " 'sick': False,\n",
+ " 'dont': False,\n",
+ " 'hey': False,\n",
+ " 'let': False,\n",
+ " 'awesome': False,\n",
+ " 'movie': False,\n",
+ " 'girl': False,\n",
+ " 'tweet': False,\n",
+ " 'always': False,\n",
+ " 'x': False,\n",
+ " 'never': False,\n",
+ " 'watch': False,\n",
+ " 'please': False,\n",
+ " 'soon': False,\n",
+ " 'year': False,\n",
+ " 'first': False,\n",
+ " 'long': False,\n",
+ " 'ok': False,\n",
+ " 'tired': False,\n",
+ " 'already': False,\n",
+ " 'feeling': False,\n",
+ " 'suck': False,\n",
+ " 'wan': False,\n",
+ " 'sure': False,\n",
+ " 'best': False,\n",
+ " 'looking': False,\n",
+ " 'n': False,\n",
+ " 'man': False,\n",
+ " 'another': False,\n",
+ " 'something': False,\n",
+ " 'find': False,\n",
+ " 'start': False,\n",
+ " 'cool': False,\n",
+ " 'done': False,\n",
+ " 'pretty': False,\n",
+ " 'omg': False,\n",
+ " 'yay': False,\n",
+ " 'yet': True,\n",
+ " 'phone': False,\n",
+ " 'damn': False,\n",
+ " 'lot': False,\n",
+ " 'away': False,\n",
+ " 'went': False,\n",
+ " 'old': False,\n",
+ " 'follow': False,\n",
+ " 'help': False,\n",
+ " 'guess': False,\n",
+ " 'keep': False,\n",
+ " 'house': False,\n",
+ " 'thought': False,\n",
+ " 'song': False,\n",
+ " 'made': False,\n",
+ " 'ever': False,\n",
+ " 'trying': False,\n",
+ " 'ya': False,\n",
+ " 'sun': False,\n",
+ " 'p': False,\n",
+ " 'bit': False,\n",
+ " 'hurt': False,\n",
+ " 'game': False,\n",
+ " 'finally': False,\n",
+ " 'maybe': False,\n",
+ " 'ready': False,\n",
+ " 'sound': False,\n",
+ " 'w': False,\n",
+ " 'big': False,\n",
+ " 'lost': False,\n",
+ " 'b': False,\n",
+ " 'nothing': False,\n",
+ " 'someone': False,\n",
+ " 'early': False,\n",
+ " 'summer': False,\n",
+ " 'tell': False,\n",
+ " 'hard': False,\n",
+ " 'also': False,\n",
+ " 'birthday': False,\n",
+ " 'r': False,\n",
+ " 'left': False,\n",
+ " 'mean': False,\n",
+ " 'missed': False,\n",
+ " 'ur': False,\n",
+ " 'th': False,\n",
+ " 'rain': False,\n",
+ " 'pic': False,\n",
+ " 'mom': False,\n",
+ " 'com': False,\n",
+ " 'party': False,\n",
+ " 'two': False,\n",
+ " 'glad': False,\n",
+ " 'world': False,\n",
+ " 'baby': False,\n",
+ " 'wow': False,\n",
+ " 'might': False,\n",
+ " 'bored': False,\n",
+ " 'call': False,\n",
+ " 'ta': False,\n",
+ " 'check': False,\n",
+ " 'late': False,\n",
+ " 'waiting': False,\n",
+ " 'car': False,\n",
+ " 'video': False,\n",
+ " 'stuff': False,\n",
+ " 'found': False,\n",
+ " 'yesterday': False,\n",
+ " 'said': False,\n",
+ " 'sunday': False,\n",
+ " 'luck': False,\n",
+ " 'saw': False,\n",
+ " 'amazing': False,\n",
+ " 'hot': False,\n",
+ " 'monday': False,\n",
+ " 'weather': False,\n",
+ " 'live': False,\n",
+ " 'many': False,\n",
+ " 'iphone': False,\n",
+ " 'doe': False,\n",
+ " 'exam': False,\n",
+ " 'thats': False,\n",
+ " 'play': False,\n",
+ " 'making': False,\n",
+ " 'may': False,\n",
+ " 'excited': False,\n",
+ " 'god': False,\n",
+ " 'dad': False,\n",
+ " 'family': False,\n",
+ " 'gone': False,\n",
+ " 'friday': False,\n",
+ " 'boy': False,\n",
+ " 'follower': False,\n",
+ " 'read': False,\n",
+ " 'funny': False,\n",
+ " 'hi': False,\n",
+ " 'since': False,\n",
+ " 'give': False,\n",
+ " 'poor': False,\n",
+ " 'enjoy': False,\n",
+ " 'job': False,\n",
+ " 'okay': False,\n",
+ " 'talk': False,\n",
+ " 'gt': False,\n",
+ " 'later': False,\n",
+ " 'head': False,\n",
+ " 'almost': False,\n",
+ " 'beautiful': False,\n",
+ " 'woke': False,\n",
+ " 'cold': False,\n",
+ " 'anything': False,\n",
+ " 'hear': False,\n",
+ " 'lunch': False,\n",
+ " 'missing': False,\n",
+ " 'put': False,\n",
+ " 'free': False,\n",
+ " 'try': False,\n",
+ " 'coming': False,\n",
+ " 'must': False,\n",
+ " 'end': False,\n",
+ " 'tho': False,\n",
+ " 'leave': False,\n",
+ " 'ugh': False,\n",
+ " 'till': False,\n",
+ " 'busy': False,\n",
+ " 'around': False,\n",
+ " 'food': False,\n",
+ " 'far': False,\n",
+ " 'book': False,\n",
+ " 'music': False,\n",
+ " 'cry': False,\n",
+ " 'headache': False,\n",
+ " 'cause': False,\n",
+ " 'use': False,\n",
+ " 'listening': False,\n",
+ " 'fan': False,\n",
+ " 'stop': False,\n",
+ " 'stay': False,\n",
+ " 'totally': False,\n",
+ " 'wanted': False,\n",
+ " 'place': False,\n",
+ " 'shit': False,\n",
+ " 'xx': False,\n",
+ " 'tv': False,\n",
+ " 'e': False,\n",
+ " 'picture': False,\n",
+ " 'update': False,\n",
+ " 'least': False,\n",
+ " 'sweet': False,\n",
+ " 'anyone': False,\n",
+ " 'lovely': False,\n",
+ " 'thinking': False,\n",
+ " 'forward': False,\n",
+ " 'aww': False,\n",
+ " 'g': False,\n",
+ " 'dog': False,\n",
+ " 'class': False,\n",
+ " 'actually': False,\n",
+ " 'everything': False,\n",
+ " 'eat': False,\n",
+ " 'mine': False,\n",
+ " 'playing': False,\n",
+ " 'cute': False,\n",
+ " 'hahaha': False,\n",
+ " 'kid': False,\n",
+ " 'dinner': False,\n",
+ " 'stupid': False,\n",
+ " 'sooo': False,\n",
+ " 'came': False,\n",
+ " 'word': False,\n",
+ " 'ill': False,\n",
+ " 'eating': False,\n",
+ " 'win': False,\n",
+ " 'hopefully': False,\n",
+ " 'finished': False,\n",
+ " 'welcome': False,\n",
+ " 'anymore': False,\n",
+ " 'minute': False,\n",
+ " 'every': False,\n",
+ " 'face': False,\n",
+ " 'idea': False,\n",
+ " 'hair': False,\n",
+ " 'without': False,\n",
+ " 'kinda': False,\n",
+ " 'month': False,\n",
+ " 'saturday': False,\n",
+ " 'c': False,\n",
+ " 'wrong': False,\n",
+ " 'super': False,\n",
+ " 'www': False,\n",
+ " 'final': False,\n",
+ " 'true': False,\n",
+ " 'reading': False,\n",
+ " 'hug': False,\n",
+ " 'probably': False,\n",
+ " 'photo': False,\n",
+ " 'buy': False,\n",
+ " 'hehe': False,\n",
+ " 'taking': False,\n",
+ " 'believe': False,\n",
+ " 'eye': False,\n",
+ " 'didnt': False,\n",
+ " 'name': False,\n",
+ " 'alone': False,\n",
+ " 'mind': False,\n",
+ " 'room': False,\n",
+ " 'either': False,\n",
+ " 'dream': False,\n",
+ " 'following': False,\n",
+ " 'able': True,\n",
+ " 'goodnight': False,\n",
+ " 'else': False,\n",
+ " 'lmao': False,\n",
+ " 'boo': False,\n",
+ " 'heard': False,\n",
+ " 'coffee': False,\n",
+ " 'outside': False,\n",
+ " 'real': False,\n",
+ " 'mileycyrus': False,\n",
+ " 'break': False,\n",
+ " 'part': False,\n",
+ " 'june': False,\n",
+ " 'computer': False,\n",
+ " 'forgot': False,\n",
+ " 'ticket': False,\n",
+ " 'blog': False,\n",
+ " 'post': False,\n",
+ " 'awww': False,\n",
+ " 'rest': False,\n",
+ " 'dude': False,\n",
+ " 'pm': False,\n",
+ " 'enough': False,\n",
+ " 'brother': False,\n",
+ " 'plan': False,\n",
+ " 'ð': False,\n",
+ " 'half': False,\n",
+ " 'using': False,\n",
+ " 'add': False,\n",
+ " 'stuck': False,\n",
+ " 'person': False,\n",
+ " 'text': False,\n",
+ " 'meet': False,\n",
+ " 'mother': False,\n",
+ " 'album': False,\n",
+ " 'study': False,\n",
+ " 'crazy': False,\n",
+ " 'hand': False,\n",
+ " 'send': False,\n",
+ " 'fine': False,\n",
+ " 'talking': False,\n",
+ " 'whole': False,\n",
+ " 'reply': False,\n",
+ " 'nite': False,\n",
+ " 'k': False,\n",
+ " 'seems': False,\n",
+ " 'run': False,\n",
+ " 'red': False,\n",
+ " 'took': False,\n",
+ " 'v': False,\n",
+ " 'seen': False,\n",
+ " 'trip': False,\n",
+ " 'full': False,\n",
+ " 'beach': False,\n",
+ " 'hello': False,\n",
+ " 'side': False,\n",
+ " 'loved': False,\n",
+ " 'news': False,\n",
+ " 'hit': False,\n",
+ " 'tried': False,\n",
+ " 'kind': False,\n",
+ " 'rock': False,\n",
+ " 'heart': False,\n",
+ " 'yea': False,\n",
+ " 'la': False,\n",
+ " 'shopping': False,\n",
+ " 'problem': False,\n",
+ " 'afternoon': False,\n",
+ " 'â': False,\n",
+ " 'pain': False,\n",
+ " 'change': False,\n",
+ " 'nap': False,\n",
+ " 'started': False,\n",
+ " 'used': False,\n",
+ " 'remember': False,\n",
+ " 'star': False,\n",
+ " 'boring': False,\n",
+ " 'course': False,\n",
+ " 'heading': False,\n",
+ " 'quite': False,\n",
+ " 'seeing': False,\n",
+ " 'hell': False,\n",
+ " 'train': False,\n",
+ " 'breakfast': False,\n",
+ " 'crap': False,\n",
+ " 'told': False,\n",
+ " 'english': False,\n",
+ " 'died': False,\n",
+ " 'sister': False,\n",
+ " 'fuck': False,\n",
+ " 'site': False,\n",
+ " 'instead': False,\n",
+ " 'leaving': False,\n",
+ " 'ipod': False,\n",
+ " 'money': False,\n",
+ " 'raining': False,\n",
+ " 'finish': False,\n",
+ " 'til': False,\n",
+ " 'anyway': False,\n",
+ " 'ah': False,\n",
+ " 'running': False,\n",
+ " 'concert': False,\n",
+ " 'soo': False,\n",
+ " 'facebook': False,\n",
+ " 'link': False,\n",
+ " 'sitting': False,\n",
+ " 'point': False,\n",
+ " 'jealous': False,\n",
+ " 'season': False,\n",
+ " 'bring': False,\n",
+ " 'ñ': False,\n",
+ " 'cuz': False,\n",
+ " 'awake': False,\n",
+ " 'mum': False,\n",
+ " 'reason': False,\n",
+ " 'studying': False,\n",
+ " 'pay': False,\n",
+ " 'cat': False,\n",
+ " 'wonder': False,\n",
+ " 'congrats': False,\n",
+ " 'ago': False,\n",
+ " 'xd': False,\n",
+ " 'page': False,\n",
+ " 'f': False,\n",
+ " 'bought': False,\n",
+ " 'store': False,\n",
+ " 'drink': False,\n",
+ " 'definitely': False,\n",
+ " 'couple': False,\n",
+ " 'evening': False,\n",
+ " 'chocolate': False,\n",
+ " 'btw': False,\n",
+ " 'soooo': False,\n",
+ " 'sunny': False,\n",
+ " 'sore': False,\n",
+ " 'drive': False,\n",
+ " 'shower': False,\n",
+ " 'st': False,\n",
+ " 'lucky': False,\n",
+ " 'aw': False,\n",
+ " 'test': False,\n",
+ " 'walk': False,\n",
+ " 'internet': False,\n",
+ " 'open': False,\n",
+ " 'water': False,\n",
+ " 'wake': False,\n",
+ " 'l': False,\n",
+ " 'office': False,\n",
+ " 'list': False,\n",
+ " 'watched': False,\n",
+ " 'team': False,\n",
+ " 'tommcfly': False,\n",
+ " 'clean': False,\n",
+ " 'enjoying': False,\n",
+ " 'hungry': False,\n",
+ " 'smile': False,\n",
+ " 'seriously': False,\n",
+ " 'move': False,\n",
+ " 'wont': False,\n",
+ " 'high': False,\n",
+ " 'asleep': False,\n",
+ " 'award': False,\n",
+ " 'fucking': False,\n",
+ " 'bout': False,\n",
+ " 'starting': False,\n",
+ " 'top': False,\n",
+ " 'moment': False,\n",
+ " 'dance': False,\n",
+ " 'project': False,\n",
+ " 'second': False,\n",
+ " 'park': False,\n",
+ " 'ask': False,\n",
+ " 'hr': False,\n",
+ " 'email': False,\n",
+ " 'church': False,\n",
+ " 'driving': False,\n",
+ " 'tea': False,\n",
+ " 'broke': False,\n",
+ " 'gym': False,\n",
+ " 'ride': False,\n",
+ " 'le': False,\n",
+ " 'loving': False,\n",
+ " 'meeting': False,\n",
+ " 'worth': False,\n",
+ " 'fail': False,\n",
+ " 'black': False,\n",
+ " 'close': False,\n",
+ " 'visit': False,\n",
+ " 'sigh': False,\n",
+ " 'ate': False,\n",
+ " 'online': False,\n",
+ " 'number': False,\n",
+ " 'vote': False,\n",
+ " 'bye': False,\n",
+ " 'hang': False,\n",
+ " 'wonderful': False,\n",
+ " 'youtube': False,\n",
+ " 'care': False,\n",
+ " 'co': False,\n",
+ " 'cut': False,\n",
+ " 'drinking': False,\n",
+ " 'horrible': False,\n",
+ " 'ddlovato': False,\n",
+ " 'shirt': False,\n",
+ " 'ice': False,\n",
+ " 'saying': False,\n",
+ " 'answer': False,\n",
+ " 'date': False,\n",
+ " 'dear': False,\n",
+ " 'agree': False,\n",
+ " 'j': False,\n",
+ " 'set': False,\n",
+ " 'town': False,\n",
+ " 'da': False,\n",
+ " 'wear': False,\n",
+ " 'happened': False,\n",
+ " 'line': False,\n",
+ " 'parent': False,\n",
+ " 'worse': False,\n",
+ " 'min': False,\n",
+ " 'longer': False,\n",
+ " 'lady': False,\n",
+ " 'together': False,\n",
+ " 'cream': False,\n",
+ " 'worry': False,\n",
+ " 'goin': False,\n",
+ " 'followfriday': False,\n",
+ " 'fast': False,\n",
+ " 'forget': False,\n",
+ " 'fb': False,\n",
+ " 'doesnt': False,\n",
+ " 'broken': False,\n",
+ " 'wtf': False,\n",
+ " 'turn': False,\n",
+ " 'unfortunately': False,\n",
+ " 'chance': False,\n",
+ " 'favorite': False,\n",
+ " 'spent': False,\n",
+ " 'fall': False,\n",
+ " 'air': False,\n",
+ " 'idk': False,\n",
+ " 'slept': False,\n",
+ " 'sometimes': False,\n",
+ " 'rainy': False,\n",
+ " 'question': False,\n",
+ " 'laptop': False,\n",
+ " 'tweeting': False,\n",
+ " 'tuesday': False,\n",
+ " 'earlier': False,\n",
+ " 'mood': False,\n",
+ " 'slow': False,\n",
+ " 'hoping': False,\n",
+ " 'thx': False,\n",
+ " 'h': False,\n",
+ " 'absolutely': False,\n",
+ " 'mr': False,\n",
+ " 'ahh': False,\n",
+ " 'cleaning': False,\n",
+ " 'via': False,\n",
+ " 'holiday': False,\n",
+ " 'happen': False,\n",
+ " 'wishing': False,\n",
+ " 'taken': False,\n",
+ " 'pool': False,\n",
+ " 'episode': False,\n",
+ " 'garden': False,\n",
+ " 'homework': False,\n",
+ " 'website': False,\n",
+ " 'sleeping': False,\n",
+ " 'airport': False,\n",
+ " 'especially': False,\n",
+ " 'business': False,\n",
+ " 'perfect': False,\n",
+ " 'em': False,\n",
+ " 'fell': False,\n",
+ " 'nd': False,\n",
+ " 'upset': False,\n",
+ " 'small': False,\n",
+ " 'city': False,\n",
+ " 'chat': False,\n",
+ " 'knew': False,\n",
+ " 'foot': False,\n",
+ " 'chicken': False,\n",
+ " '½': False,\n",
+ " 'father': False,\n",
+ " 'throat': False,\n",
+ " 'mac': False,\n",
+ " 'weird': False,\n",
+ " 'window': False,\n",
+ " 'passed': False,\n",
+ " 'son': False,\n",
+ " 'story': False,\n",
+ " 'tour': False,\n",
+ " 'message': False,\n",
+ " 'shop': False,\n",
+ " 'wednesday': False,\n",
+ " 'due': False,\n",
+ " 'bbq': False,\n",
+ " 'listen': True,\n",
+ " 'sleepy': False,\n",
+ " 'woman': False,\n",
+ " 'company': False,\n",
+ " 'sunshine': False,\n",
+ " 'met': False,\n",
+ " 'short': False,\n",
+ " 'comment': False,\n",
+ " 'mad': False,\n",
+ " 'understand': False,\n",
+ " 'gave': False,\n",
+ " 'pc': False,\n",
+ " 'hubby': False,\n",
+ " 'different': False,\n",
+ " 'havent': True,\n",
+ " 'account': False,\n",
+ " 'note': False,\n",
+ " 'seem': False,\n",
+ " 'scared': False,\n",
+ " 'bag': False,\n",
+ " 'alright': False,\n",
+ " 'cup': False,\n",
+ " 'ive': False,\n",
+ " 'leg': False,\n",
+ " 'green': False,\n",
+ " 'interesting': False,\n",
+ " 'bus': False,\n",
+ " 'past': False,\n",
+ " 'glass': False,\n",
+ " 'worst': False,\n",
+ " 'power': False,\n",
+ " 'plz': False,\n",
+ " 'math': False,\n",
+ " 'white': False,\n",
+ " 'support': False,\n",
+ " 'nope': False,\n",
+ " 'sat': False,\n",
+ " 'moon': False,\n",
+ " 'hangover': False,\n",
+ " 'moving': False,\n",
+ " 'taylor': False,\n",
+ " 'touch': False,\n",
+ " 'order': False,\n",
+ " 'jonas': False,\n",
+ " 'pick': False,\n",
+ " 'forever': False,\n",
+ " 'tom': False,\n",
+ " 'dead': False,\n",
+ " 'shoot': False,\n",
+ " 'called': False,\n",
+ " 'bitch': False,\n",
+ " 'rather': False,\n",
+ " 'catch': False,\n",
+ " 'bet': False,\n",
+ " 'officially': False,\n",
+ " 'vip': False,\n",
+ " 'writing': False,\n",
+ " 'si': False,\n",
+ " 'lil': False,\n",
+ " 'worried': False,\n",
+ " 'xoxo': False,\n",
+ " 'write': False,\n",
+ " 'graduation': False,\n",
+ " 'liked': False,\n",
+ " 'except': False,\n",
+ " 'bday': False,\n",
+ " 'gay': False,\n",
+ " 'dang': False,\n",
+ " 'cousin': False,\n",
+ " 'load': False,\n",
+ " 'myspace': False,\n",
+ " 'sent': False,\n",
+ " 'fix': False,\n",
+ " 'ahhh': False,\n",
+ " 'fly': False,\n",
+ " 'blue': False,\n",
+ " 'college': False,\n",
+ " 'gorgeous': False,\n",
+ " 'special': False,\n",
+ " 'kill': False,\n",
+ " 'apple': False,\n",
+ " 'interview': False,\n",
+ " 'fight': False,\n",
+ " 'everybody': False,\n",
+ " 'dress': False,\n",
+ " 'box': False,\n",
+ " 'bro': False,\n",
+ " 'case': False,\n",
+ " 'july': False,\n",
+ " 'cake': False,\n",
+ " 'three': False,\n",
+ " 'hmm': False,\n",
+ " 'deal': False,\n",
+ " 'ouch': False,\n",
+ " 'inside': False,\n",
+ " 'yep': False,\n",
+ " 'random': False,\n",
+ " 'meant': False,\n",
+ " 'profile': False,\n",
+ " 'mtv': False,\n",
+ " 'wedding': False,\n",
+ " 'clothes': False,\n",
+ " 'band': False,\n",
+ " 'issue': False,\n",
+ " 'apparently': False,\n",
+ " 'lonely': False,\n",
+ " 'age': False,\n",
+ " 'shall': False,\n",
+ " 'flight': False,\n",
+ " 'yr': False,\n",
+ " 'supposed': False,\n",
+ " 'david': False,\n",
+ " 'living': False,\n",
+ " 'needed': False,\n",
+ " 'london': False,\n",
+ " 'laugh': False,\n",
+ " 'sign': False,\n",
+ " 'finger': False,\n",
+ " 'beer': False,\n",
+ " 'itunes': False,\n",
+ " 'played': False,\n",
+ " 'tear': False,\n",
+ " 'ppl': False,\n",
+ " 'body': False,\n",
+ " 'paper': False,\n",
+ " 'group': False,\n",
+ " 'looked': False,\n",
+ " 'sadly': False,\n",
+ " 'bloody': False,\n",
+ " 'club': False,\n",
+ " 'web': False,\n",
+ " 'vacation': False,\n",
+ " 'jonasbrothers': False,\n",
+ " 'uk': False,\n",
+ " 'version': False,\n",
+ " 'huge': False,\n",
+ " 'sooooo': False,\n",
+ " 'lakers': False,\n",
+ " 'germany': False,\n",
+ " 'save': False,\n",
+ " 'hanging': False,\n",
+ " 'shoe': False,\n",
+ " 'fantastic': False,\n",
+ " 'wine': False,\n",
+ " 'sort': False,\n",
+ " 'none': False,\n",
+ " 'twilight': False,\n",
+ " 'shot': False,\n",
+ " 'thursday': False,\n",
+ " 'confused': False,\n",
+ " 'sale': False,\n",
+ " 'goodbye': False,\n",
+ " 'round': False,\n",
+ " 'lame': False,\n",
+ " 'cheer': False,\n",
+ " 'singing': False,\n",
+ " 'promise': False,\n",
+ " 'drop': False,\n",
+ " 'nose': False,\n",
+ " 'whats': False,\n",
+ " 'babe': False,\n",
+ " 'xxx': False,\n",
+ " 'lesson': False,\n",
+ " 'camera': False,\n",
+ " 'download': False,\n",
+ " 'info': False,\n",
+ " 'fair': False,\n",
+ " 'yummy': False,\n",
+ " 'french': False,\n",
+ " 'miley': False,\n",
+ " 'light': False,\n",
+ " 'asked': False,\n",
+ " 'lazy': False,\n",
+ " 'indeed': False,\n",
+ " 'along': False,\n",
+ " 'quick': False,\n",
+ " 'ã': False,\n",
+ " 'door': False,\n",
+ " 'sit': False,\n",
+ " 'yup': False,\n",
+ " 'street': False,\n",
+ " 'mile': False,\n",
+ " 'dm': False,\n",
+ " 'jus': False,\n",
+ " 'giving': False,\n",
+ " 'ear': False,\n",
+ " 'service': False,\n",
+ " 'low': False,\n",
+ " 'relaxing': False,\n",
+ " 'radio': False,\n",
+ " 'arm': False,\n",
+ " 'future': False,\n",
+ " 'proud': False,\n",
+ " 'fact': False,\n",
+ " 'learn': False,\n",
+ " 'easy': False,\n",
+ " 'child': False,\n",
+ " 'kitty': False,\n",
+ " 'hold': False,\n",
+ " 'mark': False,\n",
+ " 'peep': False,\n",
+ " 'wearing': False,\n",
+ " 'luv': False,\n",
+ " 'south': False,\n",
+ " 'road': False,\n",
+ " 'smell': False,\n",
+ " 'exciting': False,\n",
+ " 'currently': False,\n",
+ " 'warm': False,\n",
+ " 'puppy': False,\n",
+ " 'packing': False,\n",
+ " 'google': False,\n",
+ " 'exactly': False,\n",
+ " 'share': False,\n",
+ " 'decided': False,\n",
+ " 'voice': False,\n",
+ " 'fat': False,\n",
+ " 'mommy': False,\n",
+ " 'realize': False,\n",
+ " 'join': False,\n",
+ " 'nearly': False,\n",
+ " 'bb': False,\n",
+ " 'whatever': False,\n",
+ " 'woo': False,\n",
+ " 'spend': False,\n",
+ " 'stand': False,\n",
+ " 'enjoyed': False,\n",
+ " 'tweeps': False,\n",
+ " 'sold': False,\n",
+ " 'gettin': False,\n",
+ " 'fever': False,\n",
+ " 'paid': False,\n",
+ " 'beat': False,\n",
+ " 'pink': False,\n",
+ " 'mall': False,\n",
+ " 'gunna': False,\n",
+ " 'ï': False,\n",
+ " 'freaking': False,\n",
+ " 'snow': False,\n",
+ " 'hospital': False,\n",
+ " 'cheese': False,\n",
+ " 'nobody': False,\n",
+ " 'wondering': False,\n",
+ " 'plane': False,\n",
+ " 'shame': False,\n",
+ " 'search': False,\n",
+ " 'ended': False,\n",
+ " 'helping': False,\n",
+ " 'safe': False,\n",
+ " 'staying': False,\n",
+ " 'storm': False,\n",
+ " 'fam': False,\n",
+ " 'z': False,\n",
+ " 'sing': False,\n",
+ " 'view': False,\n",
+ " 'yo': False,\n",
+ " 'card': False,\n",
+ " 'stick': False,\n",
+ " 'joe': False,\n",
+ " 'matter': False,\n",
+ " 'delicious': False,\n",
+ " 'cook': False,\n",
+ " 'country': False,\n",
+ " 'hill': False,\n",
+ " 'peace': False,\n",
+ " 'worked': False,\n",
+ " 'race': False,\n",
+ " 'boot': False,\n",
+ " 'figure': False,\n",
+ " 'vega': False,\n",
+ " 'france': False,\n",
+ " 'wit': False,\n",
+ " 'topic': False,\n",
+ " 'spot': False,\n",
+ " 'cover': False,\n",
+ " 'afford': False,\n",
+ " 'floor': False,\n",
+ " 'bike': False,\n",
+ " 'starbucks': False,\n",
+ " 'hahah': False,\n",
+ " 'tummy': False,\n",
+ " 'trouble': False,\n",
+ " 'mouth': False,\n",
+ " 'ran': False,\n",
+ " 'drunk': False,\n",
+ " 'lately': False,\n",
+ " 'joke': False,\n",
+ " 'bird': False,\n",
+ " 'flu': False,\n",
+ " 'showing': False,\n",
+ " 'mail': False,\n",
+ " 'turned': False,\n",
+ " 'men': False,\n",
+ " 'taste': False,\n",
+ " 'gosh': False,\n",
+ " 'bar': False,\n",
+ " 'changed': False,\n",
+ " 'fish': False,\n",
+ " 'stopped': False,\n",
+ " 'wife': False,\n",
+ " 'alot': False,\n",
+ " 'tweetdeck': False,\n",
+ " 'magic': False,\n",
+ " 'brilliant': False,\n",
+ " 'cooking': False,\n",
+ " 'state': False,\n",
+ " 'design': False,\n",
+ " 'film': False,\n",
+ " 'tan': False,\n",
+ " 'isnt': False,\n",
+ " 'nail': False,\n",
+ " 'bummed': False,\n",
+ " 'prob': False,\n",
+ " 'happens': False,\n",
+ " 'eh': False,\n",
+ " 'type': False,\n",
+ " 'young': False,\n",
+ " 'honey': False,\n",
+ " 'price': False,\n",
+ " 'disappointed': False,\n",
+ " 'sky': False,\n",
+ " 'ring': False,\n",
+ " 'surprise': False,\n",
+ " 'self': False,\n",
+ " 'depressing': False,\n",
+ " 'sexy': False,\n",
+ " 'cd': False,\n",
+ " 'woot': False,\n",
+ " 'pop': False,\n",
+ " 'piece': False,\n",
+ " 'degree': False,\n",
+ " 'chillin': False,\n",
+ " 'hilarious': False,\n",
+ " 'rip': False,\n",
+ " 'updated': False,\n",
+ " 'ohh': False,\n",
+ " 'boyfriend': False,\n",
+ " 'awwww': False,\n",
+ " 'hun': False,\n",
+ " 'crappy': False,\n",
+ " 'er': False,\n",
+ " 'although': False,\n",
+ " 'death': False,\n",
+ " 'lake': False,\n",
+ " 'ahead': False,\n",
+ " 'art': False,\n",
+ " 'watchin': False,\n",
+ " 'scary': False,\n",
+ " 'annoying': False,\n",
+ " 'event': False,\n",
+ " 'plus': False,\n",
+ " 'ff': False,\n",
+ " 'major': False,\n",
+ " 'afraid': False,\n",
+ " 'pissed': False,\n",
+ " 'wat': False,\n",
+ " 'huh': False,\n",
+ " 'secret': False,\n",
+ " 'others': False,\n",
+ " 'land': False,\n",
+ " 'caught': False,\n",
+ " 'closed': False,\n",
+ " 'kiss': False,\n",
+ " 'trek': False,\n",
+ " 'code': False,\n",
+ " 'mobile': False,\n",
+ " 'knee': False,\n",
+ " ...},\n",
+ " 0),\n",
+ " ({'wa': False,\n",
+ " 'day': False,\n",
+ " 'good': False,\n",
+ " 'get': False,\n",
+ " 'like': False,\n",
+ " 'go': False,\n",
+ " 'quot': False,\n",
+ " 'love': False,\n",
+ " 'work': False,\n",
+ " 'got': False,\n",
+ " 'today': False,\n",
+ " 'going': False,\n",
+ " 'u': False,\n",
+ " 'time': False,\n",
+ " 'one': False,\n",
+ " 'lol': False,\n",
+ " 'know': False,\n",
+ " 'im': False,\n",
+ " 'back': False,\n",
+ " 'really': False,\n",
+ " 'want': False,\n",
+ " 'night': False,\n",
+ " 'amp': False,\n",
+ " 'well': False,\n",
+ " 'new': False,\n",
+ " 'see': False,\n",
+ " 'think': False,\n",
+ " 'still': False,\n",
+ " 'oh': False,\n",
+ " 'thanks': False,\n",
+ " 'na': False,\n",
+ " 'ha': False,\n",
+ " 'need': False,\n",
+ " 'home': False,\n",
+ " 'much': False,\n",
+ " 'miss': False,\n",
+ " 'feel': False,\n",
+ " 'last': False,\n",
+ " 'morning': False,\n",
+ " 'great': False,\n",
+ " 'make': False,\n",
+ " 'tomorrow': False,\n",
+ " 'twitter': False,\n",
+ " 'haha': False,\n",
+ " 'wish': False,\n",
+ " 'hope': False,\n",
+ " 'bad': False,\n",
+ " 'sad': False,\n",
+ " 'fun': False,\n",
+ " 'come': False,\n",
+ " 'sleep': False,\n",
+ " 'would': False,\n",
+ " 'nice': False,\n",
+ " 'sorry': False,\n",
+ " 'right': False,\n",
+ " 'week': False,\n",
+ " 'tonight': False,\n",
+ " 'happy': False,\n",
+ " 'say': False,\n",
+ " 'thing': False,\n",
+ " 'getting': False,\n",
+ " 'look': False,\n",
+ " 'friend': False,\n",
+ " 'gon': False,\n",
+ " 'though': False,\n",
+ " 'hate': False,\n",
+ " 'wait': False,\n",
+ " 'better': False,\n",
+ " 'bed': False,\n",
+ " 'way': False,\n",
+ " 'watching': False,\n",
+ " 'lt': False,\n",
+ " 'people': False,\n",
+ " 'yeah': False,\n",
+ " 'hour': False,\n",
+ " 'show': False,\n",
+ " 'could': False,\n",
+ " 'thank': False,\n",
+ " 'take': False,\n",
+ " 'weekend': False,\n",
+ " 'next': False,\n",
+ " 'yes': False,\n",
+ " 'school': False,\n",
+ " 'even': False,\n",
+ " 'little': False,\n",
+ " 'life': False,\n",
+ " 'working': False,\n",
+ " 'everyone': False,\n",
+ " 'guy': False,\n",
+ " 'cant': False,\n",
+ " 'sick': False,\n",
+ " 'dont': False,\n",
+ " 'hey': False,\n",
+ " 'let': False,\n",
+ " 'awesome': False,\n",
+ " 'movie': False,\n",
+ " 'girl': False,\n",
+ " 'tweet': False,\n",
+ " 'always': False,\n",
+ " 'x': False,\n",
+ " 'never': False,\n",
+ " 'watch': False,\n",
+ " 'please': False,\n",
+ " 'soon': False,\n",
+ " 'year': False,\n",
+ " 'first': False,\n",
+ " 'long': False,\n",
+ " 'ok': False,\n",
+ " 'tired': False,\n",
+ " 'already': False,\n",
+ " 'feeling': False,\n",
+ " 'suck': False,\n",
+ " 'wan': False,\n",
+ " 'sure': False,\n",
+ " 'best': False,\n",
+ " 'looking': False,\n",
+ " 'n': False,\n",
+ " 'man': False,\n",
+ " 'another': False,\n",
+ " 'something': False,\n",
+ " 'find': False,\n",
+ " 'start': False,\n",
+ " 'cool': False,\n",
+ " 'done': False,\n",
+ " 'pretty': False,\n",
+ " 'omg': False,\n",
+ " 'yay': False,\n",
+ " 'yet': False,\n",
+ " 'phone': False,\n",
+ " 'damn': False,\n",
+ " 'lot': False,\n",
+ " 'away': False,\n",
+ " 'went': False,\n",
+ " 'old': False,\n",
+ " 'follow': False,\n",
+ " 'help': False,\n",
+ " 'guess': False,\n",
+ " 'keep': False,\n",
+ " 'house': False,\n",
+ " 'thought': False,\n",
+ " 'song': False,\n",
+ " 'made': False,\n",
+ " 'ever': False,\n",
+ " 'trying': False,\n",
+ " 'ya': False,\n",
+ " 'sun': False,\n",
+ " 'p': False,\n",
+ " 'bit': False,\n",
+ " 'hurt': False,\n",
+ " 'game': False,\n",
+ " 'finally': False,\n",
+ " 'maybe': False,\n",
+ " 'ready': False,\n",
+ " 'sound': False,\n",
+ " 'w': False,\n",
+ " 'big': True,\n",
+ " 'lost': False,\n",
+ " 'b': False,\n",
+ " 'nothing': False,\n",
+ " 'someone': False,\n",
+ " 'early': False,\n",
+ " 'summer': False,\n",
+ " 'tell': False,\n",
+ " 'hard': False,\n",
+ " 'also': False,\n",
+ " 'birthday': False,\n",
+ " 'r': False,\n",
+ " 'left': False,\n",
+ " 'mean': False,\n",
+ " 'missed': False,\n",
+ " 'ur': False,\n",
+ " 'th': False,\n",
+ " 'rain': False,\n",
+ " 'pic': False,\n",
+ " 'mom': False,\n",
+ " 'com': False,\n",
+ " 'party': False,\n",
+ " 'two': True,\n",
+ " 'glad': False,\n",
+ " 'world': False,\n",
+ " 'baby': False,\n",
+ " 'wow': False,\n",
+ " 'might': False,\n",
+ " 'bored': False,\n",
+ " 'call': False,\n",
+ " 'ta': False,\n",
+ " 'check': False,\n",
+ " 'late': False,\n",
+ " 'waiting': False,\n",
+ " 'car': False,\n",
+ " 'video': False,\n",
+ " 'stuff': False,\n",
+ " 'found': False,\n",
+ " 'yesterday': False,\n",
+ " 'said': False,\n",
+ " 'sunday': False,\n",
+ " 'luck': False,\n",
+ " 'saw': False,\n",
+ " 'amazing': False,\n",
+ " 'hot': False,\n",
+ " 'monday': False,\n",
+ " 'weather': False,\n",
+ " 'live': False,\n",
+ " 'many': False,\n",
+ " 'iphone': False,\n",
+ " 'doe': False,\n",
+ " 'exam': False,\n",
+ " 'thats': False,\n",
+ " 'play': False,\n",
+ " 'making': False,\n",
+ " 'may': False,\n",
+ " 'excited': False,\n",
+ " 'god': False,\n",
+ " 'dad': False,\n",
+ " 'family': False,\n",
+ " 'gone': False,\n",
+ " 'friday': False,\n",
+ " 'boy': False,\n",
+ " 'follower': False,\n",
+ " 'read': False,\n",
+ " 'funny': False,\n",
+ " 'hi': False,\n",
+ " 'since': False,\n",
+ " 'give': False,\n",
+ " 'poor': False,\n",
+ " 'enjoy': False,\n",
+ " 'job': False,\n",
+ " 'okay': False,\n",
+ " 'talk': False,\n",
+ " 'gt': False,\n",
+ " 'later': False,\n",
+ " 'head': False,\n",
+ " 'almost': False,\n",
+ " 'beautiful': False,\n",
+ " 'woke': False,\n",
+ " 'cold': False,\n",
+ " 'anything': False,\n",
+ " 'hear': False,\n",
+ " 'lunch': False,\n",
+ " 'missing': False,\n",
+ " 'put': False,\n",
+ " 'free': False,\n",
+ " 'try': False,\n",
+ " 'coming': False,\n",
+ " 'must': False,\n",
+ " 'end': False,\n",
+ " 'tho': False,\n",
+ " 'leave': False,\n",
+ " 'ugh': False,\n",
+ " 'till': False,\n",
+ " 'busy': False,\n",
+ " 'around': False,\n",
+ " 'food': False,\n",
+ " 'far': False,\n",
+ " 'book': False,\n",
+ " 'music': False,\n",
+ " 'cry': False,\n",
+ " 'headache': False,\n",
+ " 'cause': False,\n",
+ " 'use': False,\n",
+ " 'listening': False,\n",
+ " 'fan': False,\n",
+ " 'stop': False,\n",
+ " 'stay': False,\n",
+ " 'totally': False,\n",
+ " 'wanted': False,\n",
+ " 'place': False,\n",
+ " 'shit': False,\n",
+ " 'xx': False,\n",
+ " 'tv': False,\n",
+ " 'e': False,\n",
+ " 'picture': False,\n",
+ " 'update': False,\n",
+ " 'least': False,\n",
+ " 'sweet': False,\n",
+ " 'anyone': False,\n",
+ " 'lovely': False,\n",
+ " 'thinking': False,\n",
+ " 'forward': False,\n",
+ " 'aww': False,\n",
+ " 'g': False,\n",
+ " 'dog': False,\n",
+ " 'class': False,\n",
+ " 'actually': False,\n",
+ " 'everything': False,\n",
+ " 'eat': False,\n",
+ " 'mine': False,\n",
+ " 'playing': False,\n",
+ " 'cute': False,\n",
+ " 'hahaha': False,\n",
+ " 'kid': False,\n",
+ " 'dinner': False,\n",
+ " 'stupid': False,\n",
+ " 'sooo': False,\n",
+ " 'came': False,\n",
+ " 'word': False,\n",
+ " 'ill': False,\n",
+ " 'eating': False,\n",
+ " 'win': False,\n",
+ " 'hopefully': False,\n",
+ " 'finished': False,\n",
+ " 'welcome': False,\n",
+ " 'anymore': False,\n",
+ " 'minute': False,\n",
+ " 'every': False,\n",
+ " 'face': False,\n",
+ " 'idea': False,\n",
+ " 'hair': False,\n",
+ " 'without': False,\n",
+ " 'kinda': False,\n",
+ " 'month': False,\n",
+ " 'saturday': False,\n",
+ " 'c': False,\n",
+ " 'wrong': False,\n",
+ " 'super': False,\n",
+ " 'www': False,\n",
+ " 'final': False,\n",
+ " 'true': False,\n",
+ " 'reading': False,\n",
+ " 'hug': False,\n",
+ " 'probably': False,\n",
+ " 'photo': False,\n",
+ " 'buy': False,\n",
+ " 'hehe': False,\n",
+ " 'taking': False,\n",
+ " 'believe': False,\n",
+ " 'eye': False,\n",
+ " 'didnt': False,\n",
+ " 'name': False,\n",
+ " 'alone': False,\n",
+ " 'mind': False,\n",
+ " 'room': False,\n",
+ " 'either': False,\n",
+ " 'dream': False,\n",
+ " 'following': False,\n",
+ " 'able': False,\n",
+ " 'goodnight': False,\n",
+ " 'else': False,\n",
+ " 'lmao': False,\n",
+ " 'boo': False,\n",
+ " 'heard': False,\n",
+ " 'coffee': False,\n",
+ " 'outside': False,\n",
+ " 'real': False,\n",
+ " 'mileycyrus': False,\n",
+ " 'break': False,\n",
+ " 'part': False,\n",
+ " 'june': False,\n",
+ " 'computer': False,\n",
+ " 'forgot': False,\n",
+ " 'ticket': False,\n",
+ " 'blog': False,\n",
+ " 'post': False,\n",
+ " 'awww': False,\n",
+ " 'rest': False,\n",
+ " 'dude': False,\n",
+ " 'pm': False,\n",
+ " 'enough': False,\n",
+ " 'brother': False,\n",
+ " 'plan': False,\n",
+ " 'ð': False,\n",
+ " 'half': False,\n",
+ " 'using': False,\n",
+ " 'add': False,\n",
+ " 'stuck': False,\n",
+ " 'person': False,\n",
+ " 'text': False,\n",
+ " 'meet': False,\n",
+ " 'mother': False,\n",
+ " 'album': False,\n",
+ " 'study': False,\n",
+ " 'crazy': False,\n",
+ " 'hand': False,\n",
+ " 'send': False,\n",
+ " 'fine': False,\n",
+ " 'talking': False,\n",
+ " 'whole': False,\n",
+ " 'reply': False,\n",
+ " 'nite': False,\n",
+ " 'k': False,\n",
+ " 'seems': False,\n",
+ " 'run': False,\n",
+ " 'red': False,\n",
+ " 'took': False,\n",
+ " 'v': False,\n",
+ " 'seen': False,\n",
+ " 'trip': False,\n",
+ " 'full': False,\n",
+ " 'beach': False,\n",
+ " 'hello': False,\n",
+ " 'side': False,\n",
+ " 'loved': False,\n",
+ " 'news': False,\n",
+ " 'hit': False,\n",
+ " 'tried': False,\n",
+ " 'kind': False,\n",
+ " 'rock': False,\n",
+ " 'heart': False,\n",
+ " 'yea': False,\n",
+ " 'la': False,\n",
+ " 'shopping': False,\n",
+ " 'problem': False,\n",
+ " 'afternoon': False,\n",
+ " 'â': False,\n",
+ " 'pain': True,\n",
+ " 'change': False,\n",
+ " 'nap': False,\n",
+ " 'started': False,\n",
+ " 'used': False,\n",
+ " 'remember': False,\n",
+ " 'star': False,\n",
+ " 'boring': False,\n",
+ " 'course': False,\n",
+ " 'heading': False,\n",
+ " 'quite': False,\n",
+ " 'seeing': False,\n",
+ " 'hell': False,\n",
+ " 'train': False,\n",
+ " 'breakfast': False,\n",
+ " 'crap': False,\n",
+ " 'told': False,\n",
+ " 'english': False,\n",
+ " 'died': False,\n",
+ " 'sister': False,\n",
+ " 'fuck': False,\n",
+ " 'site': False,\n",
+ " 'instead': False,\n",
+ " 'leaving': False,\n",
+ " 'ipod': False,\n",
+ " 'money': False,\n",
+ " 'raining': False,\n",
+ " 'finish': False,\n",
+ " 'til': False,\n",
+ " 'anyway': False,\n",
+ " 'ah': False,\n",
+ " 'running': False,\n",
+ " 'concert': False,\n",
+ " 'soo': False,\n",
+ " 'facebook': False,\n",
+ " 'link': False,\n",
+ " 'sitting': False,\n",
+ " 'point': False,\n",
+ " 'jealous': False,\n",
+ " 'season': False,\n",
+ " 'bring': False,\n",
+ " 'ñ': False,\n",
+ " 'cuz': False,\n",
+ " 'awake': False,\n",
+ " 'mum': False,\n",
+ " 'reason': False,\n",
+ " 'studying': False,\n",
+ " 'pay': False,\n",
+ " 'cat': False,\n",
+ " 'wonder': False,\n",
+ " 'congrats': False,\n",
+ " 'ago': False,\n",
+ " 'xd': False,\n",
+ " 'page': False,\n",
+ " 'f': False,\n",
+ " 'bought': False,\n",
+ " 'store': False,\n",
+ " 'drink': False,\n",
+ " 'definitely': False,\n",
+ " 'couple': False,\n",
+ " 'evening': False,\n",
+ " 'chocolate': False,\n",
+ " 'btw': False,\n",
+ " 'soooo': False,\n",
+ " 'sunny': False,\n",
+ " 'sore': False,\n",
+ " 'drive': False,\n",
+ " 'shower': False,\n",
+ " 'st': False,\n",
+ " 'lucky': False,\n",
+ " 'aw': False,\n",
+ " 'test': False,\n",
+ " 'walk': False,\n",
+ " 'internet': False,\n",
+ " 'open': False,\n",
+ " 'water': False,\n",
+ " 'wake': False,\n",
+ " 'l': False,\n",
+ " 'office': False,\n",
+ " 'list': False,\n",
+ " 'watched': False,\n",
+ " 'team': False,\n",
+ " 'tommcfly': False,\n",
+ " 'clean': False,\n",
+ " 'enjoying': False,\n",
+ " 'hungry': False,\n",
+ " 'smile': False,\n",
+ " 'seriously': False,\n",
+ " 'move': False,\n",
+ " 'wont': False,\n",
+ " 'high': False,\n",
+ " 'asleep': False,\n",
+ " 'award': False,\n",
+ " 'fucking': False,\n",
+ " 'bout': False,\n",
+ " 'starting': False,\n",
+ " 'top': False,\n",
+ " 'moment': False,\n",
+ " 'dance': False,\n",
+ " 'project': False,\n",
+ " 'second': False,\n",
+ " 'park': False,\n",
+ " 'ask': False,\n",
+ " 'hr': False,\n",
+ " 'email': False,\n",
+ " 'church': False,\n",
+ " 'driving': False,\n",
+ " 'tea': False,\n",
+ " 'broke': False,\n",
+ " 'gym': False,\n",
+ " 'ride': False,\n",
+ " 'le': False,\n",
+ " 'loving': False,\n",
+ " 'meeting': False,\n",
+ " 'worth': False,\n",
+ " 'fail': False,\n",
+ " 'black': False,\n",
+ " 'close': False,\n",
+ " 'visit': False,\n",
+ " 'sigh': False,\n",
+ " 'ate': False,\n",
+ " 'online': False,\n",
+ " 'number': False,\n",
+ " 'vote': False,\n",
+ " 'bye': False,\n",
+ " 'hang': False,\n",
+ " 'wonderful': False,\n",
+ " 'youtube': False,\n",
+ " 'care': False,\n",
+ " 'co': False,\n",
+ " 'cut': False,\n",
+ " 'drinking': False,\n",
+ " 'horrible': False,\n",
+ " 'ddlovato': False,\n",
+ " 'shirt': False,\n",
+ " 'ice': False,\n",
+ " 'saying': False,\n",
+ " 'answer': False,\n",
+ " 'date': False,\n",
+ " 'dear': False,\n",
+ " 'agree': False,\n",
+ " 'j': False,\n",
+ " 'set': False,\n",
+ " 'town': False,\n",
+ " 'da': False,\n",
+ " 'wear': False,\n",
+ " 'happened': False,\n",
+ " 'line': False,\n",
+ " 'parent': False,\n",
+ " 'worse': False,\n",
+ " 'min': False,\n",
+ " 'longer': False,\n",
+ " 'lady': False,\n",
+ " 'together': False,\n",
+ " 'cream': False,\n",
+ " 'worry': False,\n",
+ " 'goin': False,\n",
+ " 'followfriday': False,\n",
+ " 'fast': False,\n",
+ " 'forget': False,\n",
+ " 'fb': False,\n",
+ " 'doesnt': False,\n",
+ " 'broken': False,\n",
+ " 'wtf': False,\n",
+ " 'turn': False,\n",
+ " 'unfortunately': False,\n",
+ " 'chance': False,\n",
+ " 'favorite': False,\n",
+ " 'spent': False,\n",
+ " 'fall': False,\n",
+ " 'air': False,\n",
+ " 'idk': False,\n",
+ " 'slept': False,\n",
+ " 'sometimes': False,\n",
+ " 'rainy': False,\n",
+ " 'question': False,\n",
+ " 'laptop': False,\n",
+ " 'tweeting': False,\n",
+ " 'tuesday': False,\n",
+ " 'earlier': False,\n",
+ " 'mood': False,\n",
+ " 'slow': False,\n",
+ " 'hoping': False,\n",
+ " 'thx': False,\n",
+ " 'h': False,\n",
+ " 'absolutely': False,\n",
+ " 'mr': False,\n",
+ " 'ahh': False,\n",
+ " 'cleaning': False,\n",
+ " 'via': False,\n",
+ " 'holiday': False,\n",
+ " 'happen': False,\n",
+ " 'wishing': False,\n",
+ " 'taken': False,\n",
+ " 'pool': False,\n",
+ " 'episode': False,\n",
+ " 'garden': False,\n",
+ " 'homework': False,\n",
+ " 'website': False,\n",
+ " 'sleeping': False,\n",
+ " 'airport': False,\n",
+ " 'especially': False,\n",
+ " 'business': False,\n",
+ " 'perfect': False,\n",
+ " 'em': False,\n",
+ " 'fell': False,\n",
+ " 'nd': False,\n",
+ " 'upset': False,\n",
+ " 'small': False,\n",
+ " 'city': False,\n",
+ " 'chat': False,\n",
+ " 'knew': False,\n",
+ " 'foot': False,\n",
+ " 'chicken': False,\n",
+ " '½': False,\n",
+ " 'father': False,\n",
+ " 'throat': False,\n",
+ " 'mac': False,\n",
+ " 'weird': False,\n",
+ " 'window': False,\n",
+ " 'passed': False,\n",
+ " 'son': False,\n",
+ " 'story': False,\n",
+ " 'tour': False,\n",
+ " 'message': False,\n",
+ " 'shop': False,\n",
+ " 'wednesday': False,\n",
+ " 'due': False,\n",
+ " 'bbq': False,\n",
+ " 'listen': False,\n",
+ " 'sleepy': False,\n",
+ " 'woman': False,\n",
+ " 'company': False,\n",
+ " 'sunshine': False,\n",
+ " 'met': False,\n",
+ " 'short': False,\n",
+ " 'comment': False,\n",
+ " 'mad': False,\n",
+ " 'understand': False,\n",
+ " 'gave': False,\n",
+ " 'pc': False,\n",
+ " 'hubby': False,\n",
+ " 'different': False,\n",
+ " 'havent': False,\n",
+ " 'account': False,\n",
+ " 'note': False,\n",
+ " 'seem': False,\n",
+ " 'scared': False,\n",
+ " 'bag': False,\n",
+ " 'alright': False,\n",
+ " 'cup': False,\n",
+ " 'ive': False,\n",
+ " 'leg': False,\n",
+ " 'green': False,\n",
+ " 'interesting': False,\n",
+ " 'bus': False,\n",
+ " 'past': False,\n",
+ " 'glass': False,\n",
+ " 'worst': False,\n",
+ " 'power': False,\n",
+ " 'plz': False,\n",
+ " 'math': False,\n",
+ " 'white': False,\n",
+ " 'support': False,\n",
+ " 'nope': False,\n",
+ " 'sat': False,\n",
+ " 'moon': False,\n",
+ " 'hangover': False,\n",
+ " 'moving': False,\n",
+ " 'taylor': False,\n",
+ " 'touch': False,\n",
+ " 'order': False,\n",
+ " 'jonas': False,\n",
+ " 'pick': False,\n",
+ " 'forever': False,\n",
+ " 'tom': False,\n",
+ " 'dead': False,\n",
+ " 'shoot': False,\n",
+ " 'called': False,\n",
+ " 'bitch': False,\n",
+ " 'rather': False,\n",
+ " 'catch': False,\n",
+ " 'bet': False,\n",
+ " 'officially': False,\n",
+ " 'vip': False,\n",
+ " 'writing': False,\n",
+ " 'si': False,\n",
+ " 'lil': False,\n",
+ " 'worried': False,\n",
+ " 'xoxo': False,\n",
+ " 'write': False,\n",
+ " 'graduation': False,\n",
+ " 'liked': False,\n",
+ " 'except': False,\n",
+ " 'bday': False,\n",
+ " 'gay': False,\n",
+ " 'dang': False,\n",
+ " 'cousin': False,\n",
+ " 'load': False,\n",
+ " 'myspace': False,\n",
+ " 'sent': False,\n",
+ " 'fix': False,\n",
+ " 'ahhh': False,\n",
+ " 'fly': False,\n",
+ " 'blue': False,\n",
+ " 'college': False,\n",
+ " 'gorgeous': False,\n",
+ " 'special': False,\n",
+ " 'kill': False,\n",
+ " 'apple': False,\n",
+ " 'interview': False,\n",
+ " 'fight': False,\n",
+ " 'everybody': False,\n",
+ " 'dress': False,\n",
+ " 'box': False,\n",
+ " 'bro': False,\n",
+ " 'case': False,\n",
+ " 'july': False,\n",
+ " 'cake': False,\n",
+ " 'three': False,\n",
+ " 'hmm': False,\n",
+ " 'deal': False,\n",
+ " 'ouch': False,\n",
+ " 'inside': False,\n",
+ " 'yep': False,\n",
+ " 'random': False,\n",
+ " 'meant': False,\n",
+ " 'profile': False,\n",
+ " 'mtv': False,\n",
+ " 'wedding': False,\n",
+ " 'clothes': False,\n",
+ " 'band': False,\n",
+ " 'issue': False,\n",
+ " 'apparently': False,\n",
+ " 'lonely': False,\n",
+ " 'age': False,\n",
+ " 'shall': False,\n",
+ " 'flight': False,\n",
+ " 'yr': False,\n",
+ " 'supposed': False,\n",
+ " 'david': False,\n",
+ " 'living': False,\n",
+ " 'needed': False,\n",
+ " 'london': False,\n",
+ " 'laugh': False,\n",
+ " 'sign': False,\n",
+ " 'finger': False,\n",
+ " 'beer': False,\n",
+ " 'itunes': False,\n",
+ " 'played': False,\n",
+ " 'tear': False,\n",
+ " 'ppl': False,\n",
+ " 'body': False,\n",
+ " 'paper': False,\n",
+ " 'group': False,\n",
+ " 'looked': False,\n",
+ " 'sadly': False,\n",
+ " 'bloody': False,\n",
+ " 'club': False,\n",
+ " 'web': False,\n",
+ " 'vacation': False,\n",
+ " 'jonasbrothers': False,\n",
+ " 'uk': False,\n",
+ " 'version': False,\n",
+ " 'huge': False,\n",
+ " 'sooooo': False,\n",
+ " 'lakers': False,\n",
+ " 'germany': False,\n",
+ " 'save': False,\n",
+ " 'hanging': False,\n",
+ " 'shoe': False,\n",
+ " 'fantastic': False,\n",
+ " 'wine': False,\n",
+ " 'sort': False,\n",
+ " 'none': False,\n",
+ " 'twilight': False,\n",
+ " 'shot': False,\n",
+ " 'thursday': False,\n",
+ " 'confused': False,\n",
+ " 'sale': False,\n",
+ " 'goodbye': False,\n",
+ " 'round': False,\n",
+ " 'lame': False,\n",
+ " 'cheer': False,\n",
+ " 'singing': False,\n",
+ " 'promise': False,\n",
+ " 'drop': False,\n",
+ " 'nose': False,\n",
+ " 'whats': False,\n",
+ " 'babe': False,\n",
+ " 'xxx': False,\n",
+ " 'lesson': False,\n",
+ " 'camera': False,\n",
+ " 'download': False,\n",
+ " 'info': False,\n",
+ " 'fair': False,\n",
+ " 'yummy': False,\n",
+ " 'french': False,\n",
+ " 'miley': False,\n",
+ " 'light': False,\n",
+ " 'asked': False,\n",
+ " 'lazy': False,\n",
+ " 'indeed': False,\n",
+ " 'along': False,\n",
+ " 'quick': False,\n",
+ " 'ã': False,\n",
+ " 'door': False,\n",
+ " 'sit': False,\n",
+ " 'yup': False,\n",
+ " 'street': False,\n",
+ " 'mile': False,\n",
+ " 'dm': False,\n",
+ " 'jus': False,\n",
+ " 'giving': False,\n",
+ " 'ear': False,\n",
+ " 'service': False,\n",
+ " 'low': False,\n",
+ " 'relaxing': False,\n",
+ " 'radio': False,\n",
+ " 'arm': False,\n",
+ " 'future': False,\n",
+ " 'proud': False,\n",
+ " 'fact': False,\n",
+ " 'learn': False,\n",
+ " 'easy': False,\n",
+ " 'child': False,\n",
+ " 'kitty': False,\n",
+ " 'hold': False,\n",
+ " 'mark': False,\n",
+ " 'peep': False,\n",
+ " 'wearing': False,\n",
+ " 'luv': False,\n",
+ " 'south': False,\n",
+ " 'road': False,\n",
+ " 'smell': False,\n",
+ " 'exciting': False,\n",
+ " 'currently': False,\n",
+ " 'warm': False,\n",
+ " 'puppy': False,\n",
+ " 'packing': False,\n",
+ " 'google': False,\n",
+ " 'exactly': False,\n",
+ " 'share': False,\n",
+ " 'decided': False,\n",
+ " 'voice': False,\n",
+ " 'fat': False,\n",
+ " 'mommy': False,\n",
+ " 'realize': False,\n",
+ " 'join': False,\n",
+ " 'nearly': False,\n",
+ " 'bb': False,\n",
+ " 'whatever': False,\n",
+ " 'woo': False,\n",
+ " 'spend': False,\n",
+ " 'stand': False,\n",
+ " 'enjoyed': False,\n",
+ " 'tweeps': False,\n",
+ " 'sold': False,\n",
+ " 'gettin': False,\n",
+ " 'fever': False,\n",
+ " 'paid': False,\n",
+ " 'beat': False,\n",
+ " 'pink': False,\n",
+ " 'mall': False,\n",
+ " 'gunna': False,\n",
+ " 'ï': False,\n",
+ " 'freaking': False,\n",
+ " 'snow': False,\n",
+ " 'hospital': False,\n",
+ " 'cheese': False,\n",
+ " 'nobody': False,\n",
+ " 'wondering': False,\n",
+ " 'plane': False,\n",
+ " 'shame': False,\n",
+ " 'search': False,\n",
+ " 'ended': False,\n",
+ " 'helping': False,\n",
+ " 'safe': False,\n",
+ " 'staying': False,\n",
+ " 'storm': False,\n",
+ " 'fam': False,\n",
+ " 'z': False,\n",
+ " 'sing': False,\n",
+ " 'view': False,\n",
+ " 'yo': False,\n",
+ " 'card': False,\n",
+ " 'stick': False,\n",
+ " 'joe': False,\n",
+ " 'matter': False,\n",
+ " 'delicious': False,\n",
+ " 'cook': False,\n",
+ " 'country': False,\n",
+ " 'hill': False,\n",
+ " 'peace': False,\n",
+ " 'worked': False,\n",
+ " 'race': False,\n",
+ " 'boot': False,\n",
+ " 'figure': False,\n",
+ " 'vega': False,\n",
+ " 'france': False,\n",
+ " 'wit': False,\n",
+ " 'topic': False,\n",
+ " 'spot': False,\n",
+ " 'cover': False,\n",
+ " 'afford': False,\n",
+ " 'floor': False,\n",
+ " 'bike': False,\n",
+ " 'starbucks': False,\n",
+ " 'hahah': False,\n",
+ " 'tummy': False,\n",
+ " 'trouble': False,\n",
+ " 'mouth': False,\n",
+ " 'ran': False,\n",
+ " 'drunk': False,\n",
+ " 'lately': False,\n",
+ " 'joke': False,\n",
+ " 'bird': False,\n",
+ " 'flu': False,\n",
+ " 'showing': False,\n",
+ " 'mail': False,\n",
+ " 'turned': False,\n",
+ " 'men': False,\n",
+ " 'taste': False,\n",
+ " 'gosh': False,\n",
+ " 'bar': False,\n",
+ " 'changed': False,\n",
+ " 'fish': False,\n",
+ " 'stopped': False,\n",
+ " 'wife': False,\n",
+ " 'alot': False,\n",
+ " 'tweetdeck': False,\n",
+ " 'magic': False,\n",
+ " 'brilliant': False,\n",
+ " 'cooking': False,\n",
+ " 'state': False,\n",
+ " 'design': False,\n",
+ " 'film': False,\n",
+ " 'tan': False,\n",
+ " 'isnt': False,\n",
+ " 'nail': False,\n",
+ " 'bummed': False,\n",
+ " 'prob': False,\n",
+ " 'happens': False,\n",
+ " 'eh': False,\n",
+ " 'type': False,\n",
+ " 'young': False,\n",
+ " 'honey': False,\n",
+ " 'price': False,\n",
+ " 'disappointed': False,\n",
+ " 'sky': False,\n",
+ " 'ring': False,\n",
+ " 'surprise': False,\n",
+ " 'self': False,\n",
+ " 'depressing': False,\n",
+ " 'sexy': False,\n",
+ " 'cd': False,\n",
+ " 'woot': False,\n",
+ " 'pop': False,\n",
+ " 'piece': False,\n",
+ " 'degree': False,\n",
+ " 'chillin': False,\n",
+ " 'hilarious': False,\n",
+ " 'rip': False,\n",
+ " 'updated': False,\n",
+ " 'ohh': False,\n",
+ " 'boyfriend': False,\n",
+ " 'awwww': False,\n",
+ " 'hun': False,\n",
+ " 'crappy': False,\n",
+ " 'er': False,\n",
+ " 'although': False,\n",
+ " 'death': False,\n",
+ " 'lake': False,\n",
+ " 'ahead': False,\n",
+ " 'art': False,\n",
+ " 'watchin': False,\n",
+ " 'scary': False,\n",
+ " 'annoying': False,\n",
+ " 'event': False,\n",
+ " 'plus': False,\n",
+ " 'ff': False,\n",
+ " 'major': False,\n",
+ " 'afraid': False,\n",
+ " 'pissed': False,\n",
+ " 'wat': False,\n",
+ " 'huh': False,\n",
+ " 'secret': False,\n",
+ " 'others': False,\n",
+ " 'land': False,\n",
+ " 'caught': False,\n",
+ " 'closed': False,\n",
+ " 'kiss': False,\n",
+ " 'trek': False,\n",
+ " 'code': False,\n",
+ " 'mobile': False,\n",
+ " 'knee': False,\n",
+ " ...},\n",
+ " 0)]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Turn each row of the document-term matrix into a (feature_dict, label) pair,\n",
+ "# the input format consumed by NLTK classifiers. Each dict pairs a word from\n",
+ "# top_words with True when that word's count in the row is greater than zero.\n",
+ "n_documents = dtm_dense.shape[0]\n",
+ "\n",
+ "feature_list = [\n",
+ "    (\n",
+ "        {word: (count > 0) for word, count in zip(top_words, dtm_dense[doc_idx, :])},\n",
+ "        sentiment_labels[doc_idx],\n",
+ "    )\n",
+ "    for doc_idx in range(n_documents)\n",
+ "]\n",
+ "\n",
+ "feature_list"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Explanation\n",
+ "\n",
+ "DTM: Document-Term Matrix\n",
+ "\n",
+ "DTM stands for Document-Term Matrix. It's a fundamental data structure used in text analysis and natural language processing (NLP). Here's how it works:\n",
+ "\n",
+ "- **Documents:** Each row in the matrix represents a single document (in your case, a tweet).\n",
+ "- **Terms:** Each column represents a unique word (term) from your vocabulary (the top 5000 words in your case).\n",
+ "- **Values:** Each cell holds the frequency of that word in that document.\n",
+ "\n",
+ "In simpler terms, a DTM shows you how many times each word appears in each document.\n",
+ "\n",
+ "True/False in the feature_list:\n",
+ "\n",
+ "The True/False values you see in the feature_list are a slightly different representation of the DTM. Instead of raw word counts, they indicate the presence or absence of each word in a document:\n",
+ "\n",
+ "- **True:** The word is present in the document.\n",
+ "- **False:** The word is absent from the document.\n",
+ "\n",
+ "This binary representation is often used for text classification tasks because it focuses on whether a word appears at all, rather than how many times it appears. It makes the features more suitable for algorithms like Naive Bayes that often work well with categorical data."
]
},
{
@@ -210,11 +5047,62 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 49,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Most Informative Features:\n",
+ "Most Informative Features\n",
+ " ugh = True 0 : 1 = 23.3 : 1.0\n",
+ " upset = True 0 : 1 = 22.6 : 1.0\n",
+ " sadly = True 0 : 1 = 17.2 : 1.0\n",
+ " broke = True 0 : 1 = 15.6 : 1.0\n",
+ " poor = True 0 : 1 = 15.5 : 1.0\n",
+ " congrats = True 1 : 0 = 14.8 : 1.0\n",
+ " died = True 0 : 1 = 14.0 : 1.0\n",
+ " sad = True 0 : 1 = 12.9 : 1.0\n",
+ " burnt = True 0 : 1 = 11.8 : 1.0\n",
+ " depressing = True 0 : 1 = 11.1 : 1.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "from nltk.classify import NaiveBayesClassifier\n",
+ "\n",
+ "# Hold out 20% of the labelled feature dicts for evaluation; the fixed seed keeps the split reproducible\n",
+ "train_set, test_set = train_test_split(feature_list, random_state=42, test_size=0.2)\n",
+ "\n",
+ "# Fit NLTK's Naive Bayes model on the training portion only\n",
+ "classifier = NaiveBayesClassifier.train(train_set)\n",
+ "\n",
+ "# Print the 10 most informative features found by the classifier\n",
+ "classifier.show_most_informative_features(n=10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "# your code here"
+ "#### Explanation\n",
+ "The output above provides insights into which words are most helpful in classifying your tweets as positive (1) or negative (0).\n",
+ "\n",
+ "Here's a breakdown of what each part means:\n",
+ "\n",
+ "Example: `ugh = True    0 : 1 = 23.3 : 1.0`\n",
+ "\n",
+ "- `ugh = True`: The word \"ugh\" is present in a tweet.\n",
+ "- `0 : 1`: The two sentiment labels being compared, written in the same order as the ratio that follows (here negative relative to positive; a row such as `congrats` shows `1 : 0` instead):\n",
+ "  - `0`: Negative sentiment\n",
+ "  - `1`: Positive sentiment\n",
+ "- `23.3 : 1.0`: The likelihood ratio. It tells you how much more likely the word \"ugh\" is to appear in a negative tweet compared to a positive tweet.\n",
+ "\n",
+ "**Interpreting the Likelihood Ratio:**\n",
+ "\n",
+ "A likelihood ratio of 23.3 : 1.0 means that the word \"ugh\" is 23.3 times more likely to appear in a negative tweet than in a positive tweet. This suggests that \"ugh\" is a strong indicator of negative sentiment."
]
},
{
@@ -230,11 +5118,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 50,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.726"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# your code here"
+ "# Fraction of held-out test tweets whose predicted label matches the true label\n",
+ "accuracy = nltk.classify.accuracy(classifier, test_set)\n",
+ "\n",
+ "# In the recorded run this was 0.726: the classifier correctly predicted the sentiment of 72.6% of the\n",
+ "# tweets in the test set, i.e. roughly 73 out of every 100 tweets it was given.\n",
+ "accuracy"
]
},
{
@@ -298,7 +5203,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -312,7 +5217,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.11.5"
}
},
"nbformat": 4,