-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathR_Sentiment_Analyze.Rmd
More file actions
145 lines (88 loc) · 2.86 KB
/
R_Sentiment_Analyze.Rmd
File metadata and controls
145 lines (88 loc) · 2.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
```{r}
# Install tidytext only when it is missing. An unconditional
# install.packages() call would reinstall the package on every knit and
# fails outright in a non-interactive session with no CRAN mirror configured.
if (!requireNamespace("tidytext", quietly = TRUE)) {
  install.packages("tidytext")
}
library(tidytext)

# Print the `sentiments` dataset shipped with tidytext: a word-level lexicon
# with positive / negative labels.
sentiments
```
```{r}
# Show the "bing" lexicon, which labels each word as positive or negative.
get_sentiments(lexicon = "bing")
```
```{r}
# dplyr supplies group_by(), mutate(), row_number() and ungroup() used below
# (and filter(), count() and the joins used in later chunks). No other
# package loaded in this document attaches it, so it is loaded explicitly.
library(dplyr)
library(janeaustenr)
library(stringr)

# Build a tidy, one-word-per-row table from the Jane Austen novels:
#   * linenumber - position of each text line within its own book
#   * chapter    - running count, within each book, of lines that look like a
#                  chapter heading ("chapter" followed by a digit or one of
#                  the characters i, v, x, l, c; case-insensitive)
# The grouping is dropped before tokenising so that later steps work on an
# ungrouped table; unnest_tokens() then splits `text` into a `word` column.
tidy_data <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)
```
```{r}
# Keep only the words that the "bing" lexicon labels as positive.
positive_senti <- get_sentiments("bing") %>%
  filter(sentiment == "positive")

# Count the positive words that occur in "Emma", most frequent first.
# The join key is spelled out so the result does not depend on whichever
# column names the two tables happen to share, and so the knitted output is
# not cluttered with a "Joining with ..." message.
tidy_data %>%
  filter(book == "Emma") %>%
  semi_join(positive_senti, by = "word") %>%
  count(word, sort = TRUE)
```
```{r}
# Net sentiment of "Emma" in sections of 80 text lines.
#
# Steps:
#   1. keep only the rows of "Emma" - the earlier form,
#      count(book = "Emma", ...), merely created a constant column and so
#      counted the words of every book while labelling them all as "Emma";
#   2. attach the bing sentiment label to each word (explicit join key);
#   3. count positive / negative words per 80-line section
#      (index = linenumber %/% 80);
#   4. widen to one `negative` and one `positive` column, filling sections
#      with no words of a class with 0 (pivot_wider() replaces the
#      superseded spread());
#   5. sentiment = positive - negative.
library(tidyr)
bing <- get_sentiments("bing")
Emma_sentients <- tidy_data %>%
  filter(book == "Emma") %>%
  inner_join(bing, by = "word") %>%
  count(book, index = linenumber %/% 80, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = positive - negative)
```
```{r}
# Bar chart of the `sentiment` column of Emma_sentients against `index`,
# coloured by book and drawn as one facet per book.
library(ggplot2)
ggplot(Emma_sentients, aes(x = index, y = sentiment, fill = book)) +
  geom_col(show.legend = TRUE) +
  facet_wrap(~ book, ncol = 2, scales = "free_x")
```
```{r}
# Count how often each lexicon word occurs, together with its sentiment
# label, most frequent first. No book filter is applied, so the counts cover
# every book present in tidy_data. The join key is explicit so the join does
# not rely on (or announce) the columns the two tables happen to share.
counting_words <- tidy_data %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE)

# Preview the top of the table (head() returns six rows by default).
head(counting_words)
```
```{r}
# Diverging bar chart of the words in counting_words that occur more than
# 250 times. Counts of negative words are negated so they extend to the
# opposite side of zero, and words are ordered by that signed count.
counting_words %>%
  filter(n > 250) %>%
  mutate(
    n = ifelse(sentiment == "negative", -n, n),
    word = reorder(word, n)
  ) %>%
  ggplot(aes(x = word, y = n, fill = sentiment)) +
  geom_col() +
  coord_flip() +
  labs(y = "Sentiment Score")
```
```{r}
# Comparison word cloud of negative versus positive words: the more often a
# word occurs, the larger it is drawn (at most 60 words are shown).
#
# The word/sentiment counts are reshaped with acast() into a matrix with one
# row per word and one column per sentiment, which is the input that
# comparison.cloud() receives here.
# NOTE(review): the colours are presumably applied in column order
# (red = negative, blue = positive) - confirm against the rendered plot.
# The join key is explicit so the join does not depend on the column names
# the two tables happen to share.
library(reshape2)
library(wordcloud)
tidy_data %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("red", "blue"), max.words = 60)
```