DataScienceProject_R/R_Sentiment_Analyze.Rmd at main · furkan-cyber/DataScienceProject_R · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145


```{r}
# Make sure tidytext is available, then attach it.
# The install step runs only when the package is missing, so re-running or
# knitting this document does not download and reinstall it every time.
if (!requireNamespace("tidytext", quietly = TRUE)) {
  install.packages("tidytext")
}

library(tidytext)

# Print the `sentiments` dataset, a lexicon of positive and negative words.
sentiments


```


```{r}
# Show the Bing lexicon, which classifies each word as positive or negative.
get_sentiments(lexicon = "bing")

```


```{r}
# dplyr supplies group_by(), mutate(), row_number(), ungroup() and the
# filter / join / count verbs used here and in the later chunks of this
# document; it has to be attached explicitly.
library(dplyr)
library(janeaustenr)
library(stringr)

# Convert the Jane Austen novels into a tidy, one-word-per-row table.
#   1. Group by book so the counters below restart for every novel.
#   2. linenumber: position of each line of text within its book.
#   3. chapter: running count of lines matching the chapter-heading pattern
#      ("chapter " followed by a digit or one of the letters i, v, x, l, c,
#      matched case-insensitively).
#   4. Ungroup, then split every line of `text` into one `word` per row.
tidy_data <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)
```


```{r}

# Keep only the words that the Bing lexicon labels as positive.
positive_senti <- get_sentiments("bing") %>%
  filter(sentiment == "positive")

# Count the positive words that occur in the book "Emma", most frequent first.
# The join key is stated explicitly so the match is always on `word` and no
# "Joining with `by = ...`" message is emitted.
tidy_data %>%
  filter(book == "Emma") %>%
  semi_join(positive_senti, by = "word") %>%
  count(word, sort = TRUE)


```


```{r}

# Net sentiment of "Emma" in consecutive blocks of 80 lines.
#   - filter() restricts the data to the book "Emma". Writing
#     count(book = "Emma", ...) would NOT filter: it only relabels every row
#     as "Emma" and therefore mixes the words of all books together.
#   - inner_join() keeps the words present in the Bing lexicon and attaches
#     their sentiment label.
#   - count() tallies positive / negative words per 80-line block (`index`).
#   - spread() turns the sentiment labels into `negative` and `positive`
#     columns, filling missing combinations with 0.
#   - sentiment = positive - negative is the net score of each block.

library(tidyr)
bing <- get_sentiments("bing")
Emma_sentients <- tidy_data %>%
  filter(book == "Emma") %>%
  inner_join(bing, by = "word") %>%
  count(book, index = linenumber %/% 80, sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative)

```


```{r}
# Bar chart of the net sentiment score for each index block in
# Emma_sentients, coloured and faceted by book.

library(ggplot2)

ggplot(Emma_sentients, aes(x = index, y = sentiment, fill = book)) +
  geom_col(show.legend = TRUE) +
  facet_wrap(vars(book), ncol = 2, scales = "free_x")


```


```{r}
# Count how often each sentiment-bearing word occurs, most frequent first.
# Note: tidy_data is not filtered here, so the counts cover every book in it,
# not only "Emma".
counting_words <- tidy_data %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE)
# head() shows the first six rows by default.
head(counting_words)

```

```{r}
# Horizontal bar chart of the words counted more than 250 times in
# counting_words. Counts of negative words are negated so they extend in the
# opposite direction from positive ones, and words are ordered by that
# signed score.

counting_words %>%
  filter(n > 250) %>%
  mutate(
    n = ifelse(sentiment == "negative", -n, n),
    word = reorder(word, n)
  ) %>%
  ggplot(aes(x = word, y = n, fill = sentiment)) +
  geom_col() +
  coord_flip() +
  labs(y = "Sentiment Score")
```


```{r}

# Comparison word cloud of negative versus positive words.
# Words are counted per sentiment, reshaped by acast() into a word-by-sentiment
# matrix (0 where a word never carries that sentiment), and at most 60 words
# are drawn.
# NOTE(review): the colours are presumably applied in column order
# (negative = red, positive = blue) -- confirm against the rendered plot.

library(reshape2)
library(wordcloud)
tidy_data %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(
    colors = c("red", "blue"),
    max.words = 60
  )


```