-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlab2.R
More file actions
76 lines (49 loc) · 1.45 KB
/
lab2.R
File metadata and controls
76 lines (49 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
library('arules')
library('dplyr')
library('readr')
# Read the log into a data frame. The absolute path below is specific to
# the machine the script was written on.
df <- read.csv(file = '/home/goda/Desktop/tacd/log.csv')

# Per-column summary first, then the whole table.
summary(df)
df
# 1. Most frequent pages ----
# Number of rows in `df` for every PAGE value, most frequent first.
# Computed once and reused below instead of being rebuilt for the top-3 step.
page_counts <- df %>%
  group_by(PAGE) %>%
  tally(sort = TRUE)
page_counts

# The three most frequent pages as a plain vector. The ordering variable is
# named explicitly (`wt = n`) rather than relying on top_n() falling back to
# the last column of the table.
top_pages <- page_counts %>%
  top_n(3, wt = n) %>%
  pull(PAGE)
top_pages
# 2. Hierarchical clustering of users ----
# Cross-tabulate USER against PAGE: one row per USER value, one column per
# PAGE value, each cell holding the number of matching rows in `df`.
matrix <- table(df$USER, df$PAGE)
matrix

# Pairwise distances between the rows of that table.
dis <- dist(matrix)
dis

# Hierarchical clustering built from the distance object.
c1 <- hclust(dis)
c1

# Dendrogram drawn twice: with the default layout, then with hang = -0.1.
plot(c1)
plot(c1, hang = -0.1)

# Split the tree into two groups and outline them on the current plot.
c2 <- cutree(c1, k = 2)
c2
rect.hclust(c1, k = 2)
# 3. Most frequent pages in cluster 1 ----
# Attach a cluster label to every row of `df`. `c2` is looked up by the user
# id converted to a character string so the match is always by name; indexing
# directly with a factor or a numeric id would select by position instead.
# unname() keeps the new column free of a names attribute.
dcluster <- mutate(df, Cluster = unname(c2[as.character(df$USER)]))
dcluster

# Every PAGE entry that belongs to cluster 1.
filter(dcluster, Cluster == 1) %>%
  pull(PAGE)

# Row counts per page within cluster 1, most frequent first.
top_c1 <- filter(dcluster, Cluster == 1) %>%
  group_by(PAGE) %>%
  tally(sort = TRUE)
top_c1

# The two most frequent pages of cluster 1, taken from the counts above.
# The ordering variable is named explicitly (`wt = n`) rather than relying
# on top_n() falling back to the last column of the table.
top_2_c1 <- top_c1 %>%
  top_n(2, wt = n) %>%
  pull(PAGE)
top_2_c1
# 4. Most frequent pages in cluster 2 ----
# Count rows per page within cluster 2 and keep the two most frequent pages.
# The ordering variable is named explicitly (`wt = n`) rather than relying
# on top_n() falling back to the last column of the table.
top_2_c2 <- filter(dcluster, Cluster == 2) %>%
  group_by(PAGE) %>%
  tally(sort = TRUE) %>%
  top_n(2, wt = n) %>%
  pull(PAGE)
top_2_c2
# 5. Top pages of u2's cluster that are absent from u2's own rows ----
# Cluster label of user 'u2', read from the first of that user's rows.
c_u2 <- dcluster %>%
  filter(USER == 'u2') %>%
  pull(Cluster) %>%
  head(1)
c_u2

# Three most frequent pages within that cluster. The ordering variable is
# named explicitly (`wt = n`) rather than relying on top_n()'s default.
top_u2 <- filter(dcluster, Cluster == c_u2) %>%
  group_by(PAGE) %>%
  tally(sort = TRUE) %>%
  top_n(3, wt = n) %>%
  pull(PAGE)
top_u2

# Pages that appear in u2's own rows.
u2_p <- filter(dcluster, USER == 'u2') %>%
  pull(PAGE)
u2_p

# Keep only the cluster's top pages that u2 does not already have. The mask
# is built from `top_u2` itself; the previous code referenced `top_c1_p`,
# which is never defined anywhere in the script.
top_u2[!top_u2 %in% u2_p]
# 5 (version 2). Same question answered with a join ----
# Three most frequent pages of u2's cluster, kept as a one-column table.
# The ordering variable is named explicitly (`wt = n`) rather than relying
# on top_n() falling back to the last column of the table.
top_u2_s <- filter(dcluster, Cluster == c_u2) %>%
  group_by(PAGE) %>%
  tally(sort = TRUE) %>%
  top_n(3, wt = n) %>%
  select(PAGE)

# Pages that appear in u2's own rows, also as a one-column table.
u2_p_s <- filter(dcluster, USER == 'u2') %>%
  select(PAGE)

top_u2_s
u2_p_s

# anti_join() operates on tables, which is why select() is used above
# instead of pull(). The join key is stated explicitly so the result does
# not depend on which column names the two tables happen to share.
anti_join(top_u2_s, u2_p_s, by = 'PAGE')