-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path03-network.r
More file actions
81 lines (68 loc) · 2.25 KB
/
03-network.r
File metadata and controls
81 lines (68 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
library(tidyverse)
library(ggraph)
library(tidygraph)
# [DRAFT] quick and dirty overview of main component
# -- parse abstracts -----------------------------------------------------------
library(rvest)
# Every saved abstract page under html/abstracts; the element names of `f` are
# carried through `map()` and become the `abstract` column via `.id` below.
f <- fs::dir_ls("html/abstracts", glob = "*.html")
cat("Parsing", length(f), "abstracts...")

# One row per abstract page: session (panel) link plus the author and
# affiliation <meta> tags.
d <- f %>%
  map(read_html) %>%
  map_dfr(
    function(page) {
      # # detailed metadata
      # html_nodes(page, "meta") %>%
      #   map_chr(html_attr, "content")
      tibble::tibble(
        panel = html_attr(
          html_nodes(page, xpath = "//a[contains(@href, 'session')]"),
          "href"
        ),
        authors = html_attr(
          html_nodes(page, "meta[name='authors']"),
          "content"
        ),
        affiliations = html_attr(
          html_nodes(page, "meta[name='affiliations']"),
          "content"
        )
      )
    },
    .id = "abstract"
  ) %>%
  mutate(
    # identifiers: keep only the digits of the file name / session link;
    # they stay character, and `abstract` is 4-padded
    abstract = str_remove_all(basename(abstract), "\\D"),
    panel = str_remove_all(basename(panel), "\\D"),
    # author strings only need their whitespace normalised
    authors = str_squish(authors)
  ) %>%
  arrange(abstract)

# terminate the progress line opened by the first `cat()`
cat("\n")

readr::write_tsv(d, "data/abstracts.tsv")
# Reload the parsed abstracts. All columns are read as character so that the
# identifier columns (`abstract`, `panel`) keep the type they were written
# with instead of being re-guessed as numbers.
d <- readr::read_tsv(
  "data/abstracts.tsv",
  col_types = readr::cols(.default = readr::col_character())
)

# Build the organization-to-organization edge list, one abstract at a time.
# Affiliations are split on their "1. ", ", 2. ", ... numbering.
e <- str_split(d$affiliations, "(,\\s)?\\d\\.\\s") %>%
  # drop the empty pieces left by the split (and missing affiliations)
  map(str_subset, "\\w") %>%
  # count each organization once per abstract, so that a repeated name does
  # not inflate the denominator of the weight below
  map(unique) %>%
  # weight = 1 / (number of organizations - 1), i.e. each organization spreads
  # a total weight of 1 over its co-affiliated organizations in that abstract
  map_dfr(~ crossing(i = .x, j = .x, w = 1 / (length(.x) - 1))) %>%
  # keep each unordered pair once and remove self-ties (this also discards
  # single-organization abstracts, whose only row is a self-tie)
  filter(i < j)

# weights range 0.25-1
table(e$w)
# Plot the main component of the affiliation network.
#
# Each tie is stored once in `e` (with i < j), so the graph has to be built as
# undirected: with the default directed graph, `centrality_degree()` counts
# out-going ties only, and `wdegree` would depend on the alphabetical order of
# the organization names.
tidygraph::as_tbl_graph(e, directed = FALSE) %>%
  tidygraph::activate(nodes) %>%
  mutate(
    # weighted degree: sum of the weights `w` of all ties of the node
    wdegree = tidygraph::centrality_degree(weights = w),
    # component membership; the plot keeps component 1 (see filter below)
    group = tidygraph::group_components(),
    # shorten names: drop the word "of" (word boundary, so that words merely
    # starting with "of" are left intact) ...
    label = str_remove_all(name, "\\sof\\b"),
    # ... and reduce names that still have 3+ words to their initials
    label = if_else(
      str_count(label, "\\s") > 1,
      str_split(label, "\\s") %>%
        map(str_sub, 1, 1) %>%
        map_chr(str_c, collapse = ""),
      label
    ),
    # label only the best-connected nodes, without the word "University"
    label = if_else(wdegree > 3, label, NA_character_) %>%
      str_remove("University") %>%
      str_squish()
  ) %>%
  filter(group == 1) %>%
  ggraph::ggraph(layout = "stress") +
  ggraph::geom_edge_link0() +
  ggraph::geom_node_point(aes(size = wdegree)) +
  ggraph::geom_node_label(aes(label = label)) +
  ggraph::theme_graph() +
  guides(size = "none")
# ggsave("example-network.png", width = 7, height = 7)
# wip