Skip to content

Commit dfd55cf

Browse files
committed
added naming sankey plot stages flexibility
1 parent 3431449 commit dfd55cf

2 files changed

Lines changed: 31 additions & 14 deletions

File tree

R/plot_sankey.R

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
#' A function to create a Sankey plot showcasing elements of seafood supply chains in ARTIS or consumption datasets.
44
#'
55
#' @param data dataframe. An ARTIS trade or consumption dataframe.
6-
#' @param cols vector. Column names to generate the sections of the Sankey plot, in the order they should appear (left to right).
6+
#' @param cols character vector. Column names to generate the sections of the Sankey plot, in the order they should appear (left to right).
7+
#' @param cols_labels character vector. User-specified labels for the columns selected with the cols argument, given in the same order as cols. Must be the same length as cols.
78
#' @param prop_flow_cutoff numeric. A proportion of the total trade; trade volumes that comprise less than this share of the total trade are renamed as "Other". Default prop_flow_cutoff = 0.05 means trade volumes less than 5\% are labeled as "Other".
89
#' @param value character. Trade quantity column name to visualize. Default is "live_weight_t".
910
#' @param show.other logical. Controls whether or not nodes within a column falling below the prop_flow_cutoff threshold should be displayed in a group ("Other"). Default value is TRUE. Filtering by the threshold occurs regardless of whether "Other" is displayed.
@@ -33,6 +34,9 @@ plot_sankey <- function(data,
3334
cols = c("source_country_iso3c",
3435
"exporter_iso3c",
3536
"importer_iso3c"),
37+
cols_labels = c("Source",
38+
"Exporter",
39+
"Importer"),
3640
prop_flow_cutoff = 0.05,
3741
value = "live_weight_t",
3842
show.other = TRUE,
@@ -43,24 +47,32 @@ plot_sankey <- function(data,
4347
warning("WARNING: The selected columns include NA values")
4448
}
4549

46-
# Setting up parameters based on user input-----------------------------------
50+
if(length(cols) != length(cols_labels)){
51+
stop("cols and cols_labels must be the same length")
52+
}
4753

48-
# Assign weight column to quantity
49-
quantity <- value
54+
# Setting up parameters based on user input-----------------------------------
5055

51-
# Summarizing data based on quantity variable selected
56+
# Summarizing data based on value variable selected
57+
58+
# FIXIT:
59+
# links <- data %>%
60+
# group_by_at(vars(cols)) %>% # vars() deprecated
61+
# summarize(total_q = sum(.data[[value]], na.rm = TRUE))
62+
63+
### FIXIT: AM new code to replace vars() for testing later
5264
links <- data %>%
53-
group_by_at(vars(cols)) %>% # vars() deprecated
54-
summarize(total_q = sum(.data[[quantity]], na.rm = TRUE))
65+
group_by(across(all_of(cols))) %>% # Replaces vars()
66+
summarize(total_q = sum(.data[[value]], na.rm = TRUE), .groups = "drop")
5567

5668
# Rename specified columns to generic names for processing
5769
colnames(links) <- c(paste("col_", 1:length(cols), sep = ""), "total_q")
58-
cols <- colnames(links)[1:length(cols)]
70+
cols_standard <- colnames(links)[1:length(cols)]
5971

6072
# Identify list of nodes by column by proportional flow cutoff
6173
node_names <- c()
6274

63-
for(i in 1:length(cols)){
75+
for(i in 1:length(cols_standard)){
6476
# Identify nodes in the column falling below the threshold
6577
node_i <- links %>%
6678
rename(col_i = paste("col_", i, sep = "")) %>%
@@ -105,12 +117,14 @@ plot_sankey <- function(data,
105117

106118
sankey_df <- links %>%
107119
# Filtering data based on prop flow cutoff - use show.other value
108-
filter_at(vars(cols), all_vars(. %in% node_names)) %>%
109-
ungroup()
120+
filter_at(vars(cols_standard), all_vars(. %in% node_names)) %>%
121+
ungroup()
122+
123+
names(sankey_df) <- c(cols_labels, "total_q")
110124

111125
sankey_df <- sankey_df %>%
112126
# Transforming into ggsankey format (x, node, next_x, next_node)
113-
ggsankey::make_long({{ cols }}, value = total_q)
127+
ggsankey::make_long({{ cols_labels }}, value = total_q)
114128

115129
num_nodes <- length(unique(c(sankey_df$node,sankey_df$next_node)))
116130

@@ -130,6 +144,6 @@ plot_sankey <- function(data,
130144
labs(x = NULL, title = plot.title) +
131145
theme(
132146
legend.position = "none",
133-
axis.text.x = element_text()
147+
axis.text.x = element_text(size = 12)
134148
)
135149
}

man/plot_sankey.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)