-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathR_graph_bacteria_order_pathogen_status2.R
More file actions
53 lines (41 loc) · 1.68 KB
/
R_graph_bacteria_order_pathogen_status2.R
File metadata and controls
53 lines (41 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Build the master bacteria table with a 0/1 `pathogenic` flag and save it as
# `bacteria_species_out3`.
#
# Reads  ../DATA/PROCESSED/bacteria_pathogenic_mammals.Rdata
#        ../DATA/PROCESSED/bacteria_species_out.Rdata
# Writes ../DATA/PROCESSED/bacteria_species_out3.Rdata

# dplyr supplies %>%, select() and rename() used below; attaching it here is a
# no-op when a calling script has already loaded it.
library(dplyr)

# Load pathogen data ----
load("../DATA/PROCESSED/bacteria_pathogenic_mammals.Rdata")
df1 <- bacteria_pathogenic_mammals
# Remove rows for hosts that do not host pathogens (empty pathogen name).
df1 <- subset(df1, pathogen != "")

# Load the full bacteria list, keeping species-rank records only ----
load("../DATA/PROCESSED/bacteria_species_out.Rdata")
df2 <- bacteria_species_out
df2 <- subset(df2, rank == "species")
# df2$row = seq(1,dim(df2)[1])

# Drop the now-constant `rank` column and rename columns so that both tables
# share the same names.
df2 <- df2 %>%
  select(-rank) %>%
  rename(
    bacteria_species = name,
    tax_id = id,
    order_pathogen = order
  )
# The master list has no separate "original" name; use an empty placeholder so
# the column exists in both tables.
df2$bacteria_original <- ""

df1 <- rename(
  df1,
  bacteria_species = pathogen,
  bacteria_original = pathogen_original
)

# Rows of the pathogen table whose species name is absent from the master
# list. Only used by the diagnostic prints below.
inds_df1_missing_in_df2 <-
  which(!(df1$bacteria_species %in% df2$bacteria_species))
# print("present in pathogen list, absent in full bacteria list; these should all be genus-level")
# print(df1$bacteria_species[inds_df1_missing_in_df2])

# Rows of the pathogen table whose *original* name is absent from the master
# list; only these rows are kept and appended to the master list below.
df1$bacteria_original <- as.character(df1$bacteria_original)
inds_df1_original_missing_in_df2 <-
  which(!(df1$bacteria_original %in% df2$bacteria_species))
# print("present in original pathogen list, absent in full bacteria list")
# print(df1$bacteria_original[inds_df1_original_missing_in_df2])
df1 <- df1[inds_df1_original_missing_in_df2, ]
# df1 <- df1 %>%
# select(c(bacteria_species,
# order_pathogen,
# tax_id,
# bacteria_original
# ))
df1$pathogenic <- 1
print(unique(df1$bacteria_original))

# Find rows in the master list matching pathogenic species.
# The species name lives in `bacteria_species` in both tables after the
# renames above. The previous lookup used a non-existent `pathogen_species`
# column, which evaluates to NULL and therefore never matched any row.
# NOTE(review): df1 has already been reduced to rows whose original name is
# missing from the master list, so only those rows' species are flagged here.
# Confirm whether the full pathogen list was intended instead.
inds <- which(df2$bacteria_species %in% df1$bacteria_species)
df2$pathogenic <- 0
df2$pathogenic[inds] <- 1

# Append the retained pathogen rows on top of the master list.
# NOTE(review): rbind() on data frames requires identical column names in both
# inputs; this assumes df1 carries exactly df2's columns. TODO confirm.
df2 <- rbind(df1, df2)
bacteria_species_out3 <- df2
save(
  bacteria_species_out3,
  file = "../DATA/PROCESSED/bacteria_species_out3.Rdata"
)