forked from Tong-Chen/NGS
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanovaTwoFactor.sh
More file actions
executable file
·190 lines (158 loc) · 4.58 KB
/
anovaTwoFactor.sh
File metadata and controls
executable file
·190 lines (158 loc) · 4.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/bin/bash
#set -x
set -e
set -u
usage()
{
cat <<EOF >&2
${txtcyn}
Usage:
$0 options${txtrst}
${bldblu}Function${txtrst}:
This script is used to do ANOVA analysis with two factors.
It accepts several types of input data.
1. Compute if there is significant difference for every compound in each two groups.
Data matrix
Compound Ita1 Ita2 Chi1 Chi2 Ger1 Ger2 ...
pig 0 0 800 790.66089 200 200.49341 ...
rabbit 283.05845 145.22119 1224.42165 501.76551 1176.32261 497.01935 ...
monkey 5316.20028 5324.68285 8223.78233 6077.86028 10167.074 15420.62318 ...
Group file (two column file, header is needed but their real name will be omitted)
Individual genotyep treament
Ita1 Ita 0h
Ita2 Ita 0h
Ita3 Ita 0h
Ita4 Ita 1h
Ita5 Ita 1h
Ita6 Ita 1h
Chi1 Chi 0h
Chi2 Chi 0h
Chi3 Chi 0h
Chi4 Chi 1h
Chi5 Chi 1h
Chi6 Chi 1h
Ger1 Ger 0h
Ger2 Ger 0h
Ger3 Ger 0h
Ger4 Ger 1h
Ger5 Ger 1h
Ger6 Ger 1h
The output would be
Compound genotype treatment genotype:treatment
monkey 0.177434068907696 0.733945335047997 0.0951758177204322
pig 0 0 0
rabbit 0.997701154807196 0.37525039314285 0.397819955522017
${txtbld}OPTIONS${txtrst}:
-f Data matrix ${bldred}[NECESSARY]${txtrst}
-F Filter data by mad value (Median Absolute Deviation)
${bldred}[Default 0 meaning no filter]${txtrst}
-g Group file. Normally the first column of group file
corresponds with the first column of data matrix to
specify the group information (order not matter).
${bldred}[NECESSARY]${txtrst}
-a Factor 1, one coulmn name of group file ${bldred}[NECESSARY]${txtrst}
-b Factor 2, one column name of group file ${bldred}[NECESSARY]${txtrst}
-Q FDR filter. ${bldred}[Default 0.1]${txtrst}
-I (uppercase I)Do these two factors have interactions.
${bldred}[Default TRUE]${txtrst}
EOF
}
file=
group=
mad_threshold=0
header='TRUE'
factor1=''
factor2=''
interaction="TRUE"
fdr=0.1
while getopts "hf:g:a:b:I:F:Q:" OPTION
do
case $OPTION in
h)
usage
exit 1
;;
f)
file=$OPTARG
;;
F)
mad_threshold=$OPTARG
;;
g)
group=$OPTARG
;;
a)
factor1=$OPTARG
;;
b)
factor2=$OPTARG
;;
I)
interaction=$OPTARG
;;
Q)
fdr=$OPTARG
;;
?)
usage
exit 1
;;
esac
done
if [ -z $file ]; then
usage
exit 1
fi
cat <<END >${file}.annnovar.r
library(reshape2)
library(data.table, quietly=T)
test <- read.table("${file}", header=T, row.names=1, quote="")
if (${mad_threshold} > 0){
mad_cmp = apply(test, 1, mad)
test <- test[mad_cmp>${mad_threshold}, ]
}
test\$id <- rownames(test)
test_m <- melt(test, id.vars="id")
grp_file <- read.table("${group}", header=T, sep="\t", row.names = 1, quote="")
#colnames(grp_file) <- c("group")
# Here we use variable as column name because this column name exists in test_m
grp_file\$variable <- rownames(grp_file)
merged_data <- merge(test_m, grp_file, by="variable", all.x=TRUE)
if (${interaction}) {
all_result <- by(merged_data, INDICES=merged_data\$id, FUN=function(m) {
m <- droplevels(m)
ct_aov <- aov(value ~ ${factor1}+${factor2}+${factor1}:${factor2}, data=m)
#ct_hsd <- TukeyHSD(ct_aov)
#result <- as.data.frame(ct_hsd\$group[,4, drop=F])
#colnames(result) <- paste0(unique(m\$id), "_", colnames(result))
result <- as.data.frame(summary(ct_aov)[[1]][["Pr(>F)"]][1:3], row.names=c("${factor1}","${factor2}","${factor1}_${factor2}"))
colnames(result) <- unique(m\$id)
t(result)
})
result <- do.call(rbind, unname(all_result))
} else {
all_result <- by(merged_data, INDICES=merged_data\$id, FUN=function(m) {
m <- droplevels(m)
ct_aov <- aov(value ~ ${factor1}+${factor2}, data=m)
#ct_hsd <- TukeyHSD(ct_aov)
#result <- as.data.frame(ct_hsd\$group[,4, drop=F])
#colnames(result) <- paste0(unique(m\$id), "_", colnames(result))
result <- as.data.frame(summary(ct_aov)[[1]][["Pr(>F)"]][1:3], row.names=c("${factor1}","${factor2}"))
colnames(result) <- unique(m\$id)
t(result)
})
result <- do.call(rbind, unname(all_result))
}
result_adjusted = apply(result, 2, p.adjust,"BH")
colnames(result_adjusted) <- paste(colnames(result_adjusted), 'FDR', sep="_")
result <- cbind(result, result_adjusted)
result <- as.data.frame(result)
result <- cbind(id=rownames(result), result)
result_m <- melt(result, id.vars=c('id'))
result_m_0_1 <- result_m[result_m\$value<${fdr}, ]
write.table(result_m_0_1, file="${file}.${factor1}.${factor2}.sig.xls", sep="\t", quote=F, row.names=F, col.names=F)
system("s-plot vennDiagram -f ${file}.${factor1}.${factor2}.sig.xls -a ${factor1}_FDR -b ${factor2}_FDR -c ${factor1}_${factor2}_FDR")
#result
write.table(result, file="${file}.${factor1}.${factor2}.xls", sep="\t", quote=F, row.names=F)
END
Rscript ${file}.annnovar.r