-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathcdhit.sh
More file actions
executable file
·45 lines (36 loc) · 1.65 KB
/
cdhit.sh
File metadata and controls
executable file
·45 lines (36 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/bash
#############################################################
# Get clusters based on sequence similarity with cd-hit.    #
# The cd-hit package can be installed with conda. However,  #
# psi-cd-hit.pl will not be directly available after the    #
# installation; the path to its source file needs to be     #
# added to PATH manually. psi-cd-hit.pl also needs blastp,  #
# which can also be installed with conda.                   #
#############################################################
# --- Configuration ---------------------------------------------------------
# Both locations are relative paths, so they resolve against the directory
# the script is launched from.
root="Data/HMDB"
saveroot="${root}/HMDB_clustered_corpora"

# Name of the input FASTA file; it is read from ${root}/${f}.
f="protein.fasta"

# Thread count handed to cd-hit through its -T option.
n_tread=100
# Create one output directory per clustering level (90, 60, 30) and per
# merged cluster listing (9060, 906030).
#   -p  creates ${saveroot} itself when it is missing and succeeds when a
#       directory already exists, so the script can be re-run.
#   :?  aborts instead of creating directories directly under / when
#       saveroot is unset or empty.
for level in 90 60 30 9060 906030; do
  mkdir -p -- "${saveroot:?saveroot must be set}/hmdb_cdhit_clusters${level}" \
    || exit 1
done
# Hierarchical clustering at 90% -> 60% -> 30% sequence identity.
#
# Each level takes the representative sequences written by the level above
# it as input (cd-hit writes them to the -o path and the cluster listing to
# <-o path>.clstr). clstr_rev.pl needs this chaining to merge the .clstr
# files level by level; clustering every level from the full FASTA would
# give it listings that do not nest.
#
# Options shared by all three runs:
#   -g 1     assign each sequence to its most similar cluster
#   -G 0     use local sequence identity (requires a coverage option)
#   -aS 0.8  alignment must cover at least 80% of the shorter sequence
# cd-hit only:
#   -n       word size (5 for identity >= 0.7, 4 for 0.6-0.7)
#   -d 100   description length kept in the .clstr file
#   -p 1     print alignment overlap in the .clstr file
#   -M 0     no memory limit
# psi-cd-hit.pl only:
#   -ce 1e-6     BLAST expect-value threshold for clustering
#   -exec local  run BLAST jobs on this machine
#   -para 8      number of parallel BLAST jobs
#   -blp 4       threads per BLAST job
#
# Every step stops the script on failure because the next step reads the
# files the previous one wrote.
cluster90="${saveroot}/hmdb_cdhit_clusters90/hmdb_cluster90"
cluster60="${saveroot}/hmdb_cdhit_clusters60/hmdb_cluster60"
cluster30="${saveroot}/hmdb_cdhit_clusters30/hmdb_cluster30"

# Level 1: full input -> 90% identity representatives.
cd-hit -i "${root}/${f}" \
  -o "${cluster90}" -c 0.9 -n 5 \
  -g 1 -G 0 -aS 0.8 \
  -d 100 -p 1 -T "${n_tread}" -M 0 >> hmdb_cdhit_cluster.log || exit 1

# Level 2: 90% representatives -> 60% identity representatives.
cd-hit -i "${cluster90}" \
  -o "${cluster60}" -c 0.6 -n 4 \
  -g 1 -G 0 -aS 0.8 \
  -d 100 -p 1 -T "${n_tread}" -M 0 >> hmdb_cdhit_cluster.log || exit 1

# Level 3: 60% representatives -> 30% identity clusters. cd-hit itself does
# not go this low, so the BLAST-based psi-cd-hit.pl is used instead.
psi-cd-hit.pl -i "${cluster60}" -o "${cluster30}" \
  -c 0.3 -ce 1e-6 -aS 0.8 -G 0 -g 1 -exec local -para 8 -blp 4 || exit 1
# Merge the per-level cluster listings into cumulative ones.
#
# clstr_rev.pl takes the .clstr of a finer level and the .clstr of the next
# coarser level and prints the coarser clusters with the members of the finer
# clusters folded in. It expects the coarser level to have been clustered
# from the representatives of the finer one.
#
# The first merge stops the script on failure: the second merge reads its
# output, and the ">" redirection leaves a truncated file behind when
# clstr_rev.pl fails.
clstr_dir_prefix="${saveroot}/hmdb_cdhit_clusters"

# 90% + 60% -> members of every 60% cluster.
clstr_rev.pl \
  "${clstr_dir_prefix}90/hmdb_cluster90.clstr" \
  "${clstr_dir_prefix}60/hmdb_cluster60.clstr" \
  > "${clstr_dir_prefix}9060/hmdb_cluster9060.clstr" || exit 1

# (90% + 60%) + 30% -> members of every 30% cluster.
clstr_rev.pl \
  "${clstr_dir_prefix}9060/hmdb_cluster9060.clstr" \
  "${clstr_dir_prefix}30/hmdb_cluster30.clstr" \
  > "${clstr_dir_prefix}906030/hmdb_cluster906030.clstr" || exit 1