#!/usr/bin/env bash
#
# BioinformaticsProjectAnswer.sh
#
# Builds one HMM profile each for the hsp70 and mcrA reference genes,
# searches every proteome with both profiles, tabulates the number of
# matches, and lists the top candidate pH-resistant methanogens (proteomes
# with at least one mcrA match, ranked by hsp70 match count).
#
# Usage: bash BioinformaticsProjectAnswer.sh
#
# Assumed layout (same as the original assignment environment):
#   ~/Private/bioinformaticsProject/ref_sequences  hsp70gene_*.fasta,
#                                                  mcrAgene_*.fasta
#   ~/Private/bioinformaticsProject/proteomes      proteome_*.fasta
#   ~/Private/Biocomputing2022/tools               muscle, hmmbuild, hmmsearch
#
# Outputs (written to the proteomes directory, overwritten on every run):
#   finaltable.txt          header + one "name,mcrA hits,hsp70 hits" row
#                           per proteome
#   pH.txt                  title line + the ranked candidate rows
#   proteomecandidates.txt  names of the ranked candidates, one per line
#
# Errors are reported through return codes rather than `set -e`; `main` is
# the final command, so its status becomes the script's exit status.
#
# Reference results committed with the original analysis
# (finaltable.txt; columns: proteome, mcrA hits, hsp70 hits):
#   proteome_01,0,4 proteome_02,0,1 proteome_03,1,3 proteome_04,0,4 proteome_05,1,2
#   proteome_06,0,0 proteome_07,1,2 proteome_08,0,5 proteome_09,0,1 proteome_10,0,3
#   proteome_11,0,6 proteome_12,0,6 proteome_13,0,3 proteome_14,0,1 proteome_15,1,1
#   proteome_16,1,1 proteome_17,0,3 proteome_18,0,8 proteome_19,2,1 proteome_20,0,3
#   proteome_21,0,5 proteome_22,0,9 proteome_23,2,2 proteome_24,1,2 proteome_25,0,5
#   proteome_26,0,1 proteome_27,0,1 proteome_28,0,1 proteome_29,1,0 proteome_30,0,1
#   proteome_31,0,7 proteome_32,0,4 proteome_33,0,0 proteome_34,0,1 proteome_35,0,1
#   proteome_36,0,3 proteome_37,0,1 proteome_38,1,1 proteome_39,1,1 proteome_40,0,1
#   proteome_41,0,1 proteome_42,1,3 proteome_43,0,3 proteome_44,1,1 proteome_45,1,3
#   proteome_46,0,1 proteome_47,0,1 proteome_48,1,1 proteome_49,0,3 proteome_50,1,3
# Candidates committed as pHcandidates.txt from that run (in that order):
#   proteome_50 proteome_45 proteome_42 proteome_03 proteome_24 proteome_23
#   proteome_07 proteome_05 proteome_48 proteome_44 proteome_39 proteome_38
#   proteome_19 proteome_16 proteome_15
# On that table this script selects the same 15 proteomes; only the order of
# proteomes with equal hsp70 counts differs (ties are now broken by name).

set -u
set -o pipefail

PROJECT_DIR=~/Private/bioinformaticsProject
TOOLS_DIR=~/Private/Biocomputing2022/tools
REF_DIR="${PROJECT_DIR}/ref_sequences"
PROTEOME_DIR="${PROJECT_DIR}/proteomes"
CANDIDATE_LIMIT=15
readonly PROJECT_DIR TOOLS_DIR REF_DIR PROTEOME_DIR CANDIDATE_LIMIT

#######################################
# Print a diagnostic message on stderr.
# Arguments: $@ - message words
#######################################
warn() {
  printf '%s\n' "$*" >&2
}

#######################################
# Count the hit rows of an hmmsearch --tblout file.
# Only lines that START with '#' are treated as comments, so a hit whose
# description merely contains '#' is still counted; blank lines are ignored.
# Arguments: $1 - path to the tabular output file
# Outputs:   the number of hit rows on stdout (0 when there are none)
#######################################
count_hits() {
  awk '!/^#/ && NF { hits++ } END { print hits + 0 }' "$1"
}

#######################################
# Select and rank candidate rows from a "name,mcrA,hsp70" table.
# Keeps rows whose mcrA count is a positive integer (this also skips the
# header row), orders them by hsp70 count, highest first, comparing
# numerically so that 10 ranks above 9, and breaks ties by name.
# Arguments: $1 - path to the table
#            $2 - maximum number of rows to print (default 15)
# Outputs:   the selected rows on stdout
#######################################
rank_candidates() {
  local table=$1
  local limit=${2:-15}
  awk -F, '$2 ~ /^[0-9]+$/ && $2 + 0 > 0' "$table" \
    | LC_ALL=C sort -t, -k3,3nr -k1,1 \
    | awk -v limit="$limit" 'NR <= limit'
}

#######################################
# Align one family of reference sequences and build its HMM profile.
# Runs in a subshell so the directory change does not leak to the caller.
# The steps run strictly one after another: concatenate, align, build.
# Arguments: $1 - reference file prefix (matches <prefix>_*.fasta)
#            $2 - name of the concatenated FASTA file to write
#            $3 - name of the muscle alignment file to write
#            $4 - name of the HMM profile to write (also copied to the
#                 proteomes directory)
# Returns:   0 on success, 1 if any step fails
#######################################
build_profile() (
  local prefix=$1 combined=$2 alignment=$3 hmm=$4
  local -a references

  cd "$REF_DIR" || exit 1
  shopt -s nullglob
  references=("${prefix}"_*.fasta)
  if ((${#references[@]} == 0)); then
    warn "error: no ${prefix}_*.fasta files in ${REF_DIR}"
    exit 1
  fi

  # Truncate instead of appending so a rerun does not duplicate sequences.
  cat -- "${references[@]}" > "$combined" || exit 1
  "${TOOLS_DIR}/muscle" -in "$combined" -out "$alignment" || exit 1
  "${TOOLS_DIR}/hmmbuild" "$hmm" "$alignment" || exit 1
  cp -- "$hmm" "${PROTEOME_DIR}/" || exit 1
)

#######################################
# Search every proteome with both profiles and write finaltable.txt.
# Runs in a subshell inside the proteomes directory. The per-proteome
# search outputs keep their original names (hmm.hspSearch<file> and
# hmmSearch.mcrA<file>).
# Returns: 0 on success, 1 if any step fails
#######################################
build_final_table() (
  local proteome_file hsp_out mcra_out mcra_hits hsp70_hits
  local -a proteomes

  cd "$PROTEOME_DIR" || exit 1
  shopt -s nullglob
  proteomes=(proteome_*.fasta)
  if ((${#proteomes[@]} == 0)); then
    warn "error: no proteome_*.fasta files in ${PROTEOME_DIR}"
    exit 1
  fi

  printf '%s\n' 'proteome name, mcrAgenes, hsp70genes' > finaltable.txt \
    || exit 1

  for proteome_file in "${proteomes[@]}"; do
    hsp_out="hmm.hspSearch${proteome_file}"
    mcra_out="hmmSearch.mcrA${proteome_file}"

    "${TOOLS_DIR}/hmmsearch" --tblout "$hsp_out" hmmbuild.hsp \
      "$proteome_file" || exit 1
    "${TOOLS_DIR}/hmmsearch" --tblout "$mcra_out" hmmbuild.mcrA \
      "$proteome_file" || exit 1

    mcra_hits=$(count_hits "$mcra_out") || exit 1
    hsp70_hits=$(count_hits "$hsp_out") || exit 1

    # Strip only the trailing ".fasta"; other dots in the name are kept.
    printf '%s,%s,%s\n' "${proteome_file%.fasta}" "$mcra_hits" \
      "$hsp70_hits" >> finaltable.txt || exit 1
  done
)

#######################################
# Write pH.txt (title + ranked rows) and proteomecandidates.txt (names
# only) from finaltable.txt. The title line never reaches the names file.
# Returns: 0 on success, non-zero if any step fails
#######################################
write_candidates() (
  cd "$PROTEOME_DIR" || exit 1
  {
    printf 'Top %s candidate pH-resistant methanogens\n' "$CANDIDATE_LIMIT"
    rank_candidates finaltable.txt "$CANDIDATE_LIMIT"
  } > pH.txt || exit 1
  rank_candidates finaltable.txt "$CANDIDATE_LIMIT" \
    | cut -d, -f1 > proteomecandidates.txt
)

#######################################
# Entry point: verify the inputs exist before writing anything, then run
# the three stages in order.
# Returns: 0 on success, 1 on the first failure
#######################################
main() {
  local dir tool

  for dir in "$REF_DIR" "$PROTEOME_DIR"; do
    if [[ ! -d "$dir" ]]; then
      warn "error: missing directory: ${dir}"
      return 1
    fi
  done
  for tool in muscle hmmbuild hmmsearch; do
    if [[ ! -x "${TOOLS_DIR}/${tool}" ]]; then
      warn "error: missing tool: ${TOOLS_DIR}/${tool}"
      return 1
    fi
  done

  build_profile hsp70gene hspgenes.txt hspGenesmuscle.txt hmmbuild.hsp \
    || return 1
  build_profile mcrAgene mcrAgene.txt mcrAmuscle.txt hmmbuild.mcrA \
    || return 1
  build_final_table || return 1
  write_candidates || return 1
}

main "$@"