From 735ed0cef528ad99961cad5e4f4561a9ce7376bb Mon Sep 17 00:00:00 2001 From: Max Zupfer Date: Tue, 11 Oct 2022 15:43:33 -0400 Subject: [PATCH 1/8] Through Hmm Build --- BioinformaticsProject.Answer | 1 + 1 file changed, 1 insertion(+) create mode 100644 BioinformaticsProject.Answer diff --git a/BioinformaticsProject.Answer b/BioinformaticsProject.Answer new file mode 100644 index 0000000..98219cc --- /dev/null +++ b/BioinformaticsProject.Answer @@ -0,0 +1 @@ +cat hsp70gene_*.fasta >> hspgenes.txt | ~/Private/Biocomputing2022/tools/muscle -in hspgenes.txt -out results.test | ~/Private/Biocomputing2022/tools/hmmbuild hmm.hsp results.test From e4981a33c9904738c87b398cc147650a44f8729b Mon Sep 17 00:00:00 2001 From: Max Zupfer Date: Tue, 11 Oct 2022 15:59:32 -0400 Subject: [PATCH 2/8] Through For Loop --- BioinformaticsProject.Answer | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/BioinformaticsProject.Answer b/BioinformaticsProject.Answer index 98219cc..6b9bb1f 100644 --- a/BioinformaticsProject.Answer +++ b/BioinformaticsProject.Answer @@ -1 +1,8 @@ -cat hsp70gene_*.fasta >> hspgenes.txt | ~/Private/Biocomputing2022/tools/muscle -in hspgenes.txt -out results.test | ~/Private/Biocomputing2022/tools/hmmbuild hmm.hsp results.test +#Creating the hmmsearch for the hspgenes +cat hsp70gene_*.fasta >> hspgenes.txt | ~/Private/Biocomputing2022/tools/muscle -in hspgenes.txt -out results.test | ~/Private/Biocomputing2022/tools/hmmbuild hmm.hsp results.test | cp hmm.hsp ../proteomes | ~/Private/Biocomputing2022/tools/hmmsearch --tblout hmm.hspsearch hmm.hsp proteome_01.fasta + +#For Loop +for file in proteome_*.fasta +do + ~Private/Biocomputing2022/tools/hmmsearch --tblout hmm.hspsearch${file} hmm.hsp $file +done From 8af90a078fb5caa33a4d090ce5c356dc9db4bb65 Mon Sep 17 00:00:00 2001 From: Max Zupfer Date: Tue, 11 Oct 2022 17:18:27 -0400 Subject: [PATCH 3/8] through mcrA For Loop --- BioinformaticsProject.Answer | 30 ++++++++++++++++++++++++++---- 1 file 
changed, 26 insertions(+), 4 deletions(-) diff --git a/BioinformaticsProject.Answer b/BioinformaticsProject.Answer index 6b9bb1f..f628bde 100644 --- a/BioinformaticsProject.Answer +++ b/BioinformaticsProject.Answer @@ -1,8 +1,30 @@ -#Creating the hmmsearch for the hspgenes -cat hsp70gene_*.fasta >> hspgenes.txt | ~/Private/Biocomputing2022/tools/muscle -in hspgenes.txt -out results.test | ~/Private/Biocomputing2022/tools/hmmbuild hmm.hsp results.test | cp hmm.hsp ../proteomes | ~/Private/Biocomputing2022/tools/hmmsearch --tblout hmm.hspsearch hmm.hsp proteome_01.fasta +#Creating the Muscle for hspGenes +cd ~/Private/bioinformaticsProject/ref_sequences | cat hsp70gene_*.fasta >> hspgenes.txt | ~/Private/Biocomputing2022/tools/muscle -in hspgenes.txt -out hspGenesmuscle.txt +#HmmBuild for hspGenes +~/Private/Biocomputing2022/tools/hmmbuild hmmbuild.hsp hspGenesmuscle.txt +#Copy hmmbuild.hsp to Proteome Directory +cp hmmbuild.hsp ../proteomes +#Move to Proteomes Directory to Run for loop +~/Private/bioinformaticsProject/proteomes +#HmmSearch for hspGenes For Loop +for file in proteome_*.fasta +do + ~/Private/Biocomputing2022/tools/hmmsearch --tblout hmm.hspSearch${file} hmmbuild.hsp $file +done -#For Loop + + +#Creating the Muscle for the mcrAgenes +cd ~/Private/bioinformaticsProject/ref_sequences | cat mcrAgene_*.fasta >> mcrAgene.txt | ~/Private/Biocomputing2022/tools/muscle -in mcrAgene.txt -out mcrAmuscle.txt +#HmmBuild for mcrAgenes +~/Private/Biocomputing2022/tools/hmmbuild hmmbuild.mcrA mcrAmuscle.txt +#Copy hmmbuild.mcrA to Proteomes Directory +cp hmmbuild.mcrA ../proteomes +#Move to Proteomes Directory to Run For Loop +~/Private/bioinformaticsProject/proteomes +#HmmSearch for mcrA gene For Loop for file in proteome_*.fasta do - ~Private/Biocomputing2022/tools/hmmsearch --tblout hmm.hspsearch${file} hmm.hsp $file + ~/Private/Biocomputing2022/tools/hmmsearch --tblout hmmSearch.mcrA${file} hmmbuild.mcrA $file done + From 5f00635d1522ab6fd1cb96c8b8dddcee34df15b3 
Mon Sep 17 00:00:00 2001
From: Max Zupfer
Date: Thu, 13 Oct 2022 17:18:44 -0400
Subject: [PATCH 4/8] Final Bioinformatics Script

---
 BioinformaticsProjectAnswer.sh | 68 ++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 BioinformaticsProjectAnswer.sh

diff --git a/BioinformaticsProjectAnswer.sh b/BioinformaticsProjectAnswer.sh
new file mode 100644
--- /dev/null
+++ b/BioinformaticsProjectAnswer.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+# Rank candidate pH-resistant methanogens: count mcrA and hsp70 HMM hits in
+# each proteome, then list the proteomes that carry mcrA, most hsp70 hits first.
+#
+# Usage: bash BioinformaticsProjectAnswer.sh
+# Assumes our file system looks the same as the Graduate Student's:
+#   ~/Private/bioinformaticsProject/{ref_sequences,proteomes}
+#   ~/Private/Biocomputing2022/tools/{muscle,hmmbuild,hmmsearch}
+# Outputs, all written to the proteomes directory:
+#   finaltable.txt         - proteome,mcrA hits,hsp70 hits (one row per proteome)
+#   pH.txt                 - title line followed by the top 15 candidate rows
+#   proteomecandidates.txt - names of the top 15 candidates, one per line
+set -euo pipefail
+
+project_dir="$HOME/Private/bioinformaticsProject"
+tools_dir="$HOME/Private/Biocomputing2022/tools"
+
+# build_profile PREFIX COMBINED ALIGNED PROFILE
+# Concatenate PREFIX_*.fasta, align with muscle, build an HMM profile with
+# hmmbuild, and copy the profile next to the proteomes.
+build_profile() {
+  local prefix=$1 combined=$2 aligned=$3 profile=$4
+  # Each step is its own command (not a pipeline stage) so it finishes before
+  # the next one reads its output; '>' keeps reruns from duplicating sequences.
+  cat "$prefix"_*.fasta > "$combined"
+  "$tools_dir/muscle" -in "$combined" -out "$aligned"
+  "$tools_dir/hmmbuild" "$profile" "$aligned"
+  cp -- "$profile" ../proteomes/
+}
+
+# count_hits TBLOUT
+# Print the number of hit rows in an hmmsearch --tblout file; only lines that
+# start with '#' are comments.
+count_hits() {
+  awk '!/^#/ { n++ } END { print n + 0 }' "$1"
+}
+
+# Build both profiles from the reference sequences.
+cd "$project_dir/ref_sequences"
+build_profile hsp70gene hspgenes.txt hspGenesmuscle.txt hmmbuild.hsp
+build_profile mcrAgene mcrAgene.txt mcrAmuscle.txt hmmbuild.mcrA
+
+# Search every proteome with both profiles and tabulate the hit counts.
+cd "$project_dir/proteomes"
+printf '%s\n' 'proteome name, mcrAgenes, hsp70genes' > final.txt
+for file in proteome_*.fasta; do
+  "$tools_dir/hmmsearch" --tblout "hmm.hspSearch${file}" hmmbuild.hsp "$file"
+  "$tools_dir/hmmsearch" --tblout "hmmSearch.mcrA${file}" hmmbuild.mcrA "$file"
+  mcra_matches=$(count_hits "hmmSearch.mcrA${file}")
+  hsp70_matches=$(count_hits "hmm.hspSearch${file}")
+  printf '%s,%s,%s\n' "$file" "$mcra_matches" "$hsp70_matches" >> final.txt
+done
+
+# Final table: drop the header row and the .fasta suffix from proteome names.
+tail -n +2 final.txt | sed 's/\.fasta,/,/' > finaltable.txt
+
+# Candidates must carry mcrA (count > 0); rank by hsp70 count, numerically,
+# highest first. Ties fall back to proteome name, descending, so the order is
+# reproducible. sed (not head) reads all input, so sort never sees SIGPIPE.
+{
+  printf '%s\n' 'Top 15 candidate pH-resistant methanogens'
+  awk -F, '$2 > 0' finaltable.txt \
+    | LC_ALL=C sort -t, -k3,3nr -k1,1r \
+    | sed -n '1,15p'
+} > pH.txt
+
+# Names only; skip the title line so it does not end up in the candidate list.
+tail -n +2 pH.txt | cut -d, -f1 > proteomecandidates.txt
From c6569c95caec252024f360ff58eef69bbcc4756c Mon Sep 17 00:00:00 2001
From: Max Zupfer
Date: Sun, 23 Oct 2022 17:01:21 -0400
Subject: [PATCH 5/8] Final pH Candidates

---
 proteomes/pHcandidates.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 proteomes/pHcandidates.txt

diff --git a/proteomes/pHcandidates.txt b/proteomes/pHcandidates.txt
new file mode 100644
index 0000000..5e039d3
--- /dev/null
+++ b/proteomes/pHcandidates.txt
@@ -0,0 +1,15 @@
+proteome_50
+proteome_45
+proteome_42
+proteome_03
+proteome_24
+proteome_23
+proteome_07
+proteome_05
+proteome_48
+proteome_44
+proteome_39
+proteome_38
+proteome_19
+proteome_16
+proteome_15
From 4ceb9935acd6852badda72fc76b2dc55e1aa2c34 Mon Sep 17 00:00:00 2001
From: Max Zupfer
Date: Sun, 23 Oct 2022 17:02:10 -0400
Subject: [PATCH 6/8] Final Proteome Table

---
 proteomes/finaltable.txt | 50 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 proteomes/finaltable.txt

diff --git a/proteomes/finaltable.txt b/proteomes/finaltable.txt
new file mode 100644
index 0000000..c698402
--- /dev/null
+++ b/proteomes/finaltable.txt
@@ -0,0 +1,50 @@
+proteome_01,0,4 +proteome_02,0,1 +proteome_03,1,3 +proteome_04,0,4 +proteome_05,1,2 +proteome_06,0,0 +proteome_07,1,2 +proteome_08,0,5 +proteome_09,0,1 +proteome_10,0,3 +proteome_11,0,6 +proteome_12,0,6 +proteome_13,0,3 +proteome_14,0,1 +proteome_15,1,1 +proteome_16,1,1 +proteome_17,0,3 +proteome_18,0,8 +proteome_19,2,1 +proteome_20,0,3 +proteome_21,0,5 +proteome_22,0,9 +proteome_23,2,2 +proteome_24,1,2 +proteome_25,0,5 +proteome_26,0,1 +proteome_27,0,1 +proteome_28,0,1 +proteome_29,1,0 +proteome_30,0,1 +proteome_31,0,7 +proteome_32,0,4 +proteome_33,0,0 +proteome_34,0,1 +proteome_35,0,1 +proteome_36,0,3 +proteome_37,0,1 +proteome_38,1,1 +proteome_39,1,1 +proteome_40,0,1 +proteome_41,0,1 +proteome_42,1,3 +proteome_43,0,3 +proteome_44,1,1 +proteome_45,1,3 +proteome_46,0,1 +proteome_47,0,1 +proteome_48,1,1 +proteome_49,0,3 +proteome_50,1,3 From 08b1b08143eb85aead5ef3a9872c265d557b92fa Mon Sep 17 00:00:00 2001 From: Max Zupfer Date: Sun, 23 Oct 2022 17:03:59 -0400 Subject: [PATCH 7/8] Final Proteome Table --- finaltable.txt | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 finaltable.txt diff --git a/finaltable.txt b/finaltable.txt new file mode 100644 index 0000000..c698402 --- /dev/null +++ b/finaltable.txt @@ -0,0 +1,50 @@ +proteome_01,0,4 +proteome_02,0,1 +proteome_03,1,3 +proteome_04,0,4 +proteome_05,1,2 +proteome_06,0,0 +proteome_07,1,2 +proteome_08,0,5 +proteome_09,0,1 +proteome_10,0,3 +proteome_11,0,6 +proteome_12,0,6 +proteome_13,0,3 +proteome_14,0,1 +proteome_15,1,1 +proteome_16,1,1 +proteome_17,0,3 +proteome_18,0,8 +proteome_19,2,1 +proteome_20,0,3 +proteome_21,0,5 +proteome_22,0,9 +proteome_23,2,2 +proteome_24,1,2 +proteome_25,0,5 +proteome_26,0,1 +proteome_27,0,1 +proteome_28,0,1 +proteome_29,1,0 +proteome_30,0,1 +proteome_31,0,7 +proteome_32,0,4 +proteome_33,0,0 +proteome_34,0,1 +proteome_35,0,1 +proteome_36,0,3 +proteome_37,0,1 +proteome_38,1,1 +proteome_39,1,1 +proteome_40,0,1 +proteome_41,0,1 
+proteome_42,1,3 +proteome_43,0,3 +proteome_44,1,1 +proteome_45,1,3 +proteome_46,0,1 +proteome_47,0,1 +proteome_48,1,1 +proteome_49,0,3 +proteome_50,1,3 From b50aa124c839660d21c2ff301a9d65e918f06a7c Mon Sep 17 00:00:00 2001 From: Max Zupfer Date: Sun, 23 Oct 2022 17:04:53 -0400 Subject: [PATCH 8/8] Final pH Candidates --- pHcandidates.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 pHcandidates.txt diff --git a/pHcandidates.txt b/pHcandidates.txt new file mode 100644 index 0000000..5e039d3 --- /dev/null +++ b/pHcandidates.txt @@ -0,0 +1,15 @@ +proteome_50 +proteome_45 +proteome_42 +proteome_03 +proteome_24 +proteome_23 +proteome_07 +proteome_05 +proteome_48 +proteome_44 +proteome_39 +proteome_38 +proteome_19 +proteome_16 +proteome_15