-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathSequence_Evaluation.pl
More file actions
107 lines (80 loc) · 2.79 KB
/
Sequence_Evaluation.pl
File metadata and controls
107 lines (80 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!usr/bin/perl -w
# Sequence_Evaluation is a program for evaluating the effects of mutations on RNA structure and ensemble structural properties.
# This program was developed by Walter N. Moss at Iowa State Universtiy.
#
# Usage:
#
# $ perl Sequence_Evaluation.pl input_fasta_file temperature > output_file
#
# If used in research please Cite:
#
#
# R. Lorenz, S.H. Bernhart, C. Hoener zu Siederdissen, H. Tafer, C. Flamm, P.F. Stadler and I.L. Hofacker (2011), "ViennaRNA Package 2.0", Algorithms for Molecular Biology: 6:26
# J.S. McCaskill (1990), "The equilibrium partition function and base pair binding probabilities for RNA secondary structures", Biopolymers: 29, pp 1105-1119
#Read FASTA file and fold sequences.
#Define filename
$targetfile = $ARGV[0];
open (INFILE, "$targetfile") || die "Can't open the infile!\n";
my @Fasta = <INFILE>;
close(INFILE) || die "can't close file";
my $N = @Fasta;
#Define temperature
$Temperature = $ARGV[1];
if (! defined $Temperature) { $Temperature = 37;}
#Put FASTA seqs into a Hash
my %FastaHash = ();
my $CurrentSeq = "";
#Create array of names to keep track of order
my @SeqNames = ();
for ($i = 0; $i < $N; $i++) {
$Line = $Fasta[$i];
chomp $Line;
#Place title and seq into their respective places in the FastaHash
#Test if title
if (substr($Line, 0, 1) eq ">") {
my $SeqName = $Line;
chomp $SeqName;
$SeqName =~ s/\R//g;
$SeqName =~ s/>//g;
$SeqName =~ s/:/-/g;
$CurrentSeq = $SeqName;
push (@SeqNames, $CurrentSeq);
}
#Test if sequence data (must be IUPAC code)
if ( $Line =~ m/(A|G|C|U|T|Y|R|K|M|B|D|H|V|N|S|W)/g && substr($Line, 0, 1) ne ">") {
$FastaHash{$CurrentSeq} .= $Line;
}
}
foreach my $SequenceName (@SeqNames) {
chomp $SequenceName;
my $Sequence = $FastaHash{$SequenceName};
chomp $Sequence;
$Sequence =~ s/\s+//g;
$Sequence =~ s/-//g;
$Sequence = uc $Sequence;
#Run RNAfold partition function calculation
my $Command = "echo " . $Sequence . " | RNAfold -p -T " . $Temperature;
my @Out = `$Command`;
#Parse the MFE data
my $MFEData = $Out[1];
my @MFE = split(/\s+\(/, $MFEData);
$MFEStructure = $MFE[0];
my $MFE = $MFE[1];
chomp $MFE;
$MFE =~ s/\(//g;
$MFE =~ s/\)//g;
chomp $MFEStructure;
#Parse the centroid folding data
my $CentroidData = $Out[3];
my @Centroid = split(/\s+/, $CentroidData);
my $CentroidStructure = $Centroid[0];
#Parse the data for the Ensemble Diversity
my $Data = $Out[4];
my @Data = split(/\s+/, $Data);
my $EnsembleDiversity = $Data[10];
# Print results for all mutant sequences and structures
open (OUT, ">>", "RNA2DMut_Evaluate_output.txt");
print OUT "$SequenceName\t$Sequence\t$MFEStructure\t$MFE\t$CentroidStructure\t$EnsembleDiversity\n";
close OUT;
}
`rm *.ps`;