<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Needleman-Wunsch  Algorithm in Perl]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/27454/needleman-wunsch-algorithm-in-perl?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/27454/needleman-wunsch-algorithm-in-perl?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27454/needleman-wunsch-algorithm-in-perl</guid>
	<pubDate>Sat, 21 May 2016 22:07:06 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27454/needleman-wunsch-algorithm-in-perl</link>
	<title><![CDATA[Needleman-Wunsch  Algorithm in Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

# USAGE:   perl nw.pl HEAGAWGHEE PAWHEAE BLOSUM50.txt -8

# See:     &quot;Biological sequence anaysis&quot; Durbin et al. ed. CUP 1998, Pg. 19
# Needleman-Wunsch global alignment algo (GOTHO 1982 mod)

# usage statement
die &quot;usage: $0 &lt;sequence 1&gt; &lt;sequence 2&gt; &lt;substmatrix&gt; &lt;gapscore&gt; \n&quot; unless @ARGV == 4;

# get sequences, matrix and gapcost from command line
my ($seq1, $seq2, $smfile, $gapcost) = @ARGV;

# scoring scheme (instead of using fixed MATCH and MISMATCH scores we will use values read from BLOSUM50)
my $MATCH    =  1; # +1 for letters that match
my $MISMATCH = -1; # -1 for letters that mismatch
my $GAP      = $gapcost; # for any gap
my %BLOSUM50 = ();
my @aalist = ();

# read substitution matrix
open in, $smfile;
while(&lt;in&gt;){
    if($.&lt;2){next;}
    # read columns names (aa)
    if($.&lt;3){
        chop $_;
        @aalist=split(/\s+/,$_);
        next;
        }
    chop $_;
    @vals=split(/\s+/,$_);
    $curaaROW=$vals[0];
    for($i=1;$i&lt;=$#vals;$i++){
        $curaaCOLUMN=$aalist[$i];
        $BLOSUM50{$curaaROW}{$curaaCOLUMN}=$vals[$i];
    }   
}
close in;


# initialization
my @matrix;
$matrix[0][0]{score}   = 0;
$matrix[0][0]{pointer} = &quot;none&quot;;
for(my $j = 1; $j &lt;= length($seq1); $j++) {
    $matrix[0][$j]{score}   = $GAP * $j;
    $matrix[0][$j]{pointer} = &quot;left&quot;;
}
for (my $i = 1; $i &lt;= length($seq2); $i++) {
    $matrix[$i][0]{score}   = $GAP * $i;
    $matrix[$i][0]{pointer} = &quot;up&quot;;
}

# fill
for(my $i = 1; $i &lt;= length($seq2); $i++) {
    for(my $j = 1; $j &lt;= length($seq1); $j++) {
        my ($diagonal_score, $left_score, $up_score);

        # calculate match score
        my $letter1 = substr($seq1, $j-1, 1);
        my $letter2 = substr($seq2, $i-1, 1);                            
        if ($letter1 eq $letter2) {
            $diagonal_score = $matrix[$i-1][$j-1]{score} + $BLOSUM50{$letter1}{$letter2};
        }
        else {
            $diagonal_score = $matrix[$i-1][$j-1]{score} + $BLOSUM50{$letter1}{$letter2};
        }

        # calculate gap scores
        $up_score   = $matrix[$i-1][$j]{score} + $GAP;
        $left_score = $matrix[$i][$j-1]{score} + $GAP;

        # choose best score
        if ($diagonal_score &gt;= $up_score) {
            if ($diagonal_score &gt;= $left_score) {
                $matrix[$i][$j]{score}   = $diagonal_score;
                $matrix[$i][$j]{pointer} = &quot;diagonal&quot;;
            }
        else {
                $matrix[$i][$j]{score}   = $left_score;
                $matrix[$i][$j]{pointer} = &quot;left&quot;;
            }
        } else {
            if ($up_score &gt;= $left_score) {
                $matrix[$i][$j]{score}   = $up_score;
                $matrix[$i][$j]{pointer} = &quot;up&quot;;
            }
            else {
                $matrix[$i][$j]{score}   = $left_score;
                $matrix[$i][$j]{pointer} = &quot;left&quot;;
            }
        }
    }
}

# trace-back

my $align1 = &quot;&quot;;
my $align2 = &quot;&quot;;
my $descrstr = &quot;&quot;;

# start at last cell of matrix
my $j = length($seq1);
my $i = length($seq2);

while (1) {
    last if $matrix[$i][$j]{pointer} eq &quot;none&quot;; # ends at first cell of matrix

    if ($matrix[$i][$j]{pointer} eq &quot;diagonal&quot;) {
        $align1 .= substr($seq1, $j-1, 1);
        $align2 .= substr($seq2, $i-1, 1);
        if(substr($seq1, $j-1,1) eq substr($seq2, $i-1,1)){$descrstr .=&quot;|&quot;;}else{$descrstr .= &quot;.&quot;;}
        $i--;
        $j--;
    }
    elsif ($matrix[$i][$j]{pointer} eq &quot;left&quot;) {
        $align1 .= substr($seq1, $j-1, 1);
        $align2 .= &quot;-&quot;;
        $descrstr .= &quot; &quot;;
        $j--;
    }
    elsif ($matrix[$i][$j]{pointer} eq &quot;up&quot;) {
        $align1 .= &quot;-&quot;;
        $align2 .= substr($seq2, $i-1, 1);
        $descrstr .= &quot; &quot;;
        $i--;
    }    
}

$align1 = reverse $align1;
$align2 = reverse $align2;
$descrstr = reverse $descrstr;

# print matrices:
print &quot;\n\n&quot;;

for(my $i = 0; $i &lt;= length($seq2); $i++) {
    for(my $j = 0; $j &lt;= length($seq1); $j++) {
        printf(&quot;%2.1f&quot;, $matrix[$i][$j]{score});
        print(&quot;\t&quot;);
    }
    print&quot;\n&quot;;
}
print &quot;\n\n&quot;;

# print the alignment:
print &quot;$align1\n&quot;;
print &quot;$descrstr\n&quot;;
print &quot;$align2\n&quot;;

__END__
# Entries for the BLOSUM50 matrix at a scale of ln(2)/3.0.
Find matrix at http://bioinformaticsonline.com/file/view/27455/blosum50-matrix</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>

</channel>
</rss>