Our Sponsors



Download BioinformaticsOnline(BOL) Apps in your chrome browser.




Needleman-Wunsch Algorithm in Perl

  • Public
By Radha Agarkar 3108 days ago
#!/usr/bin/perl # USAGE: perl nw.pl HEAGAWGHEE PAWHEAE BLOSUM50.txt -8 # See: "Biological sequence anaysis" Durbin et al. ed. CUP 1998, Pg. 19 # Needleman-Wunsch global alignment algo (GOTHO 1982 mod) # usage statement die "usage: $0 <sequence 1> <sequence 2> <substmatrix> <gapscore> \n" unless @ARGV == 4; # get sequences, matrix and gapcost from command line my ($seq1, $seq2, $smfile, $gapcost) = @ARGV; # scoring scheme (instead of using fixed MATCH and MISMATCH scores we will use values read from BLOSUM50) my $MATCH = 1; # +1 for letters that match my $MISMATCH = -1; # -1 for letters that mismatch my $GAP = $gapcost; # for any gap my %BLOSUM50 = (); my @aalist = (); # read substitution matrix open in, $smfile; while(<in>){ if($.<2){next;} # read columns names (aa) if($.<3){ chop $_; @aalist=split(/\s+/,$_); next; } chop $_; @vals=split(/\s+/,$_); $curaaROW=$vals[0]; for($i=1;$i<=$#vals;$i++){ $curaaCOLUMN=$aalist[$i]; $BLOSUM50{$curaaROW}{$curaaCOLUMN}=$vals[$i]; } } close in; # initialization my @matrix; $matrix[0][0]{score} = 0; $matrix[0][0]{pointer} = "none"; for(my $j = 1; $j <= length($seq1); $j++) { $matrix[0][$j]{score} = $GAP * $j; $matrix[0][$j]{pointer} = "left"; } for (my $i = 1; $i <= length($seq2); $i++) { $matrix[$i][0]{score} = $GAP * $i; $matrix[$i][0]{pointer} = "up"; } # fill for(my $i = 1; $i <= length($seq2); $i++) { for(my $j = 1; $j <= length($seq1); $j++) { my ($diagonal_score, $left_score, $up_score); # calculate match score my $letter1 = substr($seq1, $j-1, 1); my $letter2 = substr($seq2, $i-1, 1); if ($letter1 eq $letter2) { $diagonal_score = $matrix[$i-1][$j-1]{score} + $BLOSUM50{$letter1}{$letter2}; } else { $diagonal_score = $matrix[$i-1][$j-1]{score} + $BLOSUM50{$letter1}{$letter2}; } # calculate gap scores $up_score = $matrix[$i-1][$j]{score} + $GAP; $left_score = $matrix[$i][$j-1]{score} + $GAP; # choose best score if ($diagonal_score >= $up_score) { if ($diagonal_score >= $left_score) { $matrix[$i][$j]{score} = $diagonal_score; $matrix[$i][$j]{pointer} = "diagonal"; } else { $matrix[$i][$j]{score} = $left_score; $matrix[$i][$j]{pointer} = "left"; } } else { if ($up_score >= $left_score) { $matrix[$i][$j]{score} = $up_score; $matrix[$i][$j]{pointer} = "up"; } else { $matrix[$i][$j]{score} = $left_score; $matrix[$i][$j]{pointer} = "left"; } } } } # trace-back my $align1 = ""; my $align2 = ""; my $descrstr = ""; # start at last cell of matrix my $j = length($seq1); my $i = length($seq2); while (1) { last if $matrix[$i][$j]{pointer} eq "none"; # ends at first cell of matrix if ($matrix[$i][$j]{pointer} eq "diagonal") { $align1 .= substr($seq1, $j-1, 1); $align2 .= substr($seq2, $i-1, 1); if(substr($seq1, $j-1,1) eq substr($seq2, $i-1,1)){$descrstr .="|";}else{$descrstr .= ".";} $i--; $j--; } elsif ($matrix[$i][$j]{pointer} eq "left") { $align1 .= substr($seq1, $j-1, 1); $align2 .= "-"; $descrstr .= " "; $j--; } elsif ($matrix[$i][$j]{pointer} eq "up") { $align1 .= "-"; $align2 .= substr($seq2, $i-1, 1); $descrstr .= " "; $i--; } } $align1 = reverse $align1; $align2 = reverse $align2; $descrstr = reverse $descrstr; # print matrices: print "\n\n"; for(my $i = 0; $i <= length($seq2); $i++) { for(my $j = 0; $j <= length($seq1); $j++) { printf("%2.1f", $matrix[$i][$j]{score}); print("\t"); } print"\n"; } print "\n\n"; # print the alignment: print "$align1\n"; print "$descrstr\n"; print "$align2\n"; __END__ # Entries for the BLOSUM50 matrix at a scale of ln(2)/3.0. Find matrix at http://bioinformaticsonline.com/file/view/27455/blosum50-matrix