<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Biological Sequence handling with Perl !]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/36671/biological-sequence-handling-with-perl?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/36671/biological-sequence-handling-with-perl?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/36671/biological-sequence-handling-with-perl</guid>
	<pubDate>Wed, 16 May 2018 08:18:12 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/36671/biological-sequence-handling-with-perl</link>
	<title><![CDATA[Biological Sequence handling with Perl !]]></title>
	<description><![CDATA[<code>package Sequence::Generic;
# File: Sequence/Generic.pm

use strict;
use Carp;
use overload 
  &#039;&quot;&quot;&#039;        =&gt; &#039;asString&#039;,
  &#039;neg&#039;       =&gt; &#039;reverse&#039;,
  &#039;.&#039;         =&gt; &#039;concatenate&#039;,
  &#039;fallback&#039;  =&gt; &#039;TRUE&#039;;

# These methods should be overriden by child classes
# class constructor
sub new {
    my $class = shift;
    croak &quot;$class must override the new() method&quot;;
}
# Return the sequence as a string
sub seq {
    my $self = shift;
    croak ref($self),&quot; must override the seq() method&quot;;
}
# Return the type of the sequence as a human readable string
sub type {
    return &#039;Generic Sequence&#039;;
}
# These methods probably don&#039;t have to be overridden
# The length of the sequence
sub length {
    my $self = shift;
    return length($self-&gt;seq);
}
# The reverse of the sequence
sub reverse {
    my $self = shift;
    my $reversed = reverse $self-&gt;seq;
    return $reversed;
}
# A human-readable description of the object
sub asString {
  my $self = shift;
  return $self-&gt;type . &#039;(&#039; . $self-&gt;length . &#039; residues)&#039;;
}
# Concatenate two sequences together and return the result

sub concatenate {
  my $self = shift;
  my ($new_seq,$prepend) = @_;
  my ($to_append);
  if (ref($new_seq)) {
      croak &quot;argument to concatenate must be a string or a Sequence object&quot;
      unless $new_seq-&gt;isa(__PACKAGE__);
      $to_append = $new_seq-&gt;seq ;
  } else {
      $to_append = $new_seq;
  }
  return $self-&gt;new($prepend ? $to_append . $self-&gt;seq 
                     : $self-&gt;seq . $to_append);
}
1;

Back to Article

Listing Two
 package Sequence::Nucleotide;
# file: Sequence/Nucleotide.pm

use Sequence::Generic;
use Sequence::Nucleotide::Subsequence;
use Sequence::Alignment;
use Carp;

use strict;
use vars &#039;@ISA&#039;;
:Generic&#039;;

my %CODON_TABLE = (
           UCA =&gt; &#039;S&#039;,UCG =&gt; &#039;S&#039;,UCC =&gt; &#039;S&#039;,UCU =&gt; &#039;S&#039;,
           UUU =&gt; &#039;F&#039;,UUC =&gt; &#039;F&#039;,UUA =&gt; &#039;L&#039;,UUG =&gt; &#039;L&#039;,
           UAU =&gt; &#039;Y&#039;,UAC =&gt; &#039;Y&#039;,UAA =&gt; &#039;*&#039;,UAG =&gt; &#039;*&#039;,
           UGU =&gt; &#039;C&#039;,UGC =&gt; &#039;C&#039;,UGA =&gt; &#039;*&#039;,UGG =&gt; &#039;W&#039;,
           CUA =&gt; &#039;L&#039;,CUG =&gt; &#039;L&#039;,CUC =&gt; &#039;L&#039;,CUU =&gt; &#039;L&#039;,
           CCA =&gt; &#039;P&#039;,CCG =&gt; &#039;P&#039;,CCC =&gt; &#039;P&#039;,CCU =&gt; &#039;P&#039;,
           CAU =&gt; &#039;H&#039;,CAC =&gt; &#039;H&#039;,CAA =&gt; &#039;Q&#039;,CAG =&gt; &#039;Q&#039;,
           CGA =&gt; &#039;R&#039;,CGG =&gt; &#039;R&#039;,CGC =&gt; &#039;R&#039;,CGU =&gt; &#039;R&#039;,
           AUU =&gt; &#039;I&#039;,AUC =&gt; &#039;I&#039;,AUA =&gt; &#039;I&#039;,AUG =&gt; &#039;M&#039;,
           ACA =&gt; &#039;T&#039;,ACG =&gt; &#039;T&#039;,ACC =&gt; &#039;T&#039;,ACU =&gt; &#039;T&#039;,
           AAU =&gt; &#039;N&#039;,AAC =&gt; &#039;N&#039;,AAA =&gt; &#039;K&#039;,AAG =&gt; &#039;K&#039;,
           AGU =&gt; &#039;S&#039;,AGC =&gt; &#039;S&#039;,AGA =&gt; &#039;R&#039;,AGG =&gt; &#039;R&#039;,
           GUA =&gt; &#039;V&#039;,GUG =&gt; &#039;V&#039;,GUC =&gt; &#039;V&#039;,GUU =&gt; &#039;V&#039;,
           GCA =&gt; &#039;A&#039;,GCG =&gt; &#039;A&#039;,GCC =&gt; &#039;A&#039;,GCU =&gt; &#039;A&#039;,
           GAU =&gt; &#039;D&#039;,GAC =&gt; &#039;D&#039;,GAA =&gt; &#039;E&#039;,GAG =&gt; &#039;E&#039;,
           GGA =&gt; &#039;G&#039;,GGG =&gt; &#039;G&#039;,GGC =&gt; &#039;G&#039;,GGU =&gt; &#039;G&#039;,
          );
*complement = *reversec = \&amp;reverse;

sub new {
  my $class = shift;
  $class = ref($class) if ref($class);
  my ($sequence,$type) = @_;

  my $self = bless {},$class;
  if (ref($sequence)) {
    croak &quot;Can&#039;t initialize sequence from non-Sequence object.\n&quot;
      unless $sequence-&gt;can(&#039;seq&#039;);
    %{$self} = %{$sequence};  # clone operation
  } else {
    croak &quot;Doesn&#039;t look like sequence data&quot; 
      unless $sequence=~/^[gactnu\s]+$/i;
    $self-&gt;{&#039;data&#039;} = $self-&gt;_canonicalize($sequence);
    $self-&gt;{&#039;type&#039;} = $type || ($sequence=~/u/i ? &#039;RNA&#039; : &#039;DNA&#039;);
  }
  return $self;
}
sub seq {
    my $self = shift;
    $self-&gt;{&#039;data&#039;} = $self-&gt;_canonicalize($_[0])  if defined($_[0]);
    my $seq = $self-&gt;{&#039;data&#039;};
    return $seq unless $self-&gt;is_RNA;
    $seq=~tr/T/U/;
    return $seq;
}
sub type {
    my $self = shift;
    return defined($_[0]) ? $self-&gt;{&#039;type&#039;} = $_[0] : $self-&gt;{&#039;type&#039;};
}
sub is_DNA {
    my $self = shift;
    return $self-&gt;type eq &#039;DNA&#039;;
}
sub is_RNA {
  my $self = shift;
  return $self-&gt;type eq &#039;RNA&#039;;
}
sub subseq {
  my $self = shift;
  my ($start,$end) = @_;
  return (__PACKAGE__ . &#039;::Subsequence&#039;)-&gt;new($self,$start,$end);
}
sub reverse {
  my $self = shift;
  return (__PACKAGE__ . &#039;::Subsequence&#039;)-&gt;new($self,$self-&gt;length,1);
}
sub translate {
  my $self = shift;
  my $frame = shift() || 1;
  my $l = $self-&gt;length;
  my $seq = $frame &gt; 0 ? $self-&gt;subseq($frame,$l-($l-$frame+1)%3)
              : $self-&gt;reverse-&gt;subseq(abs($frame),$l-($l-abs($frame)+1)%3);
  my $s = $seq-&gt;seq;
  $s=~tr/T/U/;  # put it in RNA mode
  $s =~ s/(\S{3})/$CODON_TABLE{$1} || &#039;X&#039;/eg;
  return $s;
}
sub longest_orf {
    my $self = shift;

    my ($max,$pos,$frame);
    foreach (-3..-1,1..3) {
    my $translation = $self-&gt;translate($_);
    while ($translation=~/([^*]+)/g) {
        if (length($1) &gt; length($max)) {
        $max = $1;
        $frame = $_;
        $pos = pos($translation) - length($max); 
        }
    }
    }
    $pos *= 3;
    $pos += abs($frame);
    return ($pos,$pos+3*length($max)-1) if $frame &gt; 0;
    return ($self-&gt;length-$pos,$self-&gt;length-$pos-3*length($max));
}
sub align {
    my $self = shift;
    my $seq = shift;
    $seq = $seq-&gt;seq if ref($seq);
    return new Sequence::Alignment(src=&gt;$seq,target=&gt;$self-&gt;seq);
}
sub _canonicalize {
  my $self = shift;
  my $seq = shift;
  $seq =~ tr/uU/tT/;
  $seq =~ s/[^gatcn]//ig;
  return uc($seq);
}
1;</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>

</channel>
</rss>