Our Sponsors



Download BioinformaticsOnline(BOL) Apps in your chrome browser.




Biological Sequence handling with Perl !

  • Public
By Rahul Nayak 2382 days ago
package Sequence::Generic; # File: Sequence/Generic.pm use strict; use Carp; use overload '""' => 'asString', 'neg' => 'reverse', '.' => 'concatenate', 'fallback' => 'TRUE'; # These methods should be overriden by child classes # class constructor sub new { my $class = shift; croak "$class must override the new() method"; } # Return the sequence as a string sub seq { my $self = shift; croak ref($self)," must override the seq() method"; } # Return the type of the sequence as a human readable string sub type { return 'Generic Sequence'; } # These methods probably don't have to be overridden # The length of the sequence sub length { my $self = shift; return length($self->seq); } # The reverse of the sequence sub reverse { my $self = shift; my $reversed = reverse $self->seq; return $reversed; } # A human-readable description of the object sub asString { my $self = shift; return $self->type . '(' . $self->length . ' residues)'; } # Concatenate two sequences together and return the result sub concatenate { my $self = shift; my ($new_seq,$prepend) = @_; my ($to_append); if (ref($new_seq)) { croak "argument to concatenate must be a string or a Sequence object" unless $new_seq->isa(__PACKAGE__); $to_append = $new_seq->seq ; } else { $to_append = $new_seq; } return $self->new($prepend ? $to_append . $self->seq : $self->seq . $to_append); } 1; Back to Article Listing Two package Sequence::Nucleotide; # file: Sequence/Nucleotide.pm use Sequence::Generic; use Sequence::Nucleotide::Subsequence; use Sequence::Alignment; use Carp; use strict; use vars '@ISA'; :Generic'; my %CODON_TABLE = ( UCA => 'S',UCG => 'S',UCC => 'S',UCU => 'S', UUU => 'F',UUC => 'F',UUA => 'L',UUG => 'L', UAU => 'Y',UAC => 'Y',UAA => '*',UAG => '*', UGU => 'C',UGC => 'C',UGA => '*',UGG => 'W', CUA => 'L',CUG => 'L',CUC => 'L',CUU => 'L', CCA => 'P',CCG => 'P',CCC => 'P',CCU => 'P', CAU => 'H',CAC => 'H',CAA => 'Q',CAG => 'Q', CGA => 'R',CGG => 'R',CGC => 'R',CGU => 'R', AUU => 'I',AUC => 'I',AUA => 'I',AUG => 'M', ACA => 'T',ACG => 'T',ACC => 'T',ACU => 'T', AAU => 'N',AAC => 'N',AAA => 'K',AAG => 'K', AGU => 'S',AGC => 'S',AGA => 'R',AGG => 'R', GUA => 'V',GUG => 'V',GUC => 'V',GUU => 'V', GCA => 'A',GCG => 'A',GCC => 'A',GCU => 'A', GAU => 'D',GAC => 'D',GAA => 'E',GAG => 'E', GGA => 'G',GGG => 'G',GGC => 'G',GGU => 'G', ); *complement = *reversec = \&reverse; sub new { my $class = shift; $class = ref($class) if ref($class); my ($sequence,$type) = @_; my $self = bless {},$class; if (ref($sequence)) { croak "Can't initialize sequence from non-Sequence object.\n" unless $sequence->can('seq'); %{$self} = %{$sequence}; # clone operation } else { croak "Doesn't look like sequence data" unless $sequence=~/^[gactnu\s]+$/i; $self->{'data'} = $self->_canonicalize($sequence); $self->{'type'} = $type || ($sequence=~/u/i ? 'RNA' : 'DNA'); } return $self; } sub seq { my $self = shift; $self->{'data'} = $self->_canonicalize($_[0]) if defined($_[0]); my $seq = $self->{'data'}; return $seq unless $self->is_RNA; $seq=~tr/T/U/; return $seq; } sub type { my $self = shift; return defined($_[0]) ? $self->{'type'} = $_[0] : $self->{'type'}; } sub is_DNA { my $self = shift; return $self->type eq 'DNA'; } sub is_RNA { my $self = shift; return $self->type eq 'RNA'; } sub subseq { my $self = shift; my ($start,$end) = @_; return (__PACKAGE__ . '::Subsequence')->new($self,$start,$end); } sub reverse { my $self = shift; return (__PACKAGE__ . '::Subsequence')->new($self,$self->length,1); } sub translate { my $self = shift; my $frame = shift() || 1; my $l = $self->length; my $seq = $frame > 0 ? $self->subseq($frame,$l-($l-$frame+1)%3) : $self->reverse->subseq(abs($frame),$l-($l-abs($frame)+1)%3); my $s = $seq->seq; $s=~tr/T/U/; # put it in RNA mode $s =~ s/(\S{3})/$CODON_TABLE{$1} || 'X'/eg; return $s; } sub longest_orf { my $self = shift; my ($max,$pos,$frame); foreach (-3..-1,1..3) { my $translation = $self->translate($_); while ($translation=~/([^*]+)/g) { if (length($1) > length($max)) { $max = $1; $frame = $_; $pos = pos($translation) - length($max); } } } $pos *= 3; $pos += abs($frame); return ($pos,$pos+3*length($max)-1) if $frame > 0; return ($self->length-$pos,$self->length-$pos-3*length($max)); } sub align { my $self = shift; my $seq = shift; $seq = $seq->seq if ref($seq); return new Sequence::Alignment(src=>$seq,target=>$self->seq); } sub _canonicalize { my $self = shift; my $seq = shift; $seq =~ tr/uU/tT/; $seq =~ s/[^gatcn]//ig; return uc($seq); } 1;