package Sequence::Generic;
# File: Sequence/Generic.pm
use strict;
use Carp;
use overload
'""' => 'asString',
'neg' => 'reverse',
'.' => 'concatenate',
'fallback' => 'TRUE';
# These methods should be overriden by child classes
# class constructor
sub new {
my $class = shift;
croak "$class must override the new() method";
}
# Return the sequence as a string
sub seq {
my $self = shift;
croak ref($self)," must override the seq() method";
}
# Return the type of the sequence as a human readable string
sub type {
return 'Generic Sequence';
}
# These methods probably don't have to be overridden
# The length of the sequence
sub length {
my $self = shift;
return length($self->seq);
}
# The reverse of the sequence
sub reverse {
my $self = shift;
my $reversed = reverse $self->seq;
return $reversed;
}
# A human-readable description of the object
sub asString {
my $self = shift;
return $self->type . '(' . $self->length . ' residues)';
}
# Concatenate two sequences together and return the result
sub concatenate {
my $self = shift;
my ($new_seq,$prepend) = @_;
my ($to_append);
if (ref($new_seq)) {
croak "argument to concatenate must be a string or a Sequence object"
unless $new_seq->isa(__PACKAGE__);
$to_append = $new_seq->seq ;
} else {
$to_append = $new_seq;
}
return $self->new($prepend ? $to_append . $self->seq
: $self->seq . $to_append);
}
1;
Back to Article
Listing Two
package Sequence::Nucleotide;
# file: Sequence/Nucleotide.pm
use Sequence::Generic;
use Sequence::Nucleotide::Subsequence;
use Sequence::Alignment;
use Carp;
use strict;
use vars '@ISA';
:Generic';
my %CODON_TABLE = (
UCA => 'S',UCG => 'S',UCC => 'S',UCU => 'S',
UUU => 'F',UUC => 'F',UUA => 'L',UUG => 'L',
UAU => 'Y',UAC => 'Y',UAA => '*',UAG => '*',
UGU => 'C',UGC => 'C',UGA => '*',UGG => 'W',
CUA => 'L',CUG => 'L',CUC => 'L',CUU => 'L',
CCA => 'P',CCG => 'P',CCC => 'P',CCU => 'P',
CAU => 'H',CAC => 'H',CAA => 'Q',CAG => 'Q',
CGA => 'R',CGG => 'R',CGC => 'R',CGU => 'R',
AUU => 'I',AUC => 'I',AUA => 'I',AUG => 'M',
ACA => 'T',ACG => 'T',ACC => 'T',ACU => 'T',
AAU => 'N',AAC => 'N',AAA => 'K',AAG => 'K',
AGU => 'S',AGC => 'S',AGA => 'R',AGG => 'R',
GUA => 'V',GUG => 'V',GUC => 'V',GUU => 'V',
GCA => 'A',GCG => 'A',GCC => 'A',GCU => 'A',
GAU => 'D',GAC => 'D',GAA => 'E',GAG => 'E',
GGA => 'G',GGG => 'G',GGC => 'G',GGU => 'G',
);
*complement = *reversec = \&reverse;
sub new {
my $class = shift;
$class = ref($class) if ref($class);
my ($sequence,$type) = @_;
my $self = bless {},$class;
if (ref($sequence)) {
croak "Can't initialize sequence from non-Sequence object.\n"
unless $sequence->can('seq');
%{$self} = %{$sequence}; # clone operation
} else {
croak "Doesn't look like sequence data"
unless $sequence=~/^[gactnu\s]+$/i;
$self->{'data'} = $self->_canonicalize($sequence);
$self->{'type'} = $type || ($sequence=~/u/i ? 'RNA' : 'DNA');
}
return $self;
}
sub seq {
my $self = shift;
$self->{'data'} = $self->_canonicalize($_[0]) if defined($_[0]);
my $seq = $self->{'data'};
return $seq unless $self->is_RNA;
$seq=~tr/T/U/;
return $seq;
}
sub type {
my $self = shift;
return defined($_[0]) ? $self->{'type'} = $_[0] : $self->{'type'};
}
sub is_DNA {
my $self = shift;
return $self->type eq 'DNA';
}
sub is_RNA {
my $self = shift;
return $self->type eq 'RNA';
}
sub subseq {
my $self = shift;
my ($start,$end) = @_;
return (__PACKAGE__ . '::Subsequence')->new($self,$start,$end);
}
sub reverse {
my $self = shift;
return (__PACKAGE__ . '::Subsequence')->new($self,$self->length,1);
}
sub translate {
my $self = shift;
my $frame = shift() || 1;
my $l = $self->length;
my $seq = $frame > 0 ? $self->subseq($frame,$l-($l-$frame+1)%3)
: $self->reverse->subseq(abs($frame),$l-($l-abs($frame)+1)%3);
my $s = $seq->seq;
$s=~tr/T/U/; # put it in RNA mode
$s =~ s/(\S{3})/$CODON_TABLE{$1} || 'X'/eg;
return $s;
}
sub longest_orf {
my $self = shift;
my ($max,$pos,$frame);
foreach (-3..-1,1..3) {
my $translation = $self->translate($_);
while ($translation=~/([^*]+)/g) {
if (length($1) > length($max)) {
$max = $1;
$frame = $_;
$pos = pos($translation) - length($max);
}
}
}
$pos *= 3;
$pos += abs($frame);
return ($pos,$pos+3*length($max)-1) if $frame > 0;
return ($self->length-$pos,$self->length-$pos-3*length($max));
}
sub align {
my $self = shift;
my $seq = shift;
$seq = $seq->seq if ref($seq);
return new Sequence::Alignment(src=>$seq,target=>$self->seq);
}
sub _canonicalize {
my $self = shift;
my $seq = shift;
$seq =~ tr/uU/tT/;
$seq =~ s/[^gatcn]//ig;
return uc($seq);
}
1;