#! /usr/local/bin/perl -w
use strict;
use warnings;

# Extract a few summary fields from a GenBank-style text record and print
# them one per line: locus name, GI number, sequence name (DEFINITION),
# organism, gene length, CDS coordinates and the protein translation.
#
# Input:  the file named by the first command-line argument, or
#         "genbank_file.txt" when no argument is given.
# Output: labelled fields on STDOUT, in the order they occur in the file.
# Dies:   if the input file cannot be opened.
my $genbank = @ARGV ? $ARGV[0] : "genbank_file.txt";
open(my $gb_fh, '<', $genbank)
    or die "cannot open $genbank for reading: $!";

# Flag for multiline translation; 1 means translation "in progress".
my $trans = 0;

# Accumulates the protein sequence while a translation is in progress.
my $protein = '';

while (my $line = <$gb_fh>) {
    if ($trans) {
        # Continuation lines are handled before any field pattern so that
        # sequence text is never mistaken for another field.
        if ($line =~ /^([^"]*)"/) {
            # Terminal quote: the translation ends on this line.
            $protein .= $1;
            $protein =~ s/\s+//g;    # drop line breaks and indentation
            print "Translation: $protein\n";
            $trans = 0;
        }
        else {
            # No terminal quote; translation continues.
            $protein .= $line;
        }
    }
    elsif ($line =~ /LOCUS\s*(\w*)/) {
        print "Locus: $1\n";
    }
    elsif ($line =~ /VERSION.*GI:(\d*)/) {
        print "GI: $1\n";
    }
    elsif ($line =~ /DEFINITION\s*(.*)\./) {
        # Greedy match: everything up to the last period on the line.
        print "Sequence name: $1\n";
    }
    elsif ($line =~ /ORGANISM\s*(.*)/) {
        print "Organism: $1\n";
    }
    elsif ($line =~ /gene\s*(\d+)\.\.(\d+)/) {
        # ex: gene 1..1541 -- both coordinates are inclusive, so the
        # length is end - start + 1 (not simply the end coordinate).
        my $gene_length = $2 - $1 + 1;
        print "Gene length: $gene_length\n";
    }
    elsif ($line =~ /CDS\s*(\d+)\.\.(\d+)/) {
        # ex: CDS 357..1541
        my ($cds_start, $cds_end) = ($1, $2);
        print "CDS: $cds_start - $cds_end\n";
    }
    elsif ($line =~ m{/translation="([^"]*)("?)}) {
        # Protein product begins. The closing quote may sit on this same
        # line (short products) or on a later one.
        my ($sequence, $closing_quote) = ($1, $2);
        $protein = $sequence;
        if ($closing_quote) {
            $protein =~ s/\s+//g;
            print "Translation: $protein\n";
        }
        else {
            $trans = 1;
        }
    }
    # Any other line carries no data of interest and is skipped.
}

# Reaching end of file while still inside a translation means the closing
# quote was never seen.
if ($trans) {
    print "Problems: end of translation product not found.\n";
}

close($gb_fh);