<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Parse a genbank file using regular expressions]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/27270/parse-a-genbank-file-using-regular-expressions?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/27270/parse-a-genbank-file-using-regular-expressions?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27270/parse-a-genbank-file-using-regular-expressions</guid>
	<pubDate>Tue, 10 May 2016 11:56:26 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27270/parse-a-genbank-file-using-regular-expressions</link>
	<title><![CDATA[Parse a genbank file using regular expressions]]></title>
	<description><![CDATA[<code>#! /usr/local/bin/perl -w

# Parse a GenBank flat file with regular expressions and print selected
# fields: locus, GI number, sequence name, organism, gene length,
# CDS coordinates and the protein translation.

use strict;

my $genbank = &quot;genbank_file.txt&quot;;

# Three-argument open with a lexical filehandle; the error message names the
# file that was actually requested.
open(my $gb_fh, &quot;&lt;&quot;, $genbank) or die &quot;cannot open $genbank for reading: $!&quot;;

# Flag for multiline translation; 1 means translation &quot;in progress&quot;
my $trans = 0;

# Protein sequence collected so far for the current translation qualifier
my $protein = &quot;&quot;;

while (&lt;$gb_fh&gt;)
{
   # Continuation lines are handled before any keyword pattern so that a
   # stretch of residues spelling e.g. LOCUS is not mistaken for a header.
   if ($trans)  {   # translation still going on
      if (/(.*)&quot;/)  {  # terminal quote; end of translation
         $protein .= $1;
         $protein =~ s/\s+//g;
         print &quot;$protein\n&quot;;
         $trans = 0;
      }
      else  {  # no terminal quote; translation continues
         $protein .= $_;
      }
   }
   elsif (/(LOCUS\s*)(\w*)(.*)/) {
      print &quot;Locus: $2\n&quot;;
   }
   elsif (/(VERSION.*GI:)(\d*)/) {
      print &quot;GI: $2\n&quot;;
   }
   elsif (/(DEFINITION\s*)(.*)(\.)/) {
      print &quot;Sequence name: $2\n&quot;;
   }
   elsif (/(ORGANISM\s*)(.*)/) {
      print &quot;Organism: $2\n&quot;;
   }
   elsif (/gene\s*(\d+)\.\.(\d+)/) {
   # ex: gene            1..2000
   # Coordinates are inclusive, so the length is end - start + 1.
      my $gene_length = $2 - $1 + 1;
      print &quot;Gene length: $gene_length\n&quot;;
   }
   elsif (/(CDS\s*)(\d*)(\.\.)(\d*)/)  {
   # ex: CDS             357..1541
      my $cds_start = $2;
      my $cds_end = $4;
      print &quot;CDS: $cds_start - $cds_end\n&quot;;
   }
   elsif (/\/translation=&quot;(.*)/)  {  # protein product begins
      print &quot;Translation: &quot;;
      $protein = $1;
      if ($protein =~ s/&quot;.*//)  {
         # Closing quote is on the same line: the translation is complete,
         # so print it now instead of entering multiline mode.
         $protein =~ s/\s+//g;
         print &quot;$protein\n&quot;;
      }
      else  {
         $trans = 1;
      }
   }
   else  {
      # Skip this data
   }
}

close($gb_fh);

# Reaching end of file while a translation is still open means its closing
# quote was never seen.
if ($trans)  {
   print &quot;Problems: end of translation product not found.\n&quot;;
}</code>]]></description>
	<dc:creator>Nishi Singh</dc:creator>
</item>

</channel>
</rss>