<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Blast result parser with Perl and Bioperl]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/27312/blast-result-parser-with-perl-and-bioperl?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/27312/blast-result-parser-with-perl-and-bioperl?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27312/blast-result-parser-with-perl-and-bioperl</guid>
	<pubDate>Fri, 13 May 2016 03:15:06 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27312/blast-result-parser-with-perl-and-bioperl</link>
	<title><![CDATA[Blast result parser with Perl and Bioperl]]></title>
	<description><![CDATA[<code>#!/usr/local/bin/perl

#
#	Dr. Xiaodong Bai
#	It may be freely distributed under GNU General Public License.
#	This script will parse an NCBI blastx output file and output the top N hits of each blast search result.
#	For each hit, the following results are reported:
#	accession number, length, description, E value, bit score, query frame, query start, query end, hit start, hit end, positives, and identical
# 	The results are tab-delimited and ready for import into a spreadsheet program for browsing and further analysis.
#

use strict;
use warnings;
use Bio::SearchIO;

# Usage information: exactly three command-line arguments are required
die &quot;Usage: $0 &lt;BLAST-report-file&gt; &lt;number-of-top-hits&gt; &lt;output-file&gt;\n&quot; if (@ARGV != 3);

my ($infile, $numHits, $outfile) = @ARGV;

# The hit limit is compared numerically further down, so refuse anything that
# is not a plain non-negative integer instead of warning on every comparison.
# As before, 0 never equals the running hit count, so it reports every hit.
die &quot;Number of top hits must be a non-negative integer, got: $numHits\n&quot;
    unless ($numHits =~ /^[0-9]+\z/);

print &quot;Parsing the BLAST result ...&quot;;
my $in = Bio::SearchIO-&gt;new(-format =&gt; &#039;blast&#039;, -file =&gt; $infile);

# Three-argument open with a lexical handle: the file name is never parsed for
# mode characters and the handle is scoped to this script instead of being a
# global bareword.
open(my $out, &#039;&gt;&#039;, $outfile) or die &quot;Cannot open $outfile: $!&quot;;

# print the header info for the tab-delimited columns
print {$out} &quot;query_name\tquery_length\taccession_number\tlength\tdescription\tE value\tbit score\tframe\tquery_start\t&quot;;
print {$out} &quot;query_end\thit_start\thit_end\tpositives\tidentical\n&quot;;

# extract the information for each search result in turn
while ( my $result = $in-&gt;next_result ) {
    # the name and the length of the query sequence (columns 1 and 2)
    print {$out} $result-&gt;query_name . &quot;\t&quot;;
    print {$out} $result-&gt;query_length;

    # output &quot;No hits found&quot; if there are no hits, then move to the next result
    if ( $result-&gt;num_hits == 0 ) {
        print {$out} &quot;\tNo hits found\n&quot;;
        next;
    }

    my $count = 0;

    # process each hit in turn
    while ( my $hit = $result-&gt;next_hit ) {
        # Rows after the first start on a fresh line, so one extra tab keeps
        # the query_name and query_length columns empty and aligned.
        print {$out} &quot;\t&quot; if ($count &gt; 0);

        # accession number, sequence length, description, E value and bit
        # score of the hit; concatenation keeps every call in scalar context
        print {$out} &quot;\t&quot; . $hit-&gt;accession . &quot;\t&quot;;
        print {$out} $hit-&gt;length . &quot;\t&quot;;
        print {$out} $hit-&gt;description . &quot;\t&quot;;
        print {$out} $hit-&gt;significance . &quot;\t&quot;;
        print {$out} $hit-&gt;bits . &quot;\t&quot;;

        my $hspcount = 0;

        # process every HSP of this hit; each HSP terminates its own row
        while ( my $hsp = $hit-&gt;next_hsp ) {
            # continuation rows leave the seven query/hit columns empty
            print {$out} &quot;\t\t\t\t\t\t\t&quot; if ($hspcount &gt; 0);

            # frame of the query sequence
            print {$out} $hsp-&gt;query-&gt;frame . &quot;\t&quot;;
            # start and end of the query sequence in the alignment
            print {$out} $hsp-&gt;start(&#039;query&#039;) . &quot;\t&quot; . $hsp-&gt;end(&#039;query&#039;) . &quot;\t&quot;;
            # start and end of the hit sequence in the alignment
            print {$out} $hsp-&gt;start(&#039;hit&#039;) . &quot;\t&quot; . $hsp-&gt;end(&#039;hit&#039;) . &quot;\t&quot;;
            # similarity (positives) as a percentage
            printf {$out} &quot;%.1f&quot;, ($hsp-&gt;frac_conserved() * 100);
            print {$out} &quot;%\t&quot;;
            # identity as a percentage
            printf {$out} &quot;%.1f&quot;, ($hsp-&gt;frac_identical() * 100);
            print {$out} &quot;%\n&quot;;
            $hspcount++;
        }

        # A hit that produced no HSP has not ended its row yet; terminate it
        # here so the next row does not get appended to it.
        print {$out} &quot;\n&quot; if ($hspcount == 0);

        $count++;

        # flow control for the number of hits needed (0 means no limit)
        last if ($numHits &gt; 0 and $count == $numHits);
    }
}

# Buffered write errors (for example a full disk) surface at close time.
close($out) or die &quot;Cannot close $outfile: $!&quot;;
print &quot; DONE!!!\n&quot;;</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>

</channel>
</rss>