<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: All]]></title>
	<link>https://bioinformaticsonline.com/snippets?offset=410</link>
	<atom:link href="https://bioinformaticsonline.com/snippets?offset=410" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27400/string-matching-with-perl</guid>
	<pubDate>Wed, 18 May 2016 08:37:27 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27400/string-matching-with-perl</link>
	<title><![CDATA[String matching with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl


# make three strings of nucleotides
$dna1 = “AAAAAAAAAAAAAAATGAAAAAAAAAAAAAAAA”;
$dna2 = “AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA”;

$dna3 = “ATGAAAAAAAAAATGAAAAAAAAAAAATGAAAA”;
$pattern = “ATG”;


# match pattern to dna1
$m = $dna1 =~ m/$pattern/g;

print “Was the ATG pattern found in dna1 : $m \n”;


# match pattern in dna2

$m2 = $dna2 =~ m/$pattern/g;

print “Was the ATG pattern found in dna2 : $m2 \n”;


# find the position of the pattern match in dna1

$pos = index($dna1, $pattern);

print “The match position of ATG in dna1 is : $pos \n”;


# replace the ATG sites with CCC

$dna3 =~ s/ATG/CCC/g;

print “Replaced dna3 with CCC is : $dna3 \n”;</code>]]></description>
	<dc:creator>Abhi</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27354/extract-sequence-from-ucsc</guid>
	<pubDate>Tue, 17 May 2016 08:08:26 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27354/extract-sequence-from-ucsc</link>
	<title><![CDATA[Extract sequence from UCSC]]></title>
	<description><![CDATA[<code>#!/usr/bin/env perl

use strict;
use warnings;
use LWP::Simple;
use XML::XPath;
use XML::XPath::XMLParser;

# Use DAS of UCSC to fetch specific sequence by its given chromosome position
# From here: https://www.biostars.org/p/6156/

my $chr  = shift;
my $pos  = shift;
my $size = shift;

my $usage = &quot;Example: perl extract_seq_from_ucsc.pl 14 482780 1000\n&quot;;

if (! $size) {
	die &quot;ERROR: You must pass three arguments: chr. num., position, and size.\n$usage&quot;;
	
}

chomp $size;

my $start = $pos - ($size/2);
my $end   = $pos + ($size/2);

# Figure out URL for the DAS server. Example:
# http://genome.ucsc.edu/cgi-bin/das/calJac3/dna?segment=chr14:482280,483280

my $URL_gene =&quot;http://genome.ucsc.edu/cgi-bin/das/papAnu2/dna?segment=chr&quot;;
$URL_gene .= $chr . &quot;:&quot; . $start . &quot;,&quot; . $end;

my $xml = get($URL_gene);

my $xp = XML::XPath-&gt;new(xml=&gt;$xml);

my $nodeset = $xp-&gt;find(&#039;/DASDNA/SEQUENCE/DNA/text()&#039;); # find all sequences
# there should be only one node, anyway:    
foreach my $node ($nodeset-&gt;get_nodelist) {

	my $seq = $node-&gt;getValue;
	$seq =~ s/\s//g; # remove white spaces
	print &quot;&gt;papAnu2_chr&quot; . $chr . &quot;:&quot; . $start . &quot;-&quot; . $end . &quot;\n&quot;;
	print $seq, &quot;\n&quot;;
	
}</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27335/perl-to-print-indivisual-nucleotide-from-a-sequence</guid>
	<pubDate>Fri, 13 May 2016 10:17:35 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27335/perl-to-print-indivisual-nucleotide-from-a-sequence</link>
	<title><![CDATA[Perl to print indivisual nucleotide from a sequence!]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;

my $string = &quot;ATGCTTGCGT?AAATG??CT?GCGTA&quot;;

my @chars = split(&quot;&quot;, $string);

print &quot;First character: $chars[0]\n&quot;;</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27325/parse-a-fasta-file-with-perl</guid>
	<pubDate>Fri, 13 May 2016 05:00:18 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27325/parse-a-fasta-file-with-perl</link>
	<title><![CDATA[Parse a Fasta file with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/env perl

# Usage:  fastaRead.pl data.fa

use strict;
use warnings;

my $filename = $ARGV[0];
my  $sequence;
open my $fileH, &quot;&lt;&quot;, $filename or die &quot;could not open $filename\n&quot;;
while (&lt;$fileH&gt;) {
    chomp;
    if ($_ =~ /^&gt;/) {
        print &quot;this line is a header: $_\n&quot;;
    }
    else {
        print &quot;this line contains sequence data: $_\n&quot;;
        # Concatenate everything from the file into a single var
        $sequence .= $_;
    }
}
close $fileH;</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27320/perl-program-to-implement-sliding-window</guid>
	<pubDate>Fri, 13 May 2016 04:28:24 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27320/perl-program-to-implement-sliding-window</link>
	<title><![CDATA[Perl program to implement sliding window !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl -w

my $filename = &#039;data.txt&#039;;
open(my TR, &#039;&lt;:encoding(UTF-8)&#039;, $filename)
  or die &quot;Could not open file &#039;$filename&#039; $!&quot;;

my %hash;
while (my $line1=&lt;TR&gt;)
{
    chomp($line1);
    my @ar = split(/\t/,$line1);
    $hash{$ar[1]} = $ar[3];
}
close TR;

open my $SC, &quot;&lt;&quot;, $file2 or die &quot;Error blah blah... $!&quot;;
while (my $line2 = &lt;$SC&gt;) 
{
    my ($id, $val) = split /\t/, $line2;
    my $val_file1 = $hash{$id};
    if ( $val &gt; $val_file1 - $margin and $val &lt; $val_file1 + $margin) {
        # print out something
    }
}
close $SC;</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27313/find-and-replace-ambiguous-characters-in-fasta-file-with-perl-and-bioperl</guid>
	<pubDate>Fri, 13 May 2016 03:20:09 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27313/find-and-replace-ambiguous-characters-in-fasta-file-with-perl-and-bioperl</link>
	<title><![CDATA[Find and replace ambiguous characters in fasta file with Perl and Bioperl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl -w

my $usage=&quot;\nUsage: $0 [-h] [-m char] [fastaFileName1 ...]\n&quot;.
    &quot;  -h: help\n&quot;.
    &quot;  -m: missing character\n&quot;.
    &quot;Print out the name of sequences with characters other than ATGC-.\n&quot;.
    &quot;If -m is specified, the ambiguous characters are repleced with the\n&quot;.
    &quot;specified character.  e.g. -m &#039;?&#039; will place ? to the ambigous characters.\n&quot; .
    &quot;If multiple files are given, sequences in all files are marged.  If no \n&quot;.
    &quot;argument is given, it will take STDIN as the input\n&quot;;

our($opt_h, $opt_m);

use Bio::SeqIO;

use Getopt::Std;
getopts(&#039;hm:&#039;) || die &quot;$usage\n&quot;;
die &quot;$usage\n&quot; if (defined($opt_h));

my $format = &quot;fasta&quot;;
my @seqArr = ();

@ARGV = (&#039;-&#039;) unless @ARGV;
while (my $file = shift) {
    my $seqio_obj = Bio::SeqIO-&gt;new(-file =&gt; $file, -format =&gt; $format);
    while (my $seq = $seqio_obj-&gt;next_seq()) {
	push(@seqArr, $seq);
    }
}

#@seqArr = sort { $a-&gt;id() cmp $b-&gt;id() } @seqArr;

foreach my $s (@seqArr) {
    my $thisSeq = $s-&gt;seq();
    my $ambig = AmbiguousChar($thisSeq);
    if ($ambig ne &quot;&quot;) {
	print STDERR $s-&gt;id(), &quot;\t$ambig\n&quot;;
	if (defined($opt_m)) {
	    $thisSeq = ReplaceAmbiguousChar($thisSeq, $opt_m);
	    $s-&gt;seq($thisSeq);
	}
    }
}

if (defined($opt_m)) {
    my $seqOut = Bio::SeqIO-&gt;new(-fs =&gt; \*STDOUT, -format =&gt; $format);
    foreach my $s (@seqArr) {
	$seqOut-&gt;write_seq($s);
    }
}
exit;


sub AmbiguousChar {
    my $string = shift;
    $string =~ s/[ATGC-]//g;

    $string =~ s/\s+//g;
    return $string;
}

sub ReplaceAmbiguousChar {
    my ($string, $char) = @_;
    $string =~ s/[^ATGC-]/$char/g;
    return $string;
}</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27312/blast-result-parser-with-perl-and-bioperl</guid>
	<pubDate>Fri, 13 May 2016 03:15:06 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27312/blast-result-parser-with-perl-and-bioperl</link>
	<title><![CDATA[Blast result parser with Perl and Bioperl]]></title>
	<description><![CDATA[<code>#!/usr/local/bin/perl

#
#	Dr. Xiaodong Bai
#	It may be freely distributed under GNU General Public License.
#	This script will parse a NCBI blastx output file and output the top N hits of each blast search result.
#	For each hit, the following results are reported:
#	accesion number, length, description, E value, bit score, query frame, query start, query end, hit start, hit end, positives, and identical
# 	The results are tab-deliminated and ready for import into a spreadsheet program for browsing and further analysis.
#

use strict;
use warnings;
use Bio::SearchIO;

# Usage information
die &quot;Usage: $0 &lt;BLAST-report-file&gt; &lt;number-of-top-hits&gt; &lt;output-file&gt;\n&quot;, if (@ARGV != 3);

my ($infile,$numHits,$outfile) = @ARGV;
print &quot;Parsing the BLAST result ...&quot;;
my $in = Bio::SearchIO-&gt;new(-format =&gt; &#039;blast&#039;, -file =&gt; $infile);
open (OUT,&quot;&gt;$outfile&quot;) or die &quot;Cannot open $outfile: $!&quot;;

# print the header info for tab-deliminated columns
print OUT &quot;query_name\tquery_length\taccession_number\tlength\tdescription\tE value\tbit score\tframe\tquery_start\t&quot;;
print OUT &quot;query_end\thit_start\thit_end\tpositives\tidentical\n&quot;;

# extraction of information for each result recursively
while ( my $result = $in-&gt;next_result ) {
	# the name of the query sequence
   	print OUT $result-&gt;query_name . &quot;\t&quot;;

        # the length of the query sequence
    	print OUT $result-&gt;query_length;

        # output &quot;no hits found&quot; if there is no hits
    	if ( $result-&gt;num_hits == 0 ) {
		print OUT &quot;\tNo hits found\n&quot;;
    	} else {
		my $count = 0;

                # process each hit recursively
		while (my $hit = $result-&gt;next_hit) {
			print OUT &quot;\t&quot; if ($count &gt; 0);
                        # get the accession numbers of the hits
			print OUT &quot;\t&quot; . $hit-&gt;accession . &quot;\t&quot;;
                        # get the lengths of the hit sequences
                        print OUT $hit-&gt;length . &quot;\t&quot;;
                        # get the description of the hit sequences
			print OUT $hit-&gt;description . &quot;\t&quot;;
                        # get the E value of the hit
			print OUT $hit-&gt;significance . &quot;\t&quot;;
                        #get the bit score of the hit
			print OUT $hit-&gt;bits . &quot;\t&quot;;

                        my $hspcount = 0;

                        # process the top HSP for the top hit
			while (my $hsp = $hit-&gt;next_hsp) {
                        	print OUT &quot;\t\t\t\t\t\t\t&quot;, if ($hspcount &gt; 0);
                        	# get the frame of the query sequence
				print OUT $hsp-&gt;query-&gt;frame . &quot;\t&quot;;
                                # get the start and the end of the query sequence in the alignment
				print OUT $hsp-&gt;start(&#039;query&#039;) . &quot;\t&quot; . $hsp-&gt;end(&#039;query&#039;). &quot;\t&quot;;
                                # get the start and the end of the hit sequence in the alignment
				print OUT $hsp-&gt;start(&#039;hit&#039;) . &quot;\t&quot; . $hsp-&gt;end(&#039;hit&#039;) . &quot;\t&quot;;
                                # get the similarity value
				printf OUT &quot;%.1f&quot; , ($hsp-&gt;frac_conserved * 100);
				print OUT &quot;%\t&quot;;
                                # get the identity value
				printf OUT &quot;%.1f&quot; , ($hsp-&gt;frac_identical * 100);
		       		print OUT &quot;%\n&quot;;
                                $hspcount++;
                        }
			$count++;

                        # flow control for the number of hits needed
			last if ($count == $numHits);
		}
    	}
}
close OUT;
print &quot; DONE!!!\n&quot;;</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27308/extract-a-random-sequence-from-a-file</guid>
	<pubDate>Thu, 12 May 2016 11:02:24 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27308/extract-a-random-sequence-from-a-file</link>
	<title><![CDATA[Extract a random sequence from a file]]></title>
	<description><![CDATA[<code>#!/usr/local/bin/perl -w

use strict;
use warnings;
use autodie;

use List::Util qw/ shuffle /;

my $outputfile = &#039;randomoutput.txt&#039;;

open my $in_fh,  &#039;&lt;&#039;, &#039;seq1.fa&#039;;
open my $out_fh, &#039;&gt;&#039;, $outputfile;

my $size       = 21;
my $count      = 10;

while (my $line = &lt;$in_fh&gt;) {
   next unless $line =~ /^([ATGCN]+)/;

   my $genome     = $1;
   my $len_genome = length $genome;

   my @start_points = shuffle(0 .. $len_genome-$size);
   next unless @start_points &gt;= $count;
   print substr($genome, $_, 21), &quot;\n&quot; for @start_points[0 .. $count-1];
}</code>]]></description>
	<dc:creator>Abhi</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27301/a-multilayer-perceptron-mlp-neural-network-in-perl</guid>
	<pubDate>Wed, 11 May 2016 11:48:08 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27301/a-multilayer-perceptron-mlp-neural-network-in-perl</link>
	<title><![CDATA[A multilayer perceptron (MLP) neural network in Perl]]></title>
	<description><![CDATA[<code>#!/usr/local/bin/perl -w

####################################################
#MLP neural network in Perl Original source code by Phil Brierley
#Translated into perl - ccolbourn Oct 2004
####################################################


#Tanh hidden neurons
#Linear output neuron

#To include an input bias create an
#extra input in the training data
#and set to 1


################ User settings #########
my $numEpochs = 500;
my $numHidden = 4;
my $LR_IH = 0.7;
my $LR_HO = 0.07;

################ Data dependent settings #########
my $numInputs = 3;
my $numPatterns = 4;

########################################

my $patNum;
my $errThisPat;
my $outPred;
my $RMSerror;

my @trainInputs;
my @trainOutput;


# the outputs of the hidden neurons
my @hiddenVal;

# the weights
my @weightsIH;
my @weightsHO;


main();


#==============================================================
#********** THIS IS THE MAIN PROGRAM **************************
#==============================================================

sub main
 {

 # initiate the weights
  initWeights();

 # load in the data
  initData();

 # train the network
    for(my $j = 0;$j &lt;= $numEpochs;$j++)
    {

        for(my $i = 0;$i&lt;$numPatterns;$i++)
        {

            #select a pattern at random
            $patNum = (rand()*$numPatterns)-0.001;

            #calculate the current network output
            #and error for this pattern
            calcNet();

            #change network weights
            WeightChangesHO();
            WeightChangesIH();
        }

        #display the overall network error
        #after each epoch
        calcOverallError();

        print &quot;epoch = &quot;.$j.&quot;  RMS Error = &quot;.$RMSerror.&quot;\n&quot;;

    }

    #training has finished
    #display the results
    displayResults();

 }

#============================================================
#********** END OF THE MAIN PROGRAM **************************
#=============================================================






#***********************************
sub calcNet()
 {
    #calculate the outputs of the hidden neurons
    #the hidden neurons are tanh

    for(my $i = 0;$i&lt;$numHidden;$i++)
    {
	$hiddenVal[$i] = 0.0;

        for(my $j = 0;$j&lt;$numInputs;$j++)
	{
        $hiddenVal[$i] = $hiddenVal[$i] + ($trainInputs[$patNum][$j] * $weightsIH[$j][$i]);
	}

        $hiddenVal[$i] = tanh($hiddenVal[$i]);
    }

   #calculate the output of the network
   #the output neuron is linear
   $outPred = 0.0;

   for(my $i = 0;$i&lt;$numHidden;$i++)
   {
    $outPred = $outPred + $hiddenVal[$i] * $weightsHO[$i];
   }
    #calculate the error
    $errThisPat = $outPred - $trainOutput[$patNum];
 }


#************************************
 sub WeightChangesHO()
 #adjust the weights hidden-output
 {
   for(my $k = 0;$k&lt;$numHidden;$k++)
   {
    $weightChange = $LR_HO * $errThisPat * $hiddenVal[$k];
    $weightsHO[$k] = $weightsHO[$k] - $weightChange;

    #regularisation on the output weights
    if ($weightsHO[$k] &lt; -5)
    {
        $weightsHO[$k] = -5;
    }
    elsif ($weightsHO[$k] &gt; 5)
    {
        $weightsHO[$k] = 5;
    }
   }
 }


#************************************
 sub WeightChangesIH()
 #adjust the weights input-hidden
 {
  for(my $i = 0;$i&lt;$numHidden;$i++)
  {
   for(my $k = 0;$k&lt;$numInputs;$k++)
   {
    my $x = 1 - ($hiddenVal[$i] * $hiddenVal[$i]);
    $x = $x * $weightsHO[$i] * $errThisPat * $LR_IH;
    $x = $x * $trainInputs[$patNum][$k];
    my $weightChange = $x;
    $weightsIH[$k][$i] = $weightsIH[$k][$i] - $weightChange;
   }
  }
 }


#************************************
 sub initWeights()
 {

  for(my $j = 0;$j&lt;$numHidden;$j++)
  {
    $weightsHO[$j] = (rand() - 0.5)/2;
    for(my $i = 0;$i&lt;$numInputs;$i++)
    {
    $weightsIH[$i][$j] = (rand() - 0.5)/5;
    }
  }

 }


#************************************
 sub initData()
 {

    print &quot;initialising data\n&quot;;

    # the data here is the XOR data
    # it has been rescaled to the range
    # [-1][1]
    # an extra input valued 1 is also added
    # to act as the bias
    # the output must lie in the range -1 to 1

    $trainInputs[0][0]  = 1;
    $trainInputs[0][1]  = -1;
    $trainInputs[0][2]  = 1;    #bias
    $trainOutput[0] = 1;

    $trainInputs[1][0]  = -1;
    $trainInputs[1][1]  = 1;
    $trainInputs[1][2]  = 1;       #bias
    $trainOutput[1] = 1;

    $trainInputs[2][0]  = 1;
    $trainInputs[2][1]  = 1;
    $trainInputs[2][2]  = 1;        #bias
    $trainOutput[2] = -1;

    $trainInputs[3][0]  = -1;
    $trainInputs[3][1]  = -1;
    $trainInputs[3][2]  = 1;     #bias
    $trainOutput[3] = -1;

 }


#************************************
 sub tanh()
 {


	my $x = shift;

    if ($x &gt; 20){ return 1;}
    elsif ($x &lt; -20){ return -1;}
    else
        {
        my $a = exp($x);
        my $b = exp(-$x);
        return ($a-$b)/($a+$b);
        }
 }


#************************************
 sub displayResults()
    {
     for(my $i = 0;$i&lt;$numPatterns;$i++)
        {
        $patNum = $i;
        calcNet();
        print &quot;pat = &quot;.($patNum+1).&quot; actual = &quot;.$trainOutput[$patNum].&quot; neural model = &quot;.$outPred.&quot;\n&quot;;
        }
    }


#************************************
sub calcOverallError()
    {
     $RMSerror = 0.0;
     for(my $i = 0;$i&lt;$numPatterns;$i++)
        {
        $patNum = $i;
        calcNet();
        $RMSerror = $RMSerror + ($errThisPat * $errThisPat);
        }
     $RMSerror = $RMSerror/$numPatterns;
     $RMSerror = sqrt($RMSerror);
    }</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27299/retrieve-ncbi-genbank-records-with-a-range-of-accession-numbers</guid>
	<pubDate>Wed, 11 May 2016 11:02:40 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27299/retrieve-ncbi-genbank-records-with-a-range-of-accession-numbers</link>
	<title><![CDATA[Retrieve NCBI GenBank records with a range of accession numbers]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

#FILE: ncbi_search.pl
#AUTH: Paul Stothard (paul.stothard@gmail.com)

use warnings;
use strict;
use Getopt::Long;
use LWP::Simple;
use URI::Escape;

use LWP::UserAgent;
use HTTP::Request::Common;

my %param = (
    query      =&gt; undef,
    outputFile =&gt; undef,
    database   =&gt; undef,
    returnType =&gt; undef,
    maxRecords =&gt; undef,
    format     =&gt; undef,
    verbose    =&gt; undef,
    url        =&gt; &#039;http://www.ncbi.nlm.nih.gov/entrez/eutils&#039;,
    retries    =&gt; 0,
    maxRetries =&gt; 5,
    help       =&gt; undef
);

Getopt::Long::Configure(&#039;bundling&#039;);
GetOptions(
    &#039;q|query=s&#039;       =&gt; \$param{query},
    &#039;o|output_file=s&#039; =&gt; \$param{outputFile},
    &#039;d|database=s&#039;    =&gt; \$param{database},
    &#039;r|return_type=s&#039; =&gt; \$param{returnType},
    &#039;m|max_records=i&#039; =&gt; \$param{maxRecords},
    &#039;verbose|v&#039;       =&gt; \$param{verbose},
    &#039;h|help&#039;          =&gt; \$param{help}
);

if ( defined( $param{help} ) ) {
    print_usage();
    exit(0);
}

if (   !( defined( $param{query} ) )
    or !( defined( $param{outputFile} ) )
    or !( defined( $param{database} ) )
    or !( defined( $param{returnType} ) ) )
{
    print_usage();
    exit(1);
}

$param{returnType} = lc( $param{returnType} );

$param{query} = uri_escape( $param{query} );

_doSearch(%param);

sub _doSearch {
    my %param = @_;

    my $esearch = &quot;$param{url}/esearch.fcgi?db=$param{database}&quot;
        . &quot;&amp;retmax=1&amp;usehistory=y&amp;term=$param{query}&quot;;
    my $esearch_result = get($esearch);

    while (
        ( !defined($esearch_result) )
        || (!(  $esearch_result
                =~ m/&lt;Count&gt;(\d+)&lt;\/Count&gt;.*&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;.*&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/s
            )
        )
        )
    {
        if ($esearch_result =~ m/&lt;ERROR&gt;(.*)&lt;\/ERROR&gt;/is) {
            die(&quot;ESearch returned an error: $1&quot;);
        }
        message( $param{verbose},
            &quot;ESearch results could not be parsed. Resubmitting query.\n&quot; );
        sleep(10);
        if ( $param{retries} &gt;= $param{maxRetries} ) {
            die(&quot;Too many failures--giving up search.&quot;);
        }

        $esearch_result = get($esearch);
        $param{retries}++;
    }

    $param{retries} = 0;

    $esearch_result
        =~ m/&lt;Count&gt;(\d+)&lt;\/Count&gt;.*&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;.*&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/s;

    my $count     = $1;
    my $query_key = $2;
    my $web_env   = $3;

    if ( defined( $param{maxRecords} ) ) {
        if ( $count &gt; $param{maxRecords} ) {
            message( $param{verbose},
                &quot;Retrieving $param{maxRecords} records out of $count available records.\n&quot;
            );
            $count = $param{maxRecords};
        }
        else {
            message( $param{verbose},
                &quot;Retrieving $count records out of $count available records.\n&quot;
            );
        }
    }
    else {
        message( $param{verbose},
            &quot;Retrieving $count records out of $count available records.\n&quot; );
    }

    my $retmax = 500;
    if ( $retmax &gt; $count ) {
        $retmax = $count;
    }

    open( my $OUTFILE, &quot;&gt;&quot; . $param{outputFile} )
        or die(&quot;Error: Cannot open $param{outputFile} : $!&quot;);

    for (
        my $retstart = 0;
        $retstart &lt; $count;
        $retstart = $retstart + $retmax
        )
    {
        message( $param{verbose},
                  &quot;Downloading records &quot;
                . ( $retstart + 1 ) . &quot; to &quot;
                . ( $retstart + $retmax )
                . &quot;\n&quot; );
        my $efetch
            = &quot;$param{url}/efetch.fcgi?rettype=$param{returnType}&amp;retmode=text&amp;retstart=$retstart&amp;retmax=$retmax&amp;db=$param{database}&amp;query_key=$query_key&amp;WebEnv=$web_env&quot;;
        my $efetch_result = get($efetch);

        while ( !defined($efetch_result) ) {
            message( $param{verbose},
                &quot;EFetch results could not be parsed. Resubmitting query.\n&quot; );
            sleep(10);
            if ( $param{retries} &gt;= $param{maxRetries} ) {
                die(&quot;Too many failures--giving up search.&quot;);
            }

            $efetch_result = get($efetch);
            $param{retries}++;
        }

        print( $OUTFILE $efetch_result );

        unless (
            ( defined( $param{maxRecords} ) &amp;&amp; ( $param{maxRecords} == 1 ) ) )
        {
            sleep(3);
        }
    }

    close($OUTFILE) or die(&quot;Error: Cannot close $param{outputFile} file: $!&quot;);
}

sub message {
    my $verbose = shift;
    my $message = shift;
    if ($verbose) {
        print $message;
    }
}

sub print_usage {
    print &lt;&lt;BLOCK;
USAGE:
   perl ncbi_search.pl -q STRING -o FILE -d STRING -r STRING [Options]

DESCRIPTION:
   Uses NCBI&#039;s eSearch to download collections of sequences.

REQUIRED ARGUMENTS:
   -q, --query [STRING]
      Raw query text.
   -o, --output [FILE]
      Output file to create.
   -d, --database [STRING]
      Name of the NCBI database to search, such as &#039;nucleotide&#039;, &#039;protein&#039;,
      or &#039;gene&#039;.
   -r, --return_type [STRING]
      The type of information requested. For sequences &#039;fasta&#039; is often used.
      The accepted formats vary depending on the database being queried.
   -m, --max_records [INTEGER]
      The maximum number of records to return (default is to return all matches
      satisfying the query).
   -v, --verbose
      Provide progress messages.
   -h, --help
      Show this message.

EXAMPLE:
   perl ncbi_search.pl -q &#039;dysphagia AND homo sapiens[ORGN]&#039; \\
     -o results.txt -d pubmed -r uilist -m 100

BLOCK
}</code>]]></description>
	<dc:creator>Anjana</dc:creator>
</item>

</channel>
</rss>