<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Retrieve NCBI GenBank records with a range of accession numbers]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/27299/retrieve-ncbi-genbank-records-with-a-range-of-accession-numbers?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/27299/retrieve-ncbi-genbank-records-with-a-range-of-accession-numbers?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27299/retrieve-ncbi-genbank-records-with-a-range-of-accession-numbers</guid>
	<pubDate>Wed, 11 May 2016 11:02:40 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27299/retrieve-ncbi-genbank-records-with-a-range-of-accession-numbers</link>
	<title><![CDATA[Retrieve NCBI GenBank records with a range of accession numbers]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

#FILE: ncbi_search.pl
#AUTH: Paul Stothard (paul.stothard@gmail.com)

use warnings;
use strict;
use Getopt::Long;
use LWP::Simple;
use URI::Escape;

use LWP::UserAgent;
use HTTP::Request::Common;

# Run-time settings for the script.
#   query, outputFile, database, returnType, maxRecords, verbose, help
#       start undefined and are filled in from the command line.
#   format
#       is not read anywhere in this script; it is kept so the hash keeps the
#       same set of keys.
#   url
#       base address of the NCBI E-utilities. NCBI serves the E-utilities over
#       https from eutils.ncbi.nlm.nih.gov; LWP needs LWP::Protocol::https
#       installed to fetch https addresses.
#   retries, maxRetries
#       resubmission counter and the number of resubmissions allowed before
#       the script gives up.
my %param = (
    query      =&gt; undef,
    outputFile =&gt; undef,
    database   =&gt; undef,
    returnType =&gt; undef,
    maxRecords =&gt; undef,
    format     =&gt; undef,
    verbose    =&gt; undef,
    url        =&gt; &#039;https://eutils.ncbi.nlm.nih.gov/entrez/eutils&#039;,
    retries    =&gt; 0,
    maxRetries =&gt; 5,
    help       =&gt; undef
);

# Parse the command line into %param. Bundling lets single-letter switches be
# combined behind one dash.
Getopt::Long::Configure(&#039;bundling&#039;);

# GetOptions returns false when it meets an unknown switch or a malformed
# value (for example a non-integer after -m). It has already warned on STDERR
# by then, so show the usage text and stop rather than run a search with a
# partially parsed command line.
GetOptions(
    &#039;q|query=s&#039;       =&gt; \$param{query},
    &#039;o|output_file=s&#039; =&gt; \$param{outputFile},
    &#039;d|database=s&#039;    =&gt; \$param{database},
    &#039;r|return_type=s&#039; =&gt; \$param{returnType},
    &#039;m|max_records=i&#039; =&gt; \$param{maxRecords},
    &#039;verbose|v&#039;       =&gt; \$param{verbose},
    &#039;h|help&#039;          =&gt; \$param{help}
) or do {
    print_usage();
    exit(1);
};

# -h/--help: show the usage text and finish successfully.
if ( defined $param{help} ) {
    print_usage();
    exit(0);
}

# The query, output file, database and return type are all mandatory. If any
# one of them was not supplied, show the usage text and exit with status 1.
foreach my $required (qw(query outputFile database returnType)) {
    next if defined $param{$required};
    print_usage();
    exit(1);
}

# Lower-case the return type and URI-escape the query, because the query is
# later interpolated directly into the ESearch URL.
$param{returnType} = lc $param{returnType};
$param{query}      = uri_escape( $param{query} );

# Hand the settings to the search routine as a flat key/value list.
_doSearch(%param);

# Search an NCBI database with ESearch, then download the matching records in
# batches with EFetch and write them to the output file.
#
# Takes the settings as a flat key/value list (copied into a local hash):
#   url, database, query - used to build the ESearch request; query is placed
#                          in the URL as-is, so it must already be URI-escaped
#   returnType           - sent to EFetch as rettype
#   outputFile           - path of the file to create (an existing file is
#                          truncated)
#   maxRecords           - optional cap on the number of records downloaded
#   verbose              - when true, progress messages are printed
#   retries, maxRetries  - resubmission counter and its limit for one request
#
# Dies when ESearch answers with an ERROR element, when a request still fails
# after maxRetries resubmissions, or when the output file cannot be opened or
# closed.
sub _doSearch {
    my %param = @_;

    # Captures the hit count, query key and WebEnv value from the ESearch reply.
    my $esearch_pattern
        = qr/&lt;Count&gt;(\d+)&lt;\/Count&gt;.*&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;.*&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/s;

    my $esearch = &quot;$param{url}/esearch.fcgi?db=$param{database}&quot;
        . &quot;&amp;retmax=1&amp;usehistory=y&amp;term=$param{query}&quot;;

    my ( $count, $query_key, $web_env );
    while (1) {
        my $esearch_result = get($esearch);

        # get() yields undef when the request fails, so only a defined reply
        # is matched against the patterns (avoids an uninitialized warning).
        if ( defined($esearch_result) ) {
            ( $count, $query_key, $web_env )
                = $esearch_result =~ $esearch_pattern;
            last if defined($count);

            if ( $esearch_result =~ m/&lt;ERROR&gt;(.*)&lt;\/ERROR&gt;/is ) {
                die(&quot;ESearch returned an error: $1&quot;);
            }
        }

        # Give up before sleeping, not after, once the limit is reached.
        if ( $param{retries} &gt;= $param{maxRetries} ) {
            die(&quot;Too many failures--giving up search.&quot;);
        }
        message( $param{verbose},
            &quot;ESearch results could not be parsed. Resubmitting query.\n&quot; );
        sleep(10);
        $param{retries}++;
    }

    # Apply the optional cap; $count becomes the number of records to fetch.
    if ( defined( $param{maxRecords} ) &amp;&amp; $count &gt; $param{maxRecords} ) {
        message( $param{verbose},
            &quot;Retrieving $param{maxRecords} records out of $count available records.\n&quot;
        );
        $count = $param{maxRecords};
    }
    else {
        message( $param{verbose},
            &quot;Retrieving $count records out of $count available records.\n&quot; );
    }

    # Largest number of records requested from EFetch in one call.
    my $batch_size = 500;

    # Three-argument open: the file name is never parsed for mode characters.
    open( my $OUTFILE, &#039;&gt;&#039;, $param{outputFile} )
        or die(&quot;Error: Cannot open $param{outputFile} : $!&quot;);

    for (
        my $retstart = 0;
        $retstart &lt; $count;
        $retstart = $retstart + $batch_size
        )
    {
        # Clamp the final batch to what is still wanted; otherwise a capped
        # download would fetch records beyond the requested maximum.
        my $remaining = $count - $retstart;
        my $retmax = $remaining &lt; $batch_size ? $remaining : $batch_size;

        message( $param{verbose},
                  &quot;Downloading records &quot;
                . ( $retstart + 1 ) . &quot; to &quot;
                . ( $retstart + $retmax )
                . &quot;\n&quot; );
        my $efetch
            = &quot;$param{url}/efetch.fcgi?rettype=$param{returnType}&amp;retmode=text&amp;retstart=$retstart&amp;retmax=$retmax&amp;db=$param{database}&amp;query_key=$query_key&amp;WebEnv=$web_env&quot;;

        # Each batch gets its own retry budget.
        $param{retries} = 0;
        my $efetch_result = get($efetch);

        while ( !defined($efetch_result) ) {
            if ( $param{retries} &gt;= $param{maxRetries} ) {
                die(&quot;Too many failures--giving up search.&quot;);
            }
            message( $param{verbose},
                &quot;EFetch results could not be parsed. Resubmitting query.\n&quot; );
            sleep(10);

            $efetch_result = get($efetch);
            $param{retries}++;
        }

        print {$OUTFILE} $efetch_result;

        # Pause between requests only; nothing follows the last batch.
        if ( $retstart + $batch_size &lt; $count ) {
            sleep(3);
        }
    }

    close($OUTFILE) or die(&quot;Error: Cannot close $param{outputFile} file: $!&quot;);
}

# Print a progress message when the verbose flag is true.
#   first argument  - flag; any true value enables output
#   second argument - text to print, written exactly as given to the
#                     currently selected output handle
sub message {
    my ( $enabled, $text ) = @_;
    print $text if $enabled;
}

# Print the command-line help text to the currently selected output handle.
# The long option names shown match the specifications given to GetOptions,
# and the switches that may be omitted are listed under their own heading.
sub print_usage {
    print &lt;&lt;BLOCK;
USAGE:
   perl ncbi_search.pl -q STRING -o FILE -d STRING -r STRING [Options]

DESCRIPTION:
   Uses NCBI&#039;s eSearch to download collections of sequences.

REQUIRED ARGUMENTS:
   -q, --query [STRING]
      Raw query text.
   -o, --output_file [FILE]
      Output file to create.
   -d, --database [STRING]
      Name of the NCBI database to search, such as &#039;nucleotide&#039;, &#039;protein&#039;,
      or &#039;gene&#039;.
   -r, --return_type [STRING]
      The type of information requested. For sequences &#039;fasta&#039; is often used.
      The accepted formats vary depending on the database being queried.

OPTIONAL ARGUMENTS:
   -m, --max_records [INTEGER]
      The maximum number of records to return (default is to return all matches
      satisfying the query).
   -v, --verbose
      Provide progress messages.
   -h, --help
      Show this message.

EXAMPLE:
   perl ncbi_search.pl -q &#039;dysphagia AND homo sapiens[ORGN]&#039; \\
     -o results.txt -d pubmed -r uilist -m 100

BLOCK
}</code>]]></description>
	<dc:creator>Anjana</dc:creator>
</item>

</channel>
</rss>