<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Perl script to check perl modules and download NCBI, BUSCO, Taxonomy, Silva databases !]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/42926/perl-script-to-check-perl-modules-and-download-ncbi-busco-taonomy-silva-databases?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/42926/perl-script-to-check-perl-modules-and-download-ncbi-busco-taonomy-silva-databases?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/42926/perl-script-to-check-perl-modules-and-download-ncbi-busco-taonomy-silva-databases</guid>
	<pubDate>Mon, 01 Mar 2021 23:13:19 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/42926/perl-script-to-check-perl-modules-and-download-ncbi-busco-taonomy-silva-databases</link>
	<title><![CDATA[Perl script to check perl modules and download NCBI, BUSCO, Taxonomy, Silva databases !]]></title>
	<description><![CDATA[<code>use strict;
use warnings;
use ExtUtils::Installed;
use LWP::Simple;
use Archive::Extract;
#
# First, check if all the required modules have been installed in the system and download the mandatory database
#
BEGIN {
    # Modules the pipeline depends on. Each one is probed below and reported
    # as OK / NOT OK at compile time, before any download is attempted.
    my @import_modules = (
        &#039;Cwd&#039;,
        &#039;File::chdir&#039;,
        &#039;File::Copy&#039;,
        &#039;POSIX&#039;,
        &#039;Tie::File&#039;,
        &#039;Try::Tiny&#039;,
        &#039;Data::Dumper&#039;,
        &#039;File::Basename&#039;,
        &#039;Bio::SeqIO&#039;,
        &#039;FindBin&#039;,
        &#039;File::Remove&#039;,
        &#039;Capture::Tiny&#039;,
        &#039;File::Temp&#039;,
        &#039;File::Spec::Functions&#039;,
        &#039;Statistics::Multtest&#039;,
        &#039;File::Path&#039;,
        &#039;Statistics::Distributions&#039;,
        &#039;Getopt::Long&#039;,
        &#039;Statistics::R&#039;,
        &#039;Math::Round&#039;,
        &#039;File::Find&#039;,
        &#039;Bio::DB::Taxonomy&#039;,
        &#039;Pod::Usage&#039;,
    );

    for my $module ( @import_modules ) {

        # require with a string expects a path (Some/Module.pm), not a
        # package name (Some::Module), so translate the separators first.
        ( my $module_file = &quot;$module.pm&quot; ) =~ s{::}{/}g;

        # Probe by actually loading the module. The previous check,
        # ExtUtils::Installed-&gt;validate, only recognises names that own a
        # .packlist; core modules (grouped under the single name Perl) and
        # modules shipped inside a larger distribution were therefore
        # reported as NOT OK even though they load fine.
        if ( eval { require $module_file; 1 } ) {
            print &quot;\n Module $module OK!\n&quot;;
        }
        else {
            # Report only: the run continues so that every missing module
            # is listed in a single pass.
            print qq{\n Module $module   NOT OK!\n };
            #exit 1;
        }
    } # end of module loop
} # end of BEGIN block

#Bash script else here

print &quot;\nDownloading third party database for Alienomics\n&quot;;

#Install NCBI taxdump
# Fetch the NCBI taxonomy dump and unpack it into third_party_DB/taxdump.
my $url = &#039;ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz&#039;;
my $file = &#039;taxdump.tar.gz&#039;;

# getstore reports failure through its return status instead of dying, so the
# status has to be checked before the archive is touched.
my $code = getstore($url, $file);
die &quot;Download of $url failed (status $code)\n&quot; unless is_success($code);

# new() returns false when the archive is missing or its type cannot be
# determined; stop here rather than call extract on an undefined value.
my $ae = Archive::Extract-&gt;new( archive =&gt; $file )
    or die &quot;Cannot open archive $file\n&quot;;

# extract() returns false on failure and leaves the reason in error().
my $ok = $ae-&gt;extract( to =&gt; &#039;third_party_DB/taxdump&#039; )
    or die &quot;Extraction of $file failed: &quot; . $ae-&gt;error . &quot;\n&quot;;


#Install silvaDB
#https://www.arb-silva.de/no_cache/download/archive/release_138_1/Exports/
#LSU: Large subunit (23S/28S ribosomal RNAs)
#SSU: Small subunit (16S/18S ribosomal RNAs)

# Fetch the SILVA 138.1 LSU Parc FASTA export and decompress it under
# third_party_DB/silvaDB.
my $silva_url_LSU = &#039;https://www.arb-silva.de/fileadmin/silva_databases/release_138_1/Exports/SILVA_138.1_LSUParc_tax_silva.fasta.gz&#039;;
my $silva_LSU_fileName = &#039;SILVA_138.1_LSUParc_tax_silva.fasta.gz&#039;;

# getstore reports failure through its return status instead of dying, so the
# status has to be checked before the archive is touched.
my $silva_LSU_code = getstore($silva_url_LSU, $silva_LSU_fileName);
die &quot;Download of $silva_url_LSU failed (status $silva_LSU_code)\n&quot;
    unless is_success($silva_LSU_code);

# new() returns false when the archive is missing or its type cannot be
# determined; stop here rather than call extract on an undefined value.
my $silva_LSU_ae = Archive::Extract-&gt;new( archive =&gt; $silva_LSU_fileName )
    or die &quot;Cannot open archive $silva_LSU_fileName\n&quot;;

# For a plain .gz archive, a target that is not an existing directory is
# taken as the output file name, so this writes the decompressed FASTA to
# exactly this path.
my $silva_ok = $silva_LSU_ae-&gt;extract( to =&gt; &#039;third_party_DB/silvaDB/SILVA_138.1_LSUParc_tax_silva.fasta&#039; )
    or die &quot;Extraction of $silva_LSU_fileName failed: &quot; . $silva_LSU_ae-&gt;error . &quot;\n&quot;;


#Install buscoDB
#https://busco-archive.ezlab.org/

# Fetch the BUSCO metazoa_odb9 lineage dataset and unpack it into
# third_party_DB/.
# NOTE(review): the LSU part of these variable names looks like a leftover
# from the SILVA section above; names are kept unchanged here.
my $busco_url_LSU = &#039;https://busco-archive.ezlab.org/datasets/metazoa_odb9.tar.gz&#039;;
my $busco_LSU_fileName = &#039;metazoa_odb9.tar.gz&#039;;

# getstore reports failure through its return status instead of dying, so the
# status has to be checked before the archive is touched.
my $busco_LSU_code = getstore($busco_url_LSU, $busco_LSU_fileName);
die &quot;Download of $busco_url_LSU failed (status $busco_LSU_code)\n&quot;
    unless is_success($busco_LSU_code);

# new() returns false when the archive is missing or its type cannot be
# determined; stop here rather than call extract on an undefined value.
my $busco_LSU_ae = Archive::Extract-&gt;new( archive =&gt; $busco_LSU_fileName )
    or die &quot;Cannot open archive $busco_LSU_fileName\n&quot;;

# extract() returns false on failure and leaves the reason in error().
my $busco_ok = $busco_LSU_ae-&gt;extract( to =&gt; &#039;third_party_DB/&#039; )
    or die &quot;Extraction of $busco_LSU_fileName failed: &quot; . $busco_LSU_ae-&gt;error . &quot;\n&quot;;


#Install diamondDB


__END__
# Everything below the __END__ marker is ignored by the perl compiler; it is
# kept only as scratch notes (an earlier shell-based approach and a bash
# fragment) and never runs as part of this script.

# BlastDB download
# NOTE(review): the backticks already run the shell pipeline and capture its
# output, so parseResult below would hand that captured output, not a
# command, to system().
my $result = `mkdir taxdump; cd taxdump; wget ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz; tar xzfp taxdump.tar.gz`;
parseResult ($result);

#do your magic here
# Passes its first argument to system() as a command line.
sub parseResult {
  system ($_[0]);
}

__END__
# Second scratch section: an in-memory download attempt with LWP::Simple get.
# NOTE(review): the downloaded content itself is passed back to get() as if
# it were a URL, and is_success() is applied to the result of that call.
my $content = get(&quot;ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz&quot;);
die &quot;Couldn&#039;t get it!&quot; unless defined $content;

if (is_success(get(&quot;$content&quot;))) {
    print &quot;Download started&quot;;
}



# Bash fragment (not Perl): download and unpack the NCBI nt BLAST database
# into ./blastDB unless that directory already exists.
# NOTE(review): the inner directory test repeats the outer one, so it is
# always true when reached.
if [ ! -d &quot;$PWD/blastDB&quot; ]; then
    if [ ! -d &quot;$PWD/blastDB&quot; ]; then
        mkdir $PWD/blastDB
    fi
    cd $PWD/blastDB
    wget &quot;ftp://ftp.ncbi.nlm.nih.gov/blast/db/nt.*.tar.gz&quot;
    for a in nt.*.tar.gz; do tar xzf $a; done
    #Out of the directory
    cd ..
else
   echo -e &quot;exists \t blastDB, If not installed sucessfully, delete the folder and re-run it&quot;
fi

echo -e &quot;\nAll Done\n&quot;

# Reminder printed for the user: the R package Ckmeans.1d.dp has to be
# installed by hand.
echo -e &quot;\nInstall R package install.packages(&#039;Ckmeans.1d.dp&#039;) \n&quot;

/;</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>

</channel>
</rss>