<![CDATA[BOL: Perl script to check Perl modules and download NCBI, BUSCO, Taxonomy, Silva databases!]]>

# BOL: Perl script to check Perl modules and download NCBI, BUSCO, Taxonomy, Silva databases
# Source:    https://bioinformaticsonline.com/snippets/view/42926/perl-script-to-check-perl-modules-and-download-ncbi-busco-taonomy-silva-databases
# Published: Mon, 01 Mar 2021 23:13:19 -0600
#
# What this script does:
#   1. At compile time (BEGIN), reports for each required module whether it
#      can be loaded ("OK!") or not ("NOT OK!"). Missing modules are reported
#      but are not fatal.
#   2. At run time, downloads the NCBI taxdump, the SILVA LSU export and the
#      BUSCO metazoa dataset into the current directory and unpacks each of
#      them below third_party_DB/.
#   3. Exits with status 1 if any download or extraction failed, 0 otherwise.
#
# Everything after the first __END__ is inert notes (an older shell-based
# approach) and is never executed by perl.

use strict;
use warnings;
use ExtUtils::Installed;    # retained from the original; the check below no longer needs it
use LWP::Simple;            # exports getstore() and is_success()
use Archive::Extract;

#
# First, check if all the required modules have been installed in the system
# and download the mandatory database
#
BEGIN {
    my @import_modules = (
        'Cwd',
        'File::chdir',
        'File::Copy',
        'POSIX',
        'Tie::File',
        'Try::Tiny',
        'Data::Dumper',
        'File::Basename',
        'Bio::SeqIO',
        'FindBin',
        'File::Remove',
        'Capture::Tiny',
        'File::Temp',
        'File::Spec::Functions',
        'Statistics::Multtest',
        'File::Path',
        'Statistics::Distributions',
        'Getopt::Long',
        'Statistics::R',
        'Math::Round',
        'File::Find',
        'Bio::DB::Taxonomy',
        'Pod::Usage',
    );

    my @missing_modules;

    for my $module (@import_modules) {

        # Try to load the module instead of asking ExtUtils::Installed:
        # validate() croaks for every module that has no .packlist of its
        # own (core modules such as Cwd/POSIX, or modules that ship inside a
        # larger distribution such as Bio::SeqIO), which reported installed
        # modules as missing.
        ( my $module_path = "$module.pm" ) =~ s{::}{/}g;

        if ( eval { require $module_path; 1 } ) {
            print "\n Module $module OK!\n";
        }
        else {
            print qq{\n Module $module NOT OK!\n };
            push @missing_modules, $module;

            #exit 1;
        }
    }    # end 'for'

    if (@missing_modules) {
        print "\n Missing modules: ", join( ', ', @missing_modules ), "\n";
    }
}    # end 'BEGIN' block

# fetch_and_extract( $label, $url, $archive, $extract_to )
#
# Downloads $url to the local file $archive and unpacks it to $extract_to.
#
#   $label       human-readable database name, used only in messages
#   $url         remote location of the archive
#   $archive     local file name the download is stored under
#   $extract_to  target handed to Archive::Extract->extract( to => ... )
#
# Returns 1 on success. On any failure a warning is printed and an empty
# list / undef is returned, so the caller can carry on with the next
# database.
sub fetch_and_extract {
    my ( $label, $url, $archive, $extract_to ) = @_;

    my $status = getstore( $url, $archive );
    if ( !is_success($status) ) {
        warn "Download of $label failed (status $status): $url\n";
        return;
    }

    # new() returns false when it cannot work out the archive type.
    my $extractor = Archive::Extract->new( archive => $archive );
    if ( !$extractor ) {
        warn "Cannot handle archive '$archive' for $label\n";
        return;
    }

    if ( !$extractor->extract( to => $extract_to ) ) {
        my $reason = $extractor->error || 'unknown error';
        warn "Extraction of '$archive' to '$extract_to' failed: $reason\n";
        return;
    }

    return 1;
}

#Bash script else here
print "\nDownloading third party database for Alienomics\n";

my @databases = (

    #Install NCBI taxdump
    {
        label      => 'NCBI taxdump',
        url        => 'ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz',
        archive    => 'taxdump.tar.gz',
        extract_to => 'third_party_DB/taxdump',
    },

    #Install silvaDB
    #https://www.arb-silva.de/no_cache/download/archive/release_138_1/Exports/
    #LSU: Large subunit (23S/28S ribosomal RNAs)
    #SSU: Small subunit (16S/18S ribosomal RNAs)
    # NOTE(review): this is a plain .gz (not a tarball), and 'extract_to' is
    # a file name rather than a directory, exactly as in the original call --
    # confirm against the Archive::Extract documentation.
    {
        label      => 'SILVA LSU',
        url        => 'https://www.arb-silva.de/fileadmin/silva_databases/release_138_1/Exports/SILVA_138.1_LSUParc_tax_silva.fasta.gz',
        archive    => 'SILVA_138.1_LSUParc_tax_silva.fasta.gz',
        extract_to => 'third_party_DB/silvaDB/SILVA_138.1_LSUParc_tax_silva.fasta',
    },

    #Install buscoDB
    #https://busco-archive.ezlab.org/
    {
        label      => 'BUSCO metazoa_odb9',
        url        => 'https://busco-archive.ezlab.org/datasets/metazoa_odb9.tar.gz',
        archive    => 'metazoa_odb9.tar.gz',
        extract_to => 'third_party_DB/',
    },
);

my $failure_count = 0;
for my $database (@databases) {
    my $succeeded = fetch_and_extract(
        $database->{label},   $database->{url},
        $database->{archive}, $database->{extract_to},
    );
    $failure_count++ if !$succeeded;
}

#Install diamondDB

# Report failure to the calling shell instead of finishing silently.
exit( $failure_count ? 1 : 0 );

__END__

# BlastDB download
my $result = `mkdir taxdump; cd taxdump; wget ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz; tar xzfp taxdump.tar.gz`;
parseResult ($result); #do your magic here
sub parseResult { system ($_[0]); }

__END__

my $content = get("ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz");
die "Couldn't get it!" unless defined $content;
if (is_success(get("$content"))) { print "Download started"; }

if [ ! -d "$PWD/blastDB" ]; then
    if [ ! -d "$PWD/blastDB" ]; then
        mkdir $PWD/blastDB
    fi
    cd $PWD/blastDB
    wget "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nt.*.tar.gz"
    for a in nt.*.tar.gz; do tar xzf $a; done
    #Out of the directory
    cd ..
else
    echo -e "exists \t blastDB, If not installed sucessfully, delete the folder and re-run it"
fi
echo -e "\nAll Done\n"
echo -e "\nInstall R package install.packages('Ckmeans.1d.dp') \n"
/;]]> Jit