<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Download the gff files from NCBI using bash script/command]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/33471/download-the-gff-files-from-ncbi-using-bash-scriptcommand?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/33471/download-the-gff-files-from-ncbi-using-bash-scriptcommand?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/33471/download-the-gff-files-from-ncbi-using-bash-scriptcommand</guid>
	<pubDate>Thu, 08 Jun 2017 08:17:11 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/33471/download-the-gff-files-from-ncbi-using-bash-scriptcommand</link>
	<title><![CDATA[Download the gff files from NCBI using bash script/command]]></title>
	<description><![CDATA[<code>#!/bin/bash

# Download genome annotation (GFF) files from NCBI from the command line

# Create the working directory (reuse it if it already exists)
mkdir -p genome_gff
# Stop here rather than writing the list files into the wrong directory
cd genome_gff || exit 1

# Fetch the assembly summary of each genome set and write one download URL
# per line to genomic_file_SET. Field 20 of the tab-separated summary holds
# the URL of the assembly directory; the GFF file inside it is named after
# the last path component of that URL plus _genomic.gff.gz.
# Comment lines and rows without a URL in field 20 are skipped.
# The USER needs to point curl at the right summary file.
# Comment out the line of any genome set you are not interested in.
# -for fungi
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/fungi/assembly_summary.txt&#039; | awk -F &#039;\t&#039; &#039;!/^#/ &amp;&amp; $20 ~ /:\/\// { n = split($20, part, &quot;/&quot;); print $20 &quot;/&quot; part[n] &quot;_genomic.gff.gz&quot; }&#039; &gt; genomic_file_fungi

# -for bacteria
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/bacteria/assembly_summary.txt&#039; | awk -F &#039;\t&#039; &#039;!/^#/ &amp;&amp; $20 ~ /:\/\// { n = split($20, part, &quot;/&quot;); print $20 &quot;/&quot; part[n] &quot;_genomic.gff.gz&quot; }&#039; &gt; genomic_file_bacteria

# -for plant
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/plant/assembly_summary.txt&#039; | awk -F &#039;\t&#039; &#039;!/^#/ &amp;&amp; $20 ~ /:\/\// { n = split($20, part, &quot;/&quot;); print $20 &quot;/&quot; part[n] &quot;_genomic.gff.gz&quot; }&#039; &gt; genomic_file_plant

# -for archaea
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/archaea/assembly_summary.txt&#039; | awk -F &#039;\t&#039; &#039;!/^#/ &amp;&amp; $20 ~ /:\/\// { n = split($20, part, &quot;/&quot;); print $20 &quot;/&quot; part[n] &quot;_genomic.gff.gz&quot; }&#039; &gt; genomic_file_archaea

# -for protozoa
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/protozoa/assembly_summary.txt&#039; | awk -F &#039;\t&#039; &#039;!/^#/ &amp;&amp; $20 ~ /:\/\// { n = split($20, part, &quot;/&quot;); print $20 &quot;/&quot; part[n] &quot;_genomic.gff.gz&quot; }&#039; &gt; genomic_file_protozoa

# -for vertebrate_mammalian
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/assembly_summary.txt&#039; | awk -F &#039;\t&#039; &#039;!/^#/ &amp;&amp; $20 ~ /:\/\// { n = split($20, part, &quot;/&quot;); print $20 &quot;/&quot; part[n] &quot;_genomic.gff.gz&quot; }&#039; &gt; genomic_file_vertebrate_mammalian

# -for vertebrate_other
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_other/assembly_summary.txt&#039; | awk -F &#039;\t&#039; &#039;!/^#/ &amp;&amp; $20 ~ /:\/\// { n = split($20, part, &quot;/&quot;); print $20 &quot;/&quot; part[n] &quot;_genomic.gff.gz&quot; }&#039; &gt; genomic_file_vertebrate_other

# -for invertebrate
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/assembly_summary.txt&#039; | awk -F &#039;\t&#039; &#039;!/^#/ &amp;&amp; $20 ~ /:\/\// { n = split($20, part, &quot;/&quot;); print $20 &quot;/&quot; part[n] &quot;_genomic.gff.gz&quot; }&#039; &gt; genomic_file_invertebrate

# -for viral
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/viral/assembly_summary.txt&#039; | awk -F &#039;\t&#039; &#039;!/^#/ &amp;&amp; $20 ~ /:\/\// { n = split($20, part, &quot;/&quot;); print $20 &quot;/&quot; part[n] &quot;_genomic.gff.gz&quot; }&#039; &gt; genomic_file_viral

#Read the uerl from file and download

FILES=$(pwd)/*
for f in $FILES
do
  echo &quot;Processing $f file...&quot;
  filename=$(basename &quot;$f&quot;)
  extension=&quot;${filename##*.}&quot;
  filename=&quot;${filename%.*}&quot;
  # Create a directory with appending G
  mkdir &quot;GFF$filename&quot;
  cd &quot;GFF$filename&quot;
  # take action on each file. $f store current file name
  head -n 4 $f &gt; $f.head
  wget --input $f.head
  gunzip *.gz
  #cat $f
  cd ..
done</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>

</channel>
</rss>