<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Owner]]></title>
	<link>https://bioinformaticsonline.com/snippets/owner/rahul?offset=30</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/owner/rahul?offset=30" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/36671/biological-sequence-handling-with-perl</guid>
	<pubDate>Wed, 16 May 2018 08:18:12 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/36671/biological-sequence-handling-with-perl</link>
	<title><![CDATA[Biological Sequence handling with Perl !]]></title>
	<description><![CDATA[<code>package Sequence::Generic;
# File: Sequence/Generic.pm

use strict;
use Carp;
use overload 
  &#039;&quot;&quot;&#039;        =&gt; &#039;asString&#039;,
  &#039;neg&#039;       =&gt; &#039;reverse&#039;,
  &#039;.&#039;         =&gt; &#039;concatenate&#039;,
  &#039;fallback&#039;  =&gt; &#039;TRUE&#039;;

# These methods should be overriden by child classes
# class constructor
sub new {
    my $class = shift;
    croak &quot;$class must override the new() method&quot;;
}
# Return the sequence as a string
sub seq {
    my $self = shift;
    croak ref($self),&quot; must override the seq() method&quot;;
}
# Return the type of the sequence as a human readable string
sub type {
    return &#039;Generic Sequence&#039;;
}
# These methods probably don&#039;t have to be overridden
# The length of the sequence
sub length {
    my $self = shift;
    return length($self-&gt;seq);
}
# The reverse of the sequence
sub reverse {
    my $self = shift;
    my $reversed = reverse $self-&gt;seq;
    return $reversed;
}
# A human-readable description of the object
sub asString {
  my $self = shift;
  return $self-&gt;type . &#039;(&#039; . $self-&gt;length . &#039; residues)&#039;;
}
# Concatenate two sequences together and return the result

sub concatenate {
  my $self = shift;
  my ($new_seq,$prepend) = @_;
  my ($to_append);
  if (ref($new_seq)) {
      croak &quot;argument to concatenate must be a string or a Sequence object&quot;
      unless $new_seq-&gt;isa(__PACKAGE__);
      $to_append = $new_seq-&gt;seq ;
  } else {
      $to_append = $new_seq;
  }
  return $self-&gt;new($prepend ? $to_append . $self-&gt;seq 
                     : $self-&gt;seq . $to_append);
}
1;

Back to Article

Listing Two
 package Sequence::Nucleotide;
# file: Sequence/Nucleotide.pm

use Sequence::Generic;
use Sequence::Nucleotide::Subsequence;
use Sequence::Alignment;
use Carp;

use strict;
use vars &#039;@ISA&#039;;
:Generic&#039;;

my %CODON_TABLE = (
           UCA =&gt; &#039;S&#039;,UCG =&gt; &#039;S&#039;,UCC =&gt; &#039;S&#039;,UCU =&gt; &#039;S&#039;,
           UUU =&gt; &#039;F&#039;,UUC =&gt; &#039;F&#039;,UUA =&gt; &#039;L&#039;,UUG =&gt; &#039;L&#039;,
           UAU =&gt; &#039;Y&#039;,UAC =&gt; &#039;Y&#039;,UAA =&gt; &#039;*&#039;,UAG =&gt; &#039;*&#039;,
           UGU =&gt; &#039;C&#039;,UGC =&gt; &#039;C&#039;,UGA =&gt; &#039;*&#039;,UGG =&gt; &#039;W&#039;,
           CUA =&gt; &#039;L&#039;,CUG =&gt; &#039;L&#039;,CUC =&gt; &#039;L&#039;,CUU =&gt; &#039;L&#039;,
           CCA =&gt; &#039;P&#039;,CCG =&gt; &#039;P&#039;,CCC =&gt; &#039;P&#039;,CCU =&gt; &#039;P&#039;,
           CAU =&gt; &#039;H&#039;,CAC =&gt; &#039;H&#039;,CAA =&gt; &#039;Q&#039;,CAG =&gt; &#039;Q&#039;,
           CGA =&gt; &#039;R&#039;,CGG =&gt; &#039;R&#039;,CGC =&gt; &#039;R&#039;,CGU =&gt; &#039;R&#039;,
           AUU =&gt; &#039;I&#039;,AUC =&gt; &#039;I&#039;,AUA =&gt; &#039;I&#039;,AUG =&gt; &#039;M&#039;,
           ACA =&gt; &#039;T&#039;,ACG =&gt; &#039;T&#039;,ACC =&gt; &#039;T&#039;,ACU =&gt; &#039;T&#039;,
           AAU =&gt; &#039;N&#039;,AAC =&gt; &#039;N&#039;,AAA =&gt; &#039;K&#039;,AAG =&gt; &#039;K&#039;,
           AGU =&gt; &#039;S&#039;,AGC =&gt; &#039;S&#039;,AGA =&gt; &#039;R&#039;,AGG =&gt; &#039;R&#039;,
           GUA =&gt; &#039;V&#039;,GUG =&gt; &#039;V&#039;,GUC =&gt; &#039;V&#039;,GUU =&gt; &#039;V&#039;,
           GCA =&gt; &#039;A&#039;,GCG =&gt; &#039;A&#039;,GCC =&gt; &#039;A&#039;,GCU =&gt; &#039;A&#039;,
           GAU =&gt; &#039;D&#039;,GAC =&gt; &#039;D&#039;,GAA =&gt; &#039;E&#039;,GAG =&gt; &#039;E&#039;,
           GGA =&gt; &#039;G&#039;,GGG =&gt; &#039;G&#039;,GGC =&gt; &#039;G&#039;,GGU =&gt; &#039;G&#039;,
          );
*complement = *reversec = \&amp;reverse;

sub new {
  my $class = shift;
  $class = ref($class) if ref($class);
  my ($sequence,$type) = @_;

  my $self = bless {},$class;
  if (ref($sequence)) {
    croak &quot;Can&#039;t initialize sequence from non-Sequence object.\n&quot;
      unless $sequence-&gt;can(&#039;seq&#039;);
    %{$self} = %{$sequence};  # clone operation
  } else {
    croak &quot;Doesn&#039;t look like sequence data&quot; 
      unless $sequence=~/^[gactnu\s]+$/i;
    $self-&gt;{&#039;data&#039;} = $self-&gt;_canonicalize($sequence);
    $self-&gt;{&#039;type&#039;} = $type || ($sequence=~/u/i ? &#039;RNA&#039; : &#039;DNA&#039;);
  }
  return $self;
}
sub seq {
    my $self = shift;
    $self-&gt;{&#039;data&#039;} = $self-&gt;_canonicalize($_[0])  if defined($_[0]);
    my $seq = $self-&gt;{&#039;data&#039;};
    return $seq unless $self-&gt;is_RNA;
    $seq=~tr/T/U/;
    return $seq;
}
sub type {
    my $self = shift;
    return defined($_[0]) ? $self-&gt;{&#039;type&#039;} = $_[0] : $self-&gt;{&#039;type&#039;};
}
sub is_DNA {
    my $self = shift;
    return $self-&gt;type eq &#039;DNA&#039;;
}
sub is_RNA {
  my $self = shift;
  return $self-&gt;type eq &#039;RNA&#039;;
}
sub subseq {
  my $self = shift;
  my ($start,$end) = @_;
  return (__PACKAGE__ . &#039;::Subsequence&#039;)-&gt;new($self,$start,$end);
}
sub reverse {
  my $self = shift;
  return (__PACKAGE__ . &#039;::Subsequence&#039;)-&gt;new($self,$self-&gt;length,1);
}
sub translate {
  my $self = shift;
  my $frame = shift() || 1;
  my $l = $self-&gt;length;
  my $seq = $frame &gt; 0 ? $self-&gt;subseq($frame,$l-($l-$frame+1)%3)
              : $self-&gt;reverse-&gt;subseq(abs($frame),$l-($l-abs($frame)+1)%3);
  my $s = $seq-&gt;seq;
  $s=~tr/T/U/;  # put it in RNA mode
  $s =~ s/(\S{3})/$CODON_TABLE{$1} || &#039;X&#039;/eg;
  return $s;
}
sub longest_orf {
    my $self = shift;

    my ($max,$pos,$frame);
    foreach (-3..-1,1..3) {
    my $translation = $self-&gt;translate($_);
    while ($translation=~/([^*]+)/g) {
        if (length($1) &gt; length($max)) {
        $max = $1;
        $frame = $_;
        $pos = pos($translation) - length($max); 
        }
    }
    }
    $pos *= 3;
    $pos += abs($frame);
    return ($pos,$pos+3*length($max)-1) if $frame &gt; 0;
    return ($self-&gt;length-$pos,$self-&gt;length-$pos-3*length($max));
}
sub align {
    my $self = shift;
    my $seq = shift;
    $seq = $seq-&gt;seq if ref($seq);
    return new Sequence::Alignment(src=&gt;$seq,target=&gt;$self-&gt;seq);
}
sub _canonicalize {
  my $self = shift;
  my $seq = shift;
  $seq =~ tr/uU/tT/;
  $seq =~ s/[^gatcn]//ig;
  return uc($seq);
}
1;</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35910/estimate-genome-size-with-jellyfish-and-r</guid>
	<pubDate>Mon, 12 Mar 2018 10:11:19 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35910/estimate-genome-size-with-jellyfish-and-r</link>
	<title><![CDATA[Estimate Genome Size with Jellyfish and R]]></title>
	<description><![CDATA[<code>jellyfish count -t 8 -C -m 19 -s 5G -o 19mer_out --min-qual-char=? /common/Tutorial/Genome_estimation/sample_read_1.fastq /common/Tutorial/Genome_estimation/sample_read_2.fastq

#-t    -treads=unit32       Number of treads to be used in the run. eg: 1,2,3,..etc.
#-C    -both-strands        Count both strands
#-m    -mer-len=unit32      Length of the k-mer    
#-s    -size=unit32         Hash size / memory allocation  
#-o    -output=string       Output file name
#--min-quality-char         Base quality value. Version 2.2.3 of Jellyfish uses the “Phred” score, where &quot;?&quot; = 30

jellyfish histo -o 19mer_out.histo 19mer_out

#Plot
dataframe19 &lt;- read.table(&quot;19mer_out.histo&quot;) #load the data into dataframe19
plot(dataframe19[1:200,], type=&quot;l&quot;) #plots the data points 1 through 200 in the dataframe19 using a line

plot(dataframe19[2:200,], type=&quot;l&quot;)

plot(dataframe19[2:100,], type=&quot;l&quot;) #plot line graph 
points(dataframe19[2:100,]) #plot the data points from 2 through 100

sum(as.numeric(dataframe19[2:9325,1]*dataframe19[2:9325,2]))

data[10:20,]

sum(as.numeric(dataframe19[2:9325,1]*dataframe19[2:9325,2]))/12

#Return around ~ 305 Mb</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35872/plot-dotplot-with-last</guid>
	<pubDate>Tue, 06 Mar 2018 09:21:18 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35872/plot-dotplot-with-last</link>
	<title><![CDATA[Plot dotplot with last !]]></title>
	<description><![CDATA[<code># generate dotplot
lastdb test/ref.fa
lastal -f TAB test/ref.fa test/contigs.reduced.pacbio.fa | last-dotplot - test/contigs.reduced.pacbio.fa.ref.png
lastal -f TAB test/ref.fa test/contigs.reduced.nanopore.fa | last-dotplot - test/contigs.reduced.nanopore.fa.ref.png</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35758/download-genomes-in-batch-from-ncbi</guid>
	<pubDate>Fri, 23 Feb 2018 08:52:03 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35758/download-genomes-in-batch-from-ncbi</link>
	<title><![CDATA[Download genomes in batch from NCBI]]></title>
	<description><![CDATA[<code>curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/bacteria/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20}&#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/)(GCA/)([0-9]{3}/)([0-9]{3}/)([0-9]{3}/)(GCA_.+)|\1\2\3\4\5\6/\6_genomic.fna.gz|&#039; &gt; genomic_file</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35642/estimate-genome-size</guid>
	<pubDate>Thu, 22 Feb 2018 03:28:26 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35642/estimate-genome-size</link>
	<title><![CDATA[Estimate Genome Size]]></title>
	<description><![CDATA[<code># Count k-mer occurrence using Jellyfish 2.2.6
jellyfish count -t 8 -C -m 19 -s 5G -o 19mer_out --min-qual-char=? sread_1.fastq sread_2.fastq

# points for a histogram
jellyfish histo -o 19mer_out.histo 19mer_out

#Plot results using R
##load the data into dataframe19
dataframe19 &lt;- read.table(&quot;19mer_out.histo&quot;) 
##plots the data points 1 through 200 in the dataframe19 using a line
plot(dataframe19[1:200,], type=&quot;l&quot;)
##plot the data points from 2 through 100
points(dataframe19[2:100,])

#calculate the total k-mers in the distribution
#Assuming the total number of data points is 9325
sum(as.numeric(dataframe19[2:9325,1]*dataframe19[2:9325,2]))

#peak position and genome size
#plotted graph we can get an idea where the peak position lies
#see actual point 
data[10:20,] #If peak more likely to be between 10-20

#If 12 is the peak
sum(as.numeric(dataframe19[2:9325,1]*dataframe19[2:9325,2]))/12

#Compare the peak shape with Poisson distribution in R
singleC &lt;- sum(as.numeric(dataframe19[2:28,1]*dataframe19[2:28,2]))/12
poisdtb &lt;- dpois(1:100,12)*singleC
plot(poisdtb, type=&#039;l&#039;, lty=2, col=&quot;green&quot;)
lines(dataframe19[1:100,12] * singleC, type = &quot;l&quot;, col=3)#, Ity=2)
lines(dataframe19[1:100,],type= &quot;l&quot;)


#ALTERNATE  WAY
#https://github.com/dib-lab/khmer/blob/master/scripts/normalize-by-median.py
python normalize-by-median.py -x 1e8 -k 20 -C 20 -R report.txt reads.fa
#https://github.com/dib-lab/khmer-recipes/blob/master/003-estimate-genome-size/estimate-genome-size.py
python estimate-genome-size.py -C 20 -k 20 reads.fa.keep report.txt</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/34584/calculate-dinucleotide-frequency-with-perl</guid>
	<pubDate>Sun, 10 Dec 2017 05:51:42 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/34584/calculate-dinucleotide-frequency-with-perl</link>
	<title><![CDATA[Calculate Dinucleotide Frequency with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl -w
use strict;

my ($genome, $head, $tail);
my (%mono_nt, %di_nt);

$/ = &quot;&gt;&quot;;
open my $fasta, &#039;&lt;&#039;, $ARGV[0] or die $!;
while (&lt;$fasta&gt;) {
    chomp; s/\r//g; s/^\s*|\s*$//;
    if (/.+?\n(.+)/s) {
        (my $seq = $1) =~ s/\n//g;
        $genome .= uc $seq;
        $head = uc substr($seq, 0, 1);
        $di_nt{&quot;$tail$head&quot;}-- if $tail;
        $tail = uc substr($seq, -1);
    }
}
close $fasta;

my $len = length $genome;
for my $i (0..$len-2) {
    my $each_mono_nt = substr($genome, $i, 1);
    my $each_di_nt   = substr($genome, $i, 2);
    $mono_nt{$each_mono_nt}++;
    $di_nt{$each_di_nt}++;
}
$mono_nt{$tail}++;

print &quot;-&quot;x30, &quot;\nSingle nucleotide frequency:\n&quot;;
for my $nt (sort keys %mono_nt) {print &quot;$nt\t&quot;, $mono_nt{$nt} / $len, &quot;\n&quot;;}

print &quot;\n&quot;, &quot;-&quot;x30, &quot;\nDinucleotide frequency:\nDinucleotide\tObs. freq.\tExp. freq.\n&quot;;
for my $nt_pair (sort keys %di_nt) {
    my ($first_nt, $second_nt) = split //, $nt_pair;
    print &quot;$nt_pair\t&quot;, $di_nt{$nt_pair} / ($len-1), &quot;\t&quot;,
        $mono_nt{$first_nt} * $mono_nt{$second_nt} /$len /$len, &quot;\n&quot;;
}</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/34043/create-a-heatmap-with-r</guid>
	<pubDate>Mon, 31 Jul 2017 08:45:58 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/34043/create-a-heatmap-with-r</link>
	<title><![CDATA[Create a heatmap with R]]></title>
	<description><![CDATA[<code>bio &lt;- read.csv(&quot;ppg2008.csv&quot;, sep=&quot;,&quot;)

bio &lt;- bio[order(bio$PTS),]
row.names(bio) &lt;- bio$Name
bio &lt;- bio[,2:20]

bio_matrix &lt;- data.matrix(bio)
bio_heatmap &lt;- heatmap(bio_matrix, Rowv=NA, Colv=NA, col = brewer.pal(9, &quot;Blues&quot;), scale=&quot;column&quot;, margins=c(5,10))


##
#Sample DATA
#Name  ,G,MIN,PTS,FGM,FGA,FGP,FTM,FTA,FTP,3PM,3PA,3PP,ORB,DRB,TRB,AST,STL,BLK,TO,PF
#Genome1 ,79,38.6,30.2,10.8,22,0.491,7.5,9.8,0.765,1.1,3.5,0.317,1.1,3.9,5,7.5,2.2,1.3,3.4,2.3
#Genome2 ,81,37.7,28.4,9.7,19.9,0.489,7.3,9.4,0.78,1.6,4.7,0.344,1.3,6.3,7.6,7.2,1.7,1.1,3,1.7
#Genome3,82,36.2,26.8,9.8,20.9,0.467,5.9,6.9,0.856,1.4,4.1,0.351,1.1,4.1,5.2,4.9,1.5,0.5,2.6,2.3


library(ggplot2)
bio &lt;- read.csv(&quot;seeTNF_Final&quot;, sep=&quot;\t&quot;)
row.names(bio) &lt;- bio$Contig
bio &lt;- bio[,2:256]
data=as.matrix(bio)
head(data)
#Rcolorbrewer palette
library(RColorBrewer)
coul = colorRampPalette(brewer.pal(8, &quot;PiYG&quot;))(25)
#heatmap(data)

# Use &#039;scale&#039; to normalize (right)
heatmap(data, scale=&quot;column&quot;)
#heatmap(data, scale=&quot;column&quot;, col = coul)</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/33471/download-the-gff-files-from-ncbi-using-bash-scriptcommand</guid>
	<pubDate>Thu, 08 Jun 2017 08:17:11 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/33471/download-the-gff-files-from-ncbi-using-bash-scriptcommand</link>
	<title><![CDATA[Download the gff files from NCBI using bash script/command]]></title>
	<description><![CDATA[<code>#!/bin/bash

# Download the genome from NCBI using command

# Create a Directory
mkdir genome_gff
cd genome_gff

# Look for genome assembly summary and extract the URL
# USER need to provide the right summary file to curl  
# Commentline if you are not interested in that genome set
# -for fungi
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/fungi/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20} &#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/.+/)(GCF_.+)|\1\2/\2_genomic.gff.gz|&#039; &gt; genomic_file_fungi

# -for bacteria
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/bacteria/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20} &#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/.+/)(GCA_.+)|\1\2/\2_genomic.gff.gz|&#039; &gt; genomic_file_bacteria

# -for plant 
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/plant/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20} &#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/.+/)(GCF_.+)|\1\2/\2_genomic.gff.gz|&#039; &gt; genomic_file_plant 

# -for archaea
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/archaea/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20} &#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/.+/)(GCF_.+)|\1\2/\2_genomic.gff.gz|&#039; &gt; genomic_file_archaea

# -for protozoa
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/protozoa/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20} &#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/.+/)(GCF_.+)|\1\2/\2_genomic.gff.gz|&#039; &gt; genomic_file_protozoa

# -for vertebrate_mammalian
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20} &#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/.+/)(GCF_.+)|\1\2/\2_genomic.gff.gz|&#039; &gt; genomic_file_vertebrate_mammalian

# -for vertebrate_other
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_other/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20} &#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/.+/)(GCF_.+)|\1\2/\2_genomic.gff.gz|&#039; &gt; genomic_file_vertebrate_other

# -for invertebrate
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20} &#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/.+/)(GCF_.+)|\1\2/\2_genomic.gff.gz|&#039; &gt; genomic_file_invertebrate

# -for viral
curl &#039;ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/viral/assembly_summary.txt&#039; | awk &#039;{FS=&quot;\t&quot;} !/^#/ {print $20} &#039; | sed -r &#039;s|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/.+/)(GCF_.+)|\1\2/\2_genomic.gff.gz|&#039; &gt; genomic_file_viral

#Read the uerl from file and download

FILES=$(pwd)/*
for f in $FILES
do
  echo &quot;Processing $f file...&quot;
  filename=$(basename &quot;$f&quot;)
  extension=&quot;${filename##*.}&quot;
  filename=&quot;${filename%.*}&quot;
  # Create a directory with appending G
  mkdir &quot;GFF$filename&quot;
  cd &quot;GFF$filename&quot;
  # take action on each file. $f store current file name
  head -n 4 $f &gt; $f.head
  wget --input $f.head
  gunzip *.gz
  #cat $f
  cd ..
done</code>]]></description>
	<dc:creator>Rahul Nayak</dc:creator>
</item>

</channel>
</rss>