<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: All]]></title>
	<link>https://bioinformaticsonline.com/snippets?offset=380</link>
	<atom:link href="https://bioinformaticsonline.com/snippets?offset=380" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/31562/extract-ids-from-file-with-perl</guid>
	<pubDate>Wed, 15 Mar 2017 05:21:59 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/31562/extract-ids-from-file-with-perl</link>
	<title><![CDATA[Extract ids from file with perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
# Collect the ids found in the first tab-separated column of fin.txt.
use strict;
use warnings;

my $fh = read_fh(&quot;fin.txt&quot;);
my %idHash;    # id =&gt; number of the line on which the id was last seen
my @allIds;    # ids in file order, so the last one read can be identified
while (&lt;$fh&gt;) {
  chomp $_;
  my @cells = split /\t/, $_;
  # Blank lines (or lines with an empty first column) carry no id
  next unless defined $cells[0] and length $cells[0];
  $idHash{$cells[0]} = $.;
  push @allIds, $cells[0];
}
close $fh;

# Delete the last id for safety -- its record might not have finished all steps
delete $idHash{$allIds[-1]} if @allIds;

# Typical use inside a processing loop: skip ids that are already recorded, e.g.
#   next if exists $idHash{$look_for};


############################################################
# Open a file for reading and return the filehandle.
# Names ending in "gz" are streamed through gunzip; everything else is opened directly.
# Dies with the file name and the OS error when the open fails.
sub read_fh {
    my $filename = shift @_;
    my $filehandle;
    if ($filename =~ /gz$/) {
        # List-form pipe open: the file name is passed to gunzip as-is, never through a shell
        open $filehandle, &#039;-|&#039;, &#039;gunzip&#039;, &#039;-dc&#039;, $filename
            or die &quot;Cannot run gunzip on $filename: $!\n&quot;;
    }
    else {
        # Three-argument open: mode characters inside the file name are taken literally
        open $filehandle, &#039;&lt;&#039;, $filename
            or die &quot;Cannot open $filename: $!\n&quot;;
    }
    return $filehandle;
}</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/31356/transpose-the-file-coordinates-and-plot-dendrogram-in-r</guid>
	<pubDate>Mon, 06 Mar 2017 04:57:54 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/31356/transpose-the-file-coordinates-and-plot-dendrogram-in-r</link>
	<title><![CDATA[Transpose the file coordinates and plot dendrogram in R]]></title>
	<description><![CDATA[<code>#Save this as tr.awk
#Transposes a table: the value at input row r, column c is printed at output row c, column r.
{
    # Remember the widest row; inside END, NF only describes the last record
    if (NF &gt; maxnf) maxnf = NF
    for (i=1; i&lt;=NF; i++) a[NR,i]=$i
}
END {
    for (i=1; i&lt;=maxnf; i++) {
        for (j=1; j&lt;=NR; j++) {
            printf &quot;%s&quot;, a[j,i]
            if (j&lt;NR) printf &quot;%s&quot;, OFS
        }
        printf &quot;%s&quot;,ORS
    }
}

#Run this on the command line to transpose your file.
#OFS is set to a tab because the R step below reads the result with sep = &quot;\t&quot;.
awk -v OFS=&#039;\t&#039; -f tr.awk bbb.txt &gt; bbbout.txt

#Plot in R (TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables)
tetra &lt;- read.csv(&quot;bbbout.txt&quot;, header=TRUE, stringsAsFactors = FALSE, sep = &quot;\t&quot;, row.names = 1)
hc = hclust(dist(tetra))
plot (hc)</code>]]></description>
	<dc:creator>Abhimanyu Singh</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/31216/extracting-fasta-sequences-based-on-position-with-perl-script</guid>
	<pubDate>Wed, 01 Mar 2017 17:10:11 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/31216/extracting-fasta-sequences-based-on-position-with-perl-script</link>
	<title><![CDATA[Extracting FASTA sequences based on position with perl script !!]]></title>
	<description><![CDATA[<code>#!/usr/bin/env perl

#Uses: perl sub-seq.pl input.fasta start end
#Prints bases start..end (1-based, inclusive) of every record read from the input.

use strict;
use warnings;

# The last two arguments are the range; anything before them is read by the &lt;&gt; operator
die &quot;Usage: perl sub-seq.pl input.fasta start end\n&quot; unless @ARGV &gt;= 2;

my $end   = pop;
my $start = pop;
local $/ = &#039;&gt;&#039;;    # read one &#039;&gt;&#039;-delimited FASTA record per iteration

while (&lt;&gt;) {
    chomp;
    # First line of the record is the header; /p makes ${^POSTMATCH} reliable before Perl 5.20
    next unless /(.+)/p;
    my $header = &quot;$/${1}_$start-$end\n&quot;;
    my $seq = ${^POSTMATCH};
    $seq =~ s/\s//g;
    print $header;
    # substr takes a LENGTH as its third argument, not an end position
    print substr( $seq, $start - 1, $end - $start + 1 ) . &quot;\n&quot;;
}</code>]]></description>
	<dc:creator>Shruti Paniwala</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/31103/extract-a-range-from-genome-file-with-perl</guid>
	<pubDate>Fri, 24 Feb 2017 09:30:11 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/31103/extract-a-range-from-genome-file-with-perl</link>
	<title><![CDATA[Extract a range from genome file with perl.]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

# Usage: perl extract_range.pl in.fasta start end
# Truncates every sequence of in.fasta to start..end and writes the result to in.fasta.out.
# Each output id gets the suffix &quot;_start-end&quot;.

use strict;
use warnings;
use Bio::SeqIO;

die &quot;Usage: $0 in.fasta start end\n&quot; unless @ARGV &gt;= 3;

my $in_file = $ARGV[0];
my $start_pos = $ARGV[1];
my $end_pos = $ARGV[2];

# Reject bad coordinates here instead of letting trunc() fail on the first record
for my $pos ($start_pos, $end_pos) {
    die &quot;Positions must be positive integers, got &#039;$pos&#039;\n&quot; unless $pos =~ /^[1-9]\d*$/;
}
die &quot;start ($start_pos) must not be greater than end ($end_pos)\n&quot; if $start_pos &gt; $end_pos;

my $in = Bio::SeqIO-&gt;new ( -file =&gt; $in_file, -format =&gt; &#039;fasta&#039;);
my $out = Bio::SeqIO-&gt;new( -file =&gt; &quot;&gt;$in_file.out&quot;, -format =&gt; &#039;fasta&#039;);


while (my $seq = $in-&gt;next_seq() ) {

    # Rename first so the truncated copy written below carries the range in its id
    $seq-&gt;display_id( $seq-&gt;display_id() . &quot;_$start_pos-$end_pos&quot; );
    $out-&gt;write_seq( $seq-&gt;trunc($start_pos, $end_pos) );
}</code>]]></description>
	<dc:creator>Abhimanyu Singh</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/31094/check-overlapping-range-with-perl</guid>
	<pubDate>Fri, 24 Feb 2017 05:09:56 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/31094/check-overlapping-range-with-perl</link>
	<title><![CDATA[Check overlapping range with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

use strict;
use warnings;

# Build 10001 ascending borders: start at 0, then add a random step of 1..200 each time.
my @ranges = (0);
for my $step (1 .. 10000) {
    push @ranges, $ranges[-1] + 1 + int rand 200;
}

# Draw 1000001 random integers below the last border and distribute them over the ranges.
my @tests = map { int rand $ranges[-1] } 0 .. 1000000;
match(\@ranges, \@tests);

# Partition the values in @$tests around $border.
# Returns two array references: the values below $border, then all remaining values.
# Input order is kept within each group.
sub div {
    my ($border, $tests) = @_;
    my (@below, @rest);
    foreach my $value (@$tests) {
        if ($value &lt; $border) {
            push @below, $value;
        }
        else {
            push @rest, $value;
        }
    }
    return (\@below, \@rest);
}

# Assign every value in @$tests to the range it falls into and print the result.
#   $ranges - array reference of ascending range borders
#   $tests  - array reference of values to place
# The border list is halved recursively; once a single border is left, the values
# that reached it are printed (or the range is reported as empty).
sub match {
    my ($ranges, $tests) = @_;

    # With no borders at all the pivot below would be 0 and the recursion would never end
    return unless @$ranges;

    if (@$ranges == 1) {
        if (@$tests) {
            print &quot;tests in range $ranges-&gt;[0]:\n&quot;, join(&quot;, &quot;, @$tests), &quot;\n&quot;;
        }
        else {
            print &quot;range $ranges-&gt;[0] is empty\n&quot;;
        }
    }
    else {
        # Split borders and values at the middle border, then handle each half separately
        my $pivot = int((@$ranges + 1)/ 2);
        my ($lt, $ge) = div($ranges-&gt;[$pivot], $tests);
        match([@{$ranges}[0..$pivot-1]], $lt);
        match([@{$ranges}[$pivot..$#$ranges]], $ge);
    }
}</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/30930/check-overlaps-with-perl</guid>
	<pubDate>Wed, 15 Feb 2017 04:43:39 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/30930/check-overlaps-with-perl</link>
	<title><![CDATA[Check overlaps with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;

# Bounds shared by both checks below
my ($lower, $upper) = (40, 100);

#For normal overlaps
#A number lies between the bounds exactly when it is the median of (lower, upper, number)

for my $num (17,42,99,111) {

    my $is_between = (sort {$a &lt;=&gt; $b} $lower, $upper, $num)[1] == $num;
    
    printf &quot;$num is%s between $lower and $upper\n&quot;, $is_between ? &quot;&quot; : &quot; not&quot;;
}


#For range to range overlaps
#A range lies within the bounds exactly when its two ends are the two middle values
#of the sorted list (lower, upper, range start, range end)

for my $range ( [10,17],
                [30,71],
                [42,99],
                [83,120],
                [101,111] ) {

    my @middle = (sort {$a &lt;=&gt; $b} $lower, $upper, @$range)[1,2];

    # Explicit element comparison instead of the experimental smartmatch operator
    my $mismatches = grep { $middle[$_] != $range-&gt;[$_] } 0, 1;
    my $is_within = !$mismatches;
    
    printf &quot;[@$range] is%s within [$lower $upper]\n&quot;, $is_within ? &quot;&quot; : &quot; not&quot;;
}</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/30764/calculate-some-statistics-for-a-dna-alignment-with-perl</guid>
	<pubDate>Thu, 02 Feb 2017 04:11:54 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/30764/calculate-some-statistics-for-a-dna-alignment-with-perl</link>
	<title><![CDATA[Calculate some statistics for a DNA alignment with Perl]]></title>
	<description><![CDATA[<code>use Bio::AlignIO;
  use Bio::Align::DNAStatistics;

  # One statistics object is reused for every calculation below
  my $stats = Bio::Align::DNAStatistics-&gt;new();
  # Read the first alignment from an EMBOSS-format file
  my $alignin = Bio::AlignIO-&gt;new(-format =&gt; &#039;emboss&#039;,
                                 -file   =&gt; &#039;t/data/insulin.water&#039;);
  my $aln = $alignin-&gt;next_aln;
  # Distance matrix for that alignment using the Jukes-Cantor method
  my $jcmatrix = $stats-&gt;distance(-align =&gt; $aln, 
                                  -method =&gt; &#039;Jukes-Cantor&#039;);

  print $jcmatrix-&gt;print_matrix;
  ## and for measurements of synonymous /nonsynonymous substitutions ##

  # Read the first alignment from a FASTA-format file
  my $in = Bio::AlignIO-&gt;new(-format =&gt; &#039;fasta&#039;,
                            -file   =&gt; &#039;t/data/nei_gojobori_test.aln&#039;);
  my $alnobj = $in-&gt;next_aln;
  # Only the ids of the first two sequences are kept for the pairwise call
  my ($seq1id,$seq2id) = map { $_-&gt;display_id } $alnobj-&gt;each_seq;
  # The result is used below as an array reference whose first element is a hash reference
  my $results = $stats-&gt;calc_KaKs_pair($alnobj, $seq1id, $seq2id);
  print &quot;comparing &quot;.$results-&gt;[0]{&#039;Seq1&#039;}.&quot; and &quot;.$results-&gt;[0]{&#039;Seq2&#039;}.&quot;\n&quot;;
  # Print every remaining entry; keys containing &quot;Seq&quot; were already shown above
  for (sort keys %{$results-&gt;[0]} ){
      next if /Seq/;
      printf(&quot;%-9s %.4f \n&quot;,$_ , $results-&gt;[0]{$_});
  }

  # Same report for every element of the array reference returned by calc_all_KaKs_pairs
  my $results2 = $stats-&gt;calc_all_KaKs_pairs($alnobj);
  for my $an (@$results2){
      print &quot;comparing &quot;. $an-&gt;{&#039;Seq1&#039;}.&quot; and &quot;. $an-&gt;{&#039;Seq2&#039;}. &quot; \n&quot;;
      for (sort keys %$an ){
          next if /Seq/;
          printf(&quot;%-9s %.4f \n&quot;,$_ , $an-&gt;{$_});
      }
      print &quot;\n\n&quot;;
  }

  # NOTE(review): the second argument (1000) is presumably a replicate count -- confirm in the BioPerl docs
  my $result3 = $stats-&gt;calc_average_KaKs($alnobj, 1000);
  for (sort keys %$result3 ){
      next if /Seq/;
      printf(&quot;%-9s %.4f \n&quot;,$_ , $result3-&gt;{$_});
  }</code>]]></description>
	<dc:creator>Abhimanyu Singh</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/30686/bash-script-for-selfblast-a-genome</guid>
	<pubDate>Mon, 30 Jan 2017 09:31:33 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/30686/bash-script-for-selfblast-a-genome</link>
	<title><![CDATA[BASH script for SelfBLAST a genome]]></title>
	<description><![CDATA[<code>#!/bin/bash

#self BLAST a genome -- Expecting you have blast and samtools installed in your system
#Author: Jitendra Narayan
#USAGE: ./selfBlast.sh extract &lt;chrName&gt;
#USAGE: ./selfBlast.sh all

#Common settings 
FASTAFILE=MergedContigs.fasta
MYDB=myDB
OUTFILE=seeRES
THREAD=20
SEQ=&quot;&quot;

echo &quot;User $USER provided $# arguments, Detail of the arguments: $*&quot;

#Skip makeblastdb when the $MYDB.nhr file is already present
if [ -f &quot;$MYDB.nhr&quot; ]
then
  echo &quot;BLAST database for $FASTAFILE genome exists&quot;
else
  echo &quot;Thanks for testing this script $USER; Me creating creating blastDB named $MYDB for you&quot;;
  makeblastdb -in &quot;$FASTAFILE&quot; -parse_seqids -dbtype nucl -out &quot;$MYDB&quot; || exit 1
fi

#Pick the query: one extracted sequence, or the whole FASTA file.
#${1:-} keeps the test valid when the script is started without arguments.
if [ &quot;${1:-}&quot; = &quot;extract&quot; ]
then
  if [ -z &quot;${2:-}&quot; ]
  then
    echo &quot;USAGE: $0 extract &lt;chrName&gt;&quot;
    exit 1
  fi
  echo &quot;Extracting the sequence $2 for you from $FASTAFILE -- MAKE SURE U HAVE ADDED CORRECT NAME&quot;
  #First call builds the .fai index, second call pulls out the requested sequence
  samtools faidx &quot;$FASTAFILE&quot; || exit 1
  samtools faidx &quot;$FASTAFILE&quot; &quot;$2&quot; &gt; &quot;$2.fa&quot; || exit 1
  SEQ=&quot;$2.fa&quot;
elif [ &quot;${1:-}&quot; = &quot;all&quot; ]
then
  echo &quot;You want entire sequence to blast&quot;
  SEQ=&quot;$FASTAFILE&quot;
else
  #Unknown or missing mode: stop here instead of running blastn with an empty query
  echo &quot;Something went wrong $USER - Contact jitendra&quot;
  exit 1
fi

echo &quot;Doing alignments -- BLASting&quot;;
#Report success only when blastn itself exits successfully
if blastn -task megablast -query &quot;$SEQ&quot; -db &quot;$MYDB&quot; -evalue 1e-5 -num_threads &quot;$THREAD&quot; -max_target_seqs 1 -outfmt &#039;6 qseqid staxid qstart qend sseqid sstart send evalue length frames qcovs&#039; -out &quot;$OUTFILE&quot;
then
  echo &quot;DONE successfully :)&quot;
else
  echo &quot;blastn failed -- see the messages above&quot;
  exit 1
fi</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/30676/calculate-atgc-percentage-in-parallel-with-perl</guid>
	<pubDate>Thu, 26 Jan 2017 10:18:53 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/30676/calculate-atgc-percentage-in-parallel-with-perl</link>
	<title><![CDATA[Calculate ATGC percentage in parallel with perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

use strict;
use warnings;
use Parallel::ForkManager;
use Bio::SeqIO;

#usage: perl testParallel.pl &lt;multi fasta infile&gt;

die &quot;usage: perl testParallel.pl &lt;multi fasta infile&gt;\n&quot; unless @ARGV;

# Load every record before forking so each child can look its sequence up by id
my %sequences;
my $seqio = Bio::SeqIO-&gt;new(-file =&gt; $ARGV[0], -format =&gt; &quot;fasta&quot;);
while (my $seqobj = $seqio-&gt;next_seq) {
    my $id  = $seqobj-&gt;display_id;    # there&#039;s your key
    my $seq = $seqobj-&gt;seq;           # and there&#039;s your value
    $sequences{$id} = $seq;
}

  my $max_procs = 5;    # upper limit of children running at the same time
  my @names = keys %sequences;

  my $pm = Parallel::ForkManager-&gt;new($max_procs);

  # Setup a callback for when a child finishes up so we can
  # get its exit code
  $pm-&gt;run_on_finish (
    sub { my ($pid, $exit_code, $ident) = @_;
      #print &quot;** $ident just got out of the pool &quot;.
      #  &quot;with PID $pid and exit code: $exit_code\n&quot;;
    }
  );

  $pm-&gt;run_on_start(
    sub { my ($pid,$ident)=@_;
     #print &quot;** $ident started, pid: $pid\n&quot;;
    }
  );

  $pm-&gt;run_on_wait(
    sub {
      #print &quot;** Have to wait for one children ...\n&quot;
    },
    0.5
  );

  NAMES:
  foreach my $child ( 0 .. $#names ) {
    # start() returns the child pid in the parent (true, so move on to the next name)
    # and 0 in the child, which then falls through to do the work
    my $pid = $pm-&gt;start($names[$child]) and next NAMES;
    checkATCG($names[$child]);
    # Exit status 0 = success; the loop index is not a meaningful status and
    # would not fit the 8-bit exit code once it exceeds 255
    $pm-&gt;finish(0);
  }

  print &quot;Waiting for Children...\n&quot;;
  $pm-&gt;wait_all_children;
  print &quot;Everybody is out of the pool!\n&quot;;


# Print one tab-separated line for the named sequence:
#   name, number of A/C/G/T bases, A+T count, G+C count, GC percentage
# The sequence itself is read from the file-level %sequences hash.
sub checkATCG {
    my $name = shift;
    my $DNA  = $sequences{$name};

    # tr/// without a replacement list only counts; both cases are listed so
    # lowercase bases are included
    my $count_a = ($DNA =~ tr/Aa//);
    my $count_c = ($DNA =~ tr/Cc//);
    my $count_g = ($DNA =~ tr/Gg//);
    my $count_t = ($DNA =~ tr/Tt//);

    my $Total = $count_a + $count_c + $count_g + $count_t;
    my $AT    = $count_a + $count_t;
    my $GC    = $count_c + $count_g;

    # GC content is (G + C) / (A + C + G + T); a sequence without any of these
    # bases reports 0 instead of dividing by zero
    my $GCper = $Total ? ($GC / $Total * 100) : 0;
    print &quot;$name\t$Total\t$AT\t$GC\t$GCper:\n&quot;;

}</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/30650/perl-script-to-insert-the-dna-string-in-genome</guid>
	<pubDate>Mon, 23 Jan 2017 10:04:55 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/30650/perl-script-to-insert-the-dna-string-in-genome</link>
	<title><![CDATA[Perl script to insert the DNA string in genome]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

# Copies a FASTA file and inserts $seqI after the first $pos bases of the record
# whose id equals $chr; every other record is written unchanged.

use warnings;
use strict;
use Bio::SeqIO;
use Bio::Seq;

die &quot;Usage: $0 genome.fasta output.fasta\n&quot; unless @ARGV &gt;= 2;

my $file = $ARGV[0]; # input fasta file (genome file)
my $out = $ARGV[1]; # output fasta file

my $chr=&quot;test&quot;; #insertion chromosome
my $pos=10; # position of the insertion
my $seqI = &quot;AAAA&quot;; #sequence of the insertion

my $seq_in  = Bio::SeqIO-&gt;new( -format =&gt; &#039;fasta&#039;,-file =&gt; $file);
my $seq_out = Bio::SeqIO-&gt;new( -format =&gt; &#039;fasta&#039;,-file =&gt; &quot;&gt;&quot;.$out);
while( my $seq = $seq_in-&gt;next_seq() ) {    
    if($seq-&gt;primary_id eq $chr){
        my $bases = $seq-&gt;seq;    # fetch the sequence string once
        # An insertion point past the end would otherwise be appended silently
        die &quot;Insertion position $pos is beyond the end of $chr (length &quot; . length($bases) . &quot;)\n&quot;
            if $pos &gt; length $bases;
        my $upstream   = substr($bases, 0, $pos);
        my $downstream = substr($bases, $pos);    # everything from $pos to the end
        my $seq_obj = Bio::Seq-&gt;new(-seq =&gt; $upstream.$seqI.$downstream,-display_id =&gt; $seq-&gt;primary_id,-alphabet =&gt; &quot;dna&quot; );
        $seq_out-&gt;write_seq($seq_obj);
    }
    else{
        $seq_out-&gt;write_seq($seq);
    }
}</code>]]></description>
	<dc:creator>Shruti Paniwala</dc:creator>
</item>

</channel>
</rss>