<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: All]]></title>
	<link>https://bioinformaticsonline.com/snippets?offset=350</link>
	<atom:link href="https://bioinformaticsonline.com/snippets?offset=350" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35340/extract-the-values-between-to-user-defined-string-with-perl</guid>
	<pubDate>Wed, 24 Jan 2018 16:18:10 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35340/extract-the-values-between-to-user-defined-string-with-perl</link>
	<title><![CDATA[Extract the values between two user-defined strings with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl -w
use strict;

while (&lt;DATA&gt;)
{
    process_record() if /^\s*START/;
}

# Print every line of the current record: everything read from DATA up to
# (but not including) the next line matching END, or up to end of input.
# A blank line is printed afterwards as a separator.
sub process_record
{
   while (defined (my $record_line = &lt;DATA&gt;))
   {
      last if $record_line =~ /^\s*END/;
      print &quot;$record_line&quot;;
   }
   print &quot;\n&quot;;  #blank line separating this record from the next one
}

__DATA__

XXXX
YYYY
START
These are the first
set of lines
which are to be extracted
END
   START
New line
And new
Will be extracted?
END
XXX
ZZZ
YYY
START
These are the second
set of lines
which are to be extracted
END
aasds
tteret
tertetr</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35339/remove-the-duplicated-line-present-only-next-to-each-other-with-perl</guid>
	<pubDate>Wed, 24 Jan 2018 15:35:15 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35339/remove-the-duplicated-line-present-only-next-to-each-other-with-perl</link>
	<title><![CDATA[Remove the duplicated line present only next to each other with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;

{
  # Walk through DATA keeping the current line and the one after it;
  # a line is printed only when the following line differs from it, so a
  # run of identical neighbouring lines is reduced to its last member.
  my $current = &lt;DATA&gt;;

  while( defined( my $upcoming = &lt;DATA&gt; ) )
  {
    print $current unless $current eq $upcoming;
    $current = $upcoming;
  }
  # The last line read has no successor to be compared with; print it once
  # the input is exhausted.
print $current if eof;
}
__DATA__
apple
apple
plum
vinegar
apple
banana
banana
banana
apple</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35338/remove-duplicate-lines-with-perl</guid>
	<pubDate>Wed, 24 Jan 2018 15:12:18 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35338/remove-duplicate-lines-with-perl</link>
	<title><![CDATA[Remove duplicate lines with perl]]></title>
	<description><![CDATA[<code>#! perl -sw
use strict;
my %seen;   # line text =&gt; how many times that line has been read so far
#open DATA, $ARGV[0] or die &quot;Couldn&#039;t open $ARGV[0]: $!\n&quot;;
while (defined(my $line = &lt;DATA&gt;)) {
    next if $seen{$line}++;   # this exact line was already printed
    print $line;
}

__DATA__
apple
apple
plum
vinegar
apple
banana
banana
banana
apple</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35265/plot-the-clock-using-lastz-gerenal-outfile</guid>
	<pubDate>Thu, 18 Jan 2018 11:05:25 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35265/plot-the-clock-using-lastz-gerenal-outfile</link>
	<title><![CDATA[Plot the clock using Lastz -general outfile]]></title>
	<description><![CDATA[<code>use strict;
use warnings;
use Statistics::R ;
use List::Util qw(sum);

#Usage  perl clockPlot.pl Palindrome.palfc 1500
my $R = Statistics::R-&gt;new() ;
$R-&gt;startR ;

# Read the tab-separated input file named on the command line and count,
# in %pHash, the rows whose eighth column is "-".  The hash key is built
# from the first "_"-separated token of column 2 (lower-case letters
# removed) joined by "_" to column 4.
my $fileN=$ARGV[0];
my $mSize=$ARGV[1];   # only used by the optional (commented-out) length filter below
die &quot;Usage: perl clockPlot.pl &lt;input_file&gt; [min_size]\n&quot; unless defined $fileN;
# Three-argument open: the file name is always treated as a plain path,
# even if it starts with characters such as &quot;&gt;&quot; or ends with &quot;|&quot;.
open (my $fh2, &#039;&lt;&#039;, $fileN) or die &quot;Could not open file $fileN $!&quot;;
my (@allKeys, @allFreq); my %pHash;
while(&lt;$fh2&gt;) {
	chomp;
	next if /^$/; #next if empty
	my @arr = split(&quot;\t&quot;, $_);
	next if @arr &lt; 8; # row too short to carry the column tested below
	if ($arr[7] eq &quot;-&quot;) {
		my $len=$arr[5]-$arr[4];
		#next if $len &lt; $mSize;
		my @chr = split &#039;\_&#039;, $arr[1];
		$chr[0] =~ s/[a-z]//g;
		my $newChr=&quot;$chr[0]&quot;.&quot;_&quot;.&quot;$arr[3]&quot;;
		$pHash{$newChr}++;
	}
}
close($fh2);

# Turn every counted key into one plot entry: the key is split on "_" again,
# the first piece becomes the x value and the count divided by the second
# piece becomes the score.
# NOTE(review): a second piece of 0 (or a non-numeric one) would make the
# division fail -- confirm the input can never contain that.
foreach my $val (keys %pHash) {
	#next if $pHash{$val} &gt; 10;
	my @sChr = split &#039;\_&#039;, $val;
	my $score=$pHash{$val}/$sChr[1];
	push @allKeys, $sChr[0];
	push @allFreq, $score;
}

# Comma-separated lists that are interpolated into the R code below, plus
# the number of entries with an "L" suffix for the row.names argument.
my $allKeys=join &#039;,&#039;, @allKeys;
my $allFreq=join &#039;,&#039;, @allFreq;
my $hLen=scalar (@allKeys);
my $ll=&quot;$hLen&quot;.&#039;L&#039;;

# Build a two-column data frame (Chromosome, Frequency) in R and draw it as
# a bar chart in polar coordinates with ggplot2.
# NOTE(review): the values are pasted unquoted into c(...), so this presumably
# expects the keys to be purely numeric -- confirm.
$R-&gt;run(qq`
d &lt;- structure(list(Chromosome = c($allKeys), Frequency = c($allFreq)), .Names = c(&quot;Chromosome&quot;, &quot;Frequency&quot;
), row.names = c(NA, $ll), class = &quot;data.frame&quot;)
str(d)

library(ggplot2)
ggplot(d , aes(x =  Chromosome, y =  Frequency, fill=Frequency)) +
  coord_polar(theta = &quot;x&quot;, start = -pi/5) +
  geom_bar(stat = &quot;identity&quot;) + 
  scale_x_continuous(breaks = seq(0, $hLen, 50))`);

# Shut down the R session started earlier in the script.
$R-&gt;stopR() ;</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35123/create-genome-scaffolding-with-perl</guid>
	<pubDate>Mon, 08 Jan 2018 23:51:46 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35123/create-genome-scaffolding-with-perl</link>
	<title><![CDATA[Create genome scaffolding with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

use warnings;
use strict;
use English;

use Pod::Usage; ## uses pod documentation in usage code
use Getopt::Long qw(:config auto_version auto_help pass_through);

our $VERSION = &quot;1.00&quot;;

=head1 NAME

psl_scaffolder.pl - use self-mapped PSL file to scaffold a genome

=head1 SYNOPSIS

./psl_scaffolder.pl -query &lt;file&gt; [options] &lt;mapping.psl&gt;

=cut

# Return the smaller of two numbers.
# Lexical variables are used on purpose: $a and $b are the package globals
# that sort() relies on, so assigning to them here would clobber a
# comparison in progress elsewhere.
sub min {
  my ($x, $y) = @_;
  return( ($x &lt; $y) ? $x : $y);
}

# Return the larger of two numbers.
# Lexical variables are used on purpose: $a and $b are the package globals
# that sort() relies on, so assigning to them here would clobber a
# comparison in progress elsewhere.
sub max {
  my ($x, $y) = @_;
  return( ($x &gt; $y) ? $x : $y);
}

# Return the reverse complement of a nucleotide string.
# IUPAC ambiguity codes are complemented as well, in both upper case and
# lower case (masked sequence); any other character is left unchanged.
sub rc {
  my ($seq) = @_;
  # complement upper- and lower-case codes in a single pass over a copy
  (my $complement = $seq) =~
    tr/ACGTUYRSWMKDVHBXNacgtuyrswmkdvhbxn/TGCAARYSWKMHBDVXNtgcaaryswkmhbdvxn/;
  return(scalar(reverse($complement)));
}

# Return a single consensus character for two aligned characters.
#   $b1, $b2 - the two characters to combine
# Returns $b1 when both are identical or when either one is a blank (" ").
# Otherwise the pair is upper-cased, put in alphabetical order and looked
# up in the table below; pairs missing from the table yield "N".
# NOTE(review): when $b1 is the blank one, the blank is returned rather than
# $b2 -- confirm that this is intended.
sub getConsensus {
  my ($b1, $b2) = @_;
  if(($b1 eq $b2) || ($b1 eq &quot; &quot;) || ($b2 eq &quot; &quot;)){
    ## equal bases, or absent bases, so consensus is easy
    return($b1);
  }
  # if different, convert to upper case to simplify lookup and order the
  # pair alphabetically, because the table only contains sorted keys.
  # `lt` is required here: `cmp` yields -1 or 1 for different strings, and
  # both values are true, so it cannot be used to pick an order.
  my ($u1, $u2) = (uc($b1), uc($b2));
  my $bc = ($u1 lt $u2) ? $u1.$u2 : $u2.$u1;
  my %consensusLookup =
    (AC =&gt; &quot;M&quot;, AM =&gt; &quot;A&quot;, CM =&gt; &quot;C&quot;,
     GT =&gt; &quot;K&quot;, GK =&gt; &quot;G&quot;, KT =&gt; &quot;T&quot;,
     AG =&gt; &quot;R&quot;, AR =&gt; &quot;A&quot;, GR =&gt; &quot;G&quot;,
     CT =&gt; &quot;Y&quot;, CY =&gt; &quot;C&quot;, TY =&gt; &quot;T&quot;,
     AT =&gt; &quot;W&quot;, AW =&gt; &quot;A&quot;, TW =&gt; &quot;T&quot;,
    );
  # if &quot;simple&quot; ambiguity can be found, return that, otherwise return N
  # (i.e. GT =&gt; K, -A =&gt; N, YM -&gt; N)
  return( ($consensusLookup{$bc}) ? $consensusLookup{$bc} : &quot;N&quot;);
}

# Return the marker for one alignment column: a blank when the two
# characters are identical or when either of them is a blank or "N",
# and "*" for every other pair.
sub getMatch {
  my ($b1, $b2) = @_;
  return &quot; &quot; if $b1 eq $b2;
  foreach my $base ($b1, $b2) {
    return &quot; &quot; if ($base eq &quot; &quot;) || ($base eq &quot;N&quot;);
  }
  return &quot;*&quot;;
}

############### Program starts here

# set default options
# @pslFiles collects the command-line arguments that turn out to be existing
# files; $projOpts holds the option values and is filled in by GetOptions.
my @pslFiles = ();
my $projOpts =
  {
   &quot;query&quot; =&gt; 0, # contig file for query sequences
   &quot;prefix&quot; =&gt; &quot;psl_scaffold_&quot;, # prefix for contig names
   &quot;pid&quot; =&gt; 90, # percent ID threshold
   &quot;trimlimit&quot; =&gt; 50, # max number of overlapping bases outside match region
  };

# -query and -prefix take strings, -pid and -trimlimit take integers
GetOptions($projOpts, &#039;query=s&#039;, &#039;pid=i&#039;, &#039;trimlimit=i&#039;, &#039;prefix=s&#039;);

# process remaining command line arguments (hopefully only PSL files)
# anything that is not an existing plain file aborts with a usage message
while (@ARGV) {
  my $argument = shift @ARGV;
  if(-f $argument){
    push (@pslFiles, $argument);
  } else {
  pod2usage({-exitVal =&gt; 1,
               -message =&gt; &quot;Error: Unknown command-line option or &quot;.
             &quot;non-existent file, &#039;$argument&#039;\n&quot;, -verbose =&gt; 0});
  }
}

# put the validated files back so that the later `while(&lt;&gt;)` loop reads them
@ARGV = @pslFiles;

# a query file is mandatory (the default value 0 means &quot;not given&quot;)
if(!$projOpts-&gt;{&quot;query&quot;}){
  pod2usage({-exitVal =&gt; 1,
             -message =&gt; &quot;Error: No query assembly file provided&quot;,
             -verbose =&gt; 0});
}

# ... and it has to be an existing plain file
if(!(-f $projOpts-&gt;{&quot;query&quot;})){
  pod2usage({-exitVal =&gt; 1,
             -message =&gt; sprintf(&quot;Error: query file &#039;%s&#039; doesn&#039;t exist&quot;,
                                 $projOpts-&gt;{&quot;query&quot;}),
             -verbose =&gt; 0});
}

print(STDERR &quot;Loading query sequences into memory...&quot;);
# Read the query FASTA file into %querySeqs.  The key is the sequence ID
# (header text after &quot;&gt;&quot; up to the first space); each entry stores the
# complete header text (fullName) and the concatenated sequence lines
# (sequence).
# The file was only tested for existence so far, so opening it can still
# fail (e.g. missing read permission) and has to be checked.
open(my $queryFile, &quot;&lt;&quot;, $projOpts-&gt;{&quot;query&quot;})
  or die(sprintf(&quot;Error: cannot open query file &#039;%s&#039;: %s\n&quot;,
                 $projOpts-&gt;{&quot;query&quot;}, $!));
my $seqID = &quot;&quot;;
my %querySeqs = ();
while(&lt;$queryFile&gt;){
  chomp;
  if(/^&gt;((.+?)( .*?\s*)?)$/){
    ## line is sequence header
    $seqID = $2;
    $querySeqs{$seqID}{fullName} = $1;
    $querySeqs{$seqID}{sequence} = &quot;&quot;;
  } else {
    # compare against the empty string rather than testing truthiness, so
    # that a sequence whose ID is &quot;0&quot; is not mistaken for &quot;no header yet&quot;
    if($seqID eq &quot;&quot;){
      pod2usage({-exitVal =&gt; 1,
                 -message =&gt; sprintf(&quot; Error: query file &#039;%s&#039; doesn&#039;t look &quot;.
                                     &quot;like a FASTA file (no initial ID header)&quot;,
                                     $projOpts-&gt;{&quot;query&quot;}),
                 -verbose =&gt; 0});
    }
    ## line is sequence
    $querySeqs{$seqID}{&quot;sequence&quot;} .= $_;
  }
}
close($queryFile);

my %targetSeqs = %querySeqs;
my $nextScaffoldID = 1;

my %replacementSeqs = ();

printf(STDERR &quot; loaded in %d sequences\n&quot;, scalar(keys(%querySeqs)));

print(STDERR &quot;Processing results...&quot;);
while(&lt;&gt;){
  chomp;
  my @fields = split(/\t/);
  my ($matches, $misMatches, $repMatches, $nCount, $qNumInsert,
      $qBaseInsert, $tNumInsert, $tBaseInsert, $strand, $qName,
      $qSize, $qStart, $qEnd, $tName, $tSize,
      $tStart, $tEnd, $blockCount, $blockSizes, $qStarts,
      $tStarts, @rest) = @fields;
  if(!$tStarts){
    pod2usage({-exitVal =&gt; 1,
               -message =&gt; sprintf(&quot; Error: mapping file doesn&#039;t look &quot;.
                                   &quot;like a PSL file (expecting&quot;.
                                   &quot;&gt;=21 tab-separated values, got %d)&quot;,
                                  scalar(@fields)),
               -verbose =&gt; 0});
  }
  ## calculate percent identity
  my $qAliSize = $qEnd - $qStart;
  my $tAliSize = $tEnd - $tStart;
  my $sizeDif = abs($qAliSize - $tAliSize);
  my $pid = 100 * ($matches + $repMatches -
                   ($qNumInsert + $tNumInsert + 3*log(1+$sizeDif))) /
                     ($matches + $repMatches + $misMatches);
  if(($pid &gt;= $projOpts-&gt;{&quot;pid&quot;}) &amp;&amp;
     $querySeqs{$qName} &amp;&amp; $targetSeqs{$tName}){
    my %meta = ();
    my $shortTarget = ($tSize &lt; $qSize) ? 1 : 0;
    my $longTarget = (1 - $shortTarget);
    my $sName = $fields[9 + ($shortTarget * 4)];
    my $lName = $fields[9 + ($longTarget * 4)];
    my $sLen = $fields[10 + ($shortTarget * 4)];
    my $lLen = $fields[10 + ($longTarget * 4)];
    my $sStart = $fields[11 + ($shortTarget * 4)];
    my $lStart = $fields[11 + ($longTarget * 4)];
    my $sEnd = $fields[12 + ($shortTarget * 4)];
    my $lEnd = $fields[12 + ($longTarget * 4)];
    my @sBlStarts = split(/,/, $fields[19 + $shortTarget]);
    my @lBlStarts = split(/,/, $fields[19 + $longTarget]);
    my @blSizes = split(/,/, $fields[18]);
    my ($sSeq, $lSeq) = ($querySeqs{$qName}{sequence},
                         $querySeqs{$tName}{sequence});
    if($shortTarget){
      ($sSeq, $lSeq) = ($lSeq, $sSeq);
    }
    my $doRC = ($strand eq &quot;-&quot;);
    if($doRC){
        # Reverse-strand match: reverse-complement the query-side sequence
        # and translate its start/end into coordinates on the reversed
        # sequence.  An interval [start, end) on a sequence of length len
        # becomes [len - end, len - start) after reversal, so both new
        # values are measured from len (end - start would be the interval
        # length, not its end position).
        if ($shortTarget) { # target sequence is assumed to be forward strand
          $lSeq = rc($lSeq);
          ($lStart, $lEnd) = ($lLen - $lEnd, $lLen - $lStart);
        } else {
          $sSeq = rc($sSeq);
          ($sStart, $sEnd) = ($sLen - $sEnd, $sLen - $sStart);
        }
    }
    my $preTrim = min($sStart, $lStart);
    my $postTrim = min($sLen - $sEnd, $lLen - $lEnd);
    ## Only continue on if there&#039;s a good likelihood that this will work
    ## i.e. trimLength * (1-%id) &lt; threshold
    my $trimTotal = ($preTrim + $postTrim);
    if($trimTotal &lt;= $projOpts-&gt;{&quot;trimlimit&quot;}){
      my $sPre = substr($sSeq, 0, $sStart);
      my $lPre = substr($lSeq, 0, $lStart);
      my $sMid = substr($sSeq, $sStart, $sEnd-$sStart);
      my $lMid = substr($lSeq, $lStart, $lEnd-$lStart);
      my $sPost = substr($sSeq, $sEnd);
      my $lPost = substr($lSeq, $lEnd);
      my $sPreTrim = substr($sPre, length($sPre)-$preTrim);
      my $sPostTrim = substr($sPost, 0, $postTrim);
      my $lPreTrim = substr($lPre, length($lPre)-$preTrim);
      my $lPostTrim = substr($lPost, 0, $postTrim);
      my $preLen = max(length($sPre), length($lPre));
      my $postLen = max(length($sPost), length($lPost));
      my $lastS = $sBlStarts[0];
      my $lastL = $lBlStarts[0];
      my $alSeqS = &quot;&quot;;
      my $alSeqL = &quot;&quot;;
      for (my $i = 0; $i &lt;= $#blSizes; $i++) {
        my $gapS = $sBlStarts[$i] - $lastS;
        my $gapL = $lBlStarts[$i] - $lastL;
        my $gapLength = max($gapS, $gapL);
        my $fillS = $gapLength - $gapS;
        my $fillL = $gapLength - $gapL;
        $alSeqS .= (&quot;-&quot; x $fillS) . substr($sSeq, $sBlStarts[$i]-$gapS, $gapS);
        $alSeqL .= (&quot;-&quot; x $fillL) . substr($lSeq, $lBlStarts[$i]-$gapL, $gapL);
        $alSeqS .= substr($sSeq, $sBlStarts[$i], $blSizes[$i]);
        $alSeqL .= substr($lSeq, $lBlStarts[$i], $blSizes[$i]);
        $lastS = $sBlStarts[$i] + $blSizes[$i];
        $lastL = $lBlStarts[$i] + $blSizes[$i];
      }
      $alSeqS = $sPreTrim . $alSeqS . $sPostTrim;
      $alSeqL = $lPreTrim . $alSeqL . $lPostTrim;
      my $alConsensus = &quot;&quot;;
      for (my $i = 0; $i &lt; length($alSeqS); $i++) {
        $alConsensus .= getConsensus(substr($alSeqS,$i,1),substr($alSeqL,$i,1));
      }
      my $consensusLength = length($alConsensus);
      $alConsensus =
        substr($sPre, 0, length($sPre) - $preTrim).
          substr($lPre, 0, length($lPre) - $preTrim).
            $alConsensus.
              substr($sPost, $postTrim).substr($lPost, $postTrim);
      my $newSeqID = sprintf(&quot;%s_%d&quot;, $projOpts-&gt;{&quot;prefix&quot;}, $nextScaffoldID++);
      if(!exists($replacementSeqs{$sName}{score}) ||
         ($trimTotal &lt; $replacementSeqs{$sName}{score}) ||
         (($trimTotal == $replacementSeqs{$sName}{score}) &amp;&amp;
          ($consensusLength &gt; $replacementSeqs{$sName}{clength}))){
        $replacementSeqs{$sName}{score} = $trimTotal;
        $replacementSeqs{$sName}{clength} = $consensusLength;
        $replacementSeqs{$sName}{fullName} =
          sprintf(&quot;%s [%s %s]&quot;, $newSeqID, $sName, $lName);
        $replacementSeqs{$sName}{sequence} = $alConsensus;
        # printf(STDERR &quot;Match: $sName\n&quot;);
      }
      if(!exists($replacementSeqs{$lName}{score}) ||
         ($trimTotal &lt; $replacementSeqs{$lName}{score}) ||
         (($trimTotal == $replacementSeqs{$lName}{score}) &amp;&amp;
          ($consensusLength &gt; $replacementSeqs{$lName}{clength}))){
        $replacementSeqs{$lName}{score} = $trimTotal;
        $replacementSeqs{$lName}{clength} = $consensusLength;
        $replacementSeqs{$lName}{fullName} =
          sprintf(&quot;%s [%s %s]&quot;, $newSeqID, $sName, $lName);
        $replacementSeqs{$lName}{sequence} = $alConsensus;
        # printf(STDERR &quot;Match: $lName\n&quot;);
      }
    } else {
      # printf(STDERR &quot;Rejecting match &#039;%s&#039; vs &#039;%s&#039;: too many bases trimmed (%d [%d,%d] [%d,%d])\n&quot;,
      #        $qName, $tName, $trimTotal, $sStart, $lStart, $sLen-$sEnd, $lLen-$lEnd);
    }
  } elsif($pid &lt; $projOpts-&gt;{&quot;pid&quot;}){
    # printf(STDERR &quot;Rejecting match &#039;%s&#039; vs &#039;%s&#039;: identity (%f) too low\n&quot;,
    #      $qName, $tName, $pid);
  }
}
printf(STDERR &quot; done\n&quot;);

# Header names that have already been written to standard output.
my %displayed = ();

# Write all sequences in FASTA format, ordered by sequence ID.  A sequence
# that has an entry in %replacementSeqs is written with the replacement
# name and sequence instead of its own; every distinct header name is
# written only once.
foreach my $seqID (sort(keys(%targetSeqs))){
  my $entry = $targetSeqs{$seqID};
  if(exists($replacementSeqs{$seqID})){
    print(STDERR &quot;Found match for $seqID\n&quot;);
    $entry = $replacementSeqs{$seqID};
  }
  my ($fullName, $sequence) = @{$entry}{qw(fullName sequence)};
  next if $displayed{$fullName};
  printf(&quot;&gt;%s\n%s\n&quot;, $fullName, $sequence);
  $displayed{$fullName} = 1;
}</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/35044/insert-the-sequence-at-desire-location-in-multi-fasta-file-with-perl</guid>
	<pubDate>Wed, 03 Jan 2018 10:05:30 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/35044/insert-the-sequence-at-desire-location-in-multi-fasta-file-with-perl</link>
	<title><![CDATA[Insert the sequence at desired location in multi-fasta file with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

use warnings;
use strict;
use Bio::SeqIO;
use Bio::Seq;
use File::Copy;

#ARGV[0] should be a tab-separated file in the following format --- keep the
#coordinates sorted by name+location
#GenomechrName locationStart AlienGene AlienLength

# The coordinates should not overlap --- the next position should be bigger
# than firstpos+alienLen

#ARGV[1] is the genome FASTA file; it is rewritten after every input line.
die &quot;Usage: $0 &lt;insertions.tsv&gt; &lt;genome.fa&gt;\n&quot; unless @ARGV &gt;= 2;

open(my $fh, &#039;&lt;:encoding(UTF-8)&#039;, $ARGV[0])
or die &quot;Could not open file $ARGV[0] $!&quot;;
my $genome = $ARGV[1]; # input fasta file (genome file)
my $out = &#039;tmp.fa&#039;; # output fasta file

while (&lt;$fh&gt;) {
chomp;
next if /^\s*$/; # nothing to insert for a blank line
my @tmpLine = split &#039;\t&#039;, $_;
my $chr=$tmpLine[0]; #insertion chromosome
my $pos=$tmpLine[1]; # position of the insertion
my $seqI = $tmpLine[2]; #sequence of the insertion
my $alienLen=$tmpLine[3];

# Copy the genome record by record into the temporary file, splicing the
# insert into the record whose ID matches $chr.
my $seq_in  = Bio::SeqIO-&gt;new( -format =&gt; &#039;fasta&#039;,-file =&gt; $genome);
my $seq_out = Bio::SeqIO-&gt;new( -format =&gt; &#039;fasta&#039;,-file =&gt; &quot;&gt;&quot;.$out);
while( my $seq = $seq_in-&gt;next_seq() ) {

    if($seq-&gt;primary_id eq $chr){
        my $residues = $seq-&gt;seq;
        my $upstream=substr($residues, 0, $pos);
        my $downstream=substr($residues, $pos);
        # NOTE(review): only the ID is carried over to the new record; any
        # description on the original header line is not copied -- confirm
        # that this is acceptable.
        my $seq_obj = Bio::Seq-&gt;new(-seq =&gt; $upstream.$seqI.$downstream,-display_id =&gt; $seq-&gt;primary_id,-alphabet =&gt; &quot;dna&quot; );
            $seq_out-&gt;write_seq($seq_obj);
    }
    else{
        $seq_out-&gt;write_seq($seq);
    }
}

# Close both streams before replacing the genome file, so that everything
# written is flushed to tmp.fa and neither file is still open when it is moved.
$seq_in-&gt;close();
$seq_out-&gt;close();

my $newLoc = $pos+$alienLen;
print &quot;$_\t$pos\t$newLoc\n&quot;;
move(&quot;$out&quot;,&quot;$genome&quot;)
or die &quot;Could not replace $genome with $out: $!&quot;;

}
close($fh);</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/34803/fill-up-the-form-and-blast-with-perl</guid>
	<pubDate>Sat, 23 Dec 2017 03:48:52 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/34803/fill-up-the-form-and-blast-with-perl</link>
	<title><![CDATA[Fill up the form and blast with perl]]></title>
	<description><![CDATA[<code>use WWW::Mechanize;
use strict;
use warnings;
# Browser-like client used for both the page fetch and the form submission.
my $mech = WWW::Mechanize-&gt;new;

# Nucleotide query that is pasted into the form.
my $sequence = &#039;GCCCGCGGTCTCAGAGATCTCGATATATTATA&#039;;

# Load the page that carries the form, then fill in and submit the form
# named &#039;myForm&#039; with the algorithm, the target set and the query text.
# NOTE(review): form and field names are hard-coded and presumably mirror
# the page&#039;s HTML -- verify against the live page.
$mech-&gt;get(&#039;http://www.arabidopsis.org/Blast/&#039;);
$mech-&gt;submit_form(
  form_name =&gt; &#039;myForm&#039;,
  fields =&gt; {
    &#039;Algorithm&#039; =&gt; &#039;blastx&#039;,
    &#039;BlastTargetSet&#039; =&gt; &#039;ATH1_pep&#039;,
    &#039;QueryText&#039; =&gt; $sequence,
  },
);

# Print the body of the response returned for the submitted form.
print $mech-&gt;content;</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/34698/convert-fastq-to-fasta-in-perl</guid>
	<pubDate>Sun, 17 Dec 2017 17:54:15 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/34698/convert-fastq-to-fasta-in-perl</link>
	<title><![CDATA[Convert fastq to fasta in Perl]]></title>
	<description><![CDATA[<code>use Bio::SeqIO;
#convert .fastq.gz to .fasta
# Stream seq.fastq.gz through zcat and write every record to seq.fasta.
# List-form pipe open: zcat is started directly, without a shell parsing
# the command line.
open(my $zcat, &#039;-|&#039;, &#039;zcat&#039;, &#039;seq.fastq.gz&#039;) or die &quot;Could not start zcat: $!&quot;;
my $in=Bio::SeqIO-&gt;new(-fh=&gt;$zcat,
                         -format=&gt;&#039;fastq&#039;);
my $out=Bio::SeqIO-&gt;new(-file=&gt;&#039;&gt;seq.fasta&#039;,
                          -format=&gt;&#039;fasta&#039;);
while (my $seq=$in-&gt;next_seq) {
      $out-&gt;write_seq($seq);
}
# Closing a pipe handle waits for the child and reports its exit status, so
# a missing or corrupt input file no longer passes silently.
close($zcat)
  or die($! ? &quot;Error closing zcat pipe: $!&quot; : &quot;zcat exited with status $?&quot;);</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/34693/loop-over-with-all-files-in-a-directory-in-bash</guid>
	<pubDate>Sat, 16 Dec 2017 20:23:51 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/34693/loop-over-with-all-files-in-a-directory-in-bash</link>
	<title><![CDATA[Loop over with all files in a directory in bash]]></title>
	<description><![CDATA[<code>#!/bin/bash
FILES=/media/ComparativeGenomics/ncbi-genomes-2017-11-13/*
ref=/media/ComparativeGenomics/ncbi-genomes-2017-11-13/GCA_000196735.1_ASM19673v1_genomic.fna
path=/home/urbe/Tools/SATSUMA/satsuma-code-0
for f in $FILES
do
  if [ ${f: -4} == &quot;.fna&quot; ]; then
  echo &quot;Processing $f file...&quot;
  ff=$(basename &quot;${f%.*}&quot;)
echo $ff
  # take action on each file. $f store current file name
  mkdir $ff
  $path/SatsumaSynteny -q $ref -t $f -o $ff
  #cat $f
  fi
done</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/34633/clump-finding-problem-solved-with-perl</guid>
	<pubDate>Thu, 14 Dec 2017 09:47:41 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/34633/clump-finding-problem-solved-with-perl</link>
	<title><![CDATA[Clump Finding Problem Solved with Perl]]></title>
	<description><![CDATA[<code>#Find patterns forming clumps in a string.
#Given: A string Genome, and integers k, L, and t.
#Return: All distinct k-mers forming (L, t)-clumps in Genome.

use strict;
use warnings;

my %myHash;   # k-mer =&gt; number of times it occurs in $string
my $string=&quot;CGGACTCGACAGATGTGAAGAAATGTGAAGACTGAGTGAAGAGAAGAGGAAACACGACACGACATTGCGACATAATGTACGAATGTAATGTGCCTATGGC&quot;;
my $clump=4;  # number of occurrences a k-mer must have to be reported
my $kmer=5;   # k-mer length

# Count every distinct k-mer of $string once.
for (my $aa=0; $aa&lt;=(length($string)-$kmer); $aa++) {
    my $myStr=substr  $string, $aa,$kmer;
    # a k-mer seen at an earlier position has already been counted over the
    # whole string, so counting it again would only repeat the same work
    next if exists $myHash{$myStr};
    $myHash{$myStr}=kmerMatch ($string, $myStr, $kmer);
}

#Print all keys which have the matching value (sorted, so that the output
#order is the same on every run)
# NOTE(review): occurrences are counted over the whole string and must equal
# $clump exactly; the window length L named in the problem statement is not
# used -- confirm that this is the intended simplification.
foreach my $name (sort keys %myHash){
    print &quot;$name &quot; if $myHash{$name} == $clump;
}

# Count how often $myStr occurs in $string, using a sliding window of
# $kmer characters; overlapping occurrences are all counted.
#   $string - text to search
#   $myStr  - pattern to look for
#   $kmer   - window length
# Returns the number of windows that are exactly equal to $myStr.
sub kmerMatch { #Check the exact matching kmers with sliding window
my ($string, $myStr, $kmer)=@_;
my $count=0;
# the last full window starts at length - $kmer; a fixed offset would skip
# windows for a shorter $kmer and test truncated ones for a longer $kmer
for (my $aa=0; $aa&lt;=(length($string)-$kmer); $aa++) {
    my $myWin=substr  $string, $aa,$kmer;
    if ($myWin eq $myStr) {
        #print &quot;$myWin eq $myStr\n&quot;;
        $count++;
    }
}
return $count;
}</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>

</channel>
</rss>