<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Owner]]></title>
	<link>https://bioinformaticsonline.com/snippets/owner/radhaagarkar?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/owner/radhaagarkar?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/41481/check-os-version-in-linux</guid>
	<pubDate>Fri, 20 Mar 2020 06:28:14 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/41481/check-os-version-in-linux</link>
	<title><![CDATA[Check os version in Linux !]]></title>
	<description><![CDATA[<code>The procedure to find os name and version on Linux:

#Open the terminal application (bash shell)
#For remote server login using the ssh: ssh user@server-name
#Type any one of the following commands to find os name and version in Linux:
    cat /etc/os-release
    lsb_release -a
    hostnamectl
#Type the following command to find Linux kernel version:
    uname -r</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/31562/extract-ids-from-file-with-perl</guid>
	<pubDate>Wed, 15 Mar 2017 05:21:59 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/31562/extract-ids-from-file-with-perl</link>
	<title><![CDATA[Extract ids from file with perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;

# Record the id held in the first tab-separated column of every line.
my $fh = read_fh(&quot;fin.txt&quot;);
my %idHash;   # id to the number of the last line it appeared on
my @allIds;   # ids in file order, so the final one can be singled out
while (my $line = &lt;$fh&gt;) {
  chomp $line;
  my @cells = split /\t/, $line;
  next unless @cells;   # a blank line carries no id
  $idHash{$cells[0]} = $.;
  push @allIds, $cells[0];
}
close $fh;

# Drop the last id for safety: its record might not have finished all steps.
delete $idHash{$allIds[-1]} if @allIds;

# %idHash now lists the ids that count as done. A loop that works through
# candidate ids can skip them with:
#   next if exists $idHash{$look_for};


############################################################
#Open and Read a file
sub read_fh {
    # Return a read handle for the file named by the first argument. Names
    # ending in &quot;gz&quot; are streamed through gunzip; anything else is opened
    # directly. Dies with the file name and the system error on failure.
    my $filename = shift @_;
    my $filehandle;
    if ($filename =~ /gz$/) {
        # List-form pipe open: the name reaches gunzip as one argument and is
        # never interpreted by a shell.
        open($filehandle, &#039;-|&#039;, &#039;gunzip&#039;, &#039;-dc&#039;, $filename)
            or die &quot;Cannot run gunzip on $filename: $!&quot;;
    }
    else {
        # Three-argument open: characters in the name cannot change the mode.
        open($filehandle, &#039;&lt;&#039;, $filename)
            or die &quot;Cannot open $filename: $!&quot;;
    }
    return $filehandle;
}</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27467/count-gc-content-in-nucleotide-sequence-with-perl</guid>
	<pubDate>Sat, 21 May 2016 22:56:18 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27467/count-gc-content-in-nucleotide-sequence-with-perl</link>
	<title><![CDATA[Count GC Content in nucleotide sequence with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl -w

### Usage: get_gc_content.pl &lt;fasta file&gt;                                                        ###

#---------------------------------------------------------------------------------------------------------------------------
#Deal with passed parameters
#---------------------------------------------------------------------------------------------------------------------------
if ($#ARGV == -1) {
    usage();
    exit;
}
$fasta_file = $ARGV[0];
$out_file = &quot;gc_out.txt&quot;;
# Three-argument opens keep characters in a file name from being read as a mode.
unless ( open(IN, &quot;&lt;&quot;, $fasta_file) ) {
    print &quot;Got a bad fasta file: $fasta_file\n\n&quot;;
    exit 1;
}
unless ( open(OUT, &quot;&gt;&quot;, $out_file) ) {
    print &quot;Couldn&#039;t create $out_file\n&quot;;
    exit 1;
}
print &quot;Parameters:\nfasta file = $fasta_file\noutput file = $out_file\n\n&quot;;
#---------------------------------------------------------------------------------------------------------------------------
#The main event
#---------------------------------------------------------------------------------------------------------------------------
print OUT &quot;ID\t% GCContent\tTotal Count\tG Count\tC Count\tA Count\tT Count\n&quot;;
$seq = &quot;&quot;;
$have_record = 0;    # true once a header line has written an id column
while (&lt;IN&gt;) {
    chomp;
    if (/^&gt;/) {
        # A new header closes the previous record. A record whose sequence is
        # empty is closed too, so that its row is still terminated.
        if ($have_record || length($seq) &gt; 0) {
            &amp;process_it;
        }
        # The id is the first whitespace-delimited word after the marker,
        # whether or not a description follows it.
        $id = $_;
        $id =~ s/^&gt;\s*(\S*).*$/$1/;
        print OUT &quot;$id\t&quot;;
        $have_record = 1;
    }
    else {
        $seq = $seq . $_;
    }
}

# Finish the last record; an empty input file produces only the column header.
if ($have_record || length($seq) &gt; 0) {
    &amp;process_it;
}

close(IN);
close(OUT);

sub usage {
    # Print a one-line synopsis; $0 is the name the script was invoked under.
    print &quot;Usage: $0 &lt;fasta file&gt;\n&quot;;
}

sub process_it {
    # Write the statistics columns for the sequence held in the global $seq to
    # OUT: GC percentage, letter total, then the G, C, A and T counts. $seq is
    # reset to the empty string afterwards so the next record starts clean.

    # tr/// with an empty replacement list counts the listed characters and
    # leaves $seq untouched; both cases are listed so no lc() call is needed.
    $gcount = ($seq =~ tr/Gg//);
    $ccount = ($seq =~ tr/Cc//);
    $acount = ($seq =~ tr/Aa//);
    $tcount = ($seq =~ tr/Tt//);
    $gccount = $gcount + $ccount;

    # Every ASCII letter counts toward the total, not only A, C, G and T.
    $totalcount = ($seq =~ tr/A-Za-z//);

    if ($totalcount &gt; 0) {
        $gccontent = (100 * $gccount) / $totalcount;
    }
    else {
        # No letters at all: report 0 rather than dividing by zero.
        $gccontent = 0;
    }
    print OUT &quot;$gccontent\t$totalcount\t$gcount\t$ccount\t$acount\t$tcount\n&quot;;
    $seq = &quot;&quot;;
}</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27454/needleman-wunsch-algorithm-in-perl</guid>
	<pubDate>Sat, 21 May 2016 22:07:06 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27454/needleman-wunsch-algorithm-in-perl</link>
	<title><![CDATA[Needleman-Wunsch  Algorithm in Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

# USAGE:   perl nw.pl HEAGAWGHEE PAWHEAE BLOSUM50.txt -8

# See:     &quot;Biological sequence analysis&quot; Durbin et al. ed. CUP 1998, Pg. 19
# Needleman-Wunsch global alignment algo (Gotoh 1982 mod)

# usage statement
die &quot;usage: $0 &lt;sequence 1&gt; &lt;sequence 2&gt; &lt;substmatrix&gt; &lt;gapscore&gt; \n&quot; unless @ARGV == 4;

# get sequences, matrix and gapcost from command line
my ($seq1, $seq2, $smfile, $gapcost) = @ARGV;

# scoring scheme (instead of using fixed MATCH and MISMATCH scores we will use values read from BLOSUM50)
my $MATCH    =  1; # +1 for letters that match
my $MISMATCH = -1; # -1 for letters that mismatch
my $GAP      = $gapcost; # for any gap
my %BLOSUM50 = ();
my @aalist = ();

# read substitution matrix
# The first line of the file is skipped, the second supplies the column
# labels, and every later line is a row label followed by that row&#039;s scores.
open(my $sm_fh, &#039;&lt;&#039;, $smfile)
    or die &quot;Cannot open substitution matrix $smfile: $!\n&quot;;
while (my $row = &lt;$sm_fh&gt;) {
    next if $. &lt; 2;
    # chomp removes only a line ending; chop would eat a score digit on a
    # final line that has no newline.
    chomp $row;
    if ($. &lt; 3) {
        # NOTE(review): labels are looked up at index 1 and up, which presumably
        # relies on the label line being indented so that the split yields an
        # empty first field - confirm against the matrix file.
        @aalist = split(/\s+/, $row);
        next;
    }
    my @vals = split(/\s+/, $row);
    my $curaaROW = $vals[0];
    for my $col (1 .. $#vals) {
        $BLOSUM50{$curaaROW}{ $aalist[$col] } = $vals[$col];
    }
}
close $sm_fh;


# initialization
# Cell [0][0] is the trace-back terminator. The rest of row 0 and column 0
# hold pure gap runs, so their scores are multiples of the gap cost.
my @matrix;
$matrix[0][0] = { score =&gt; 0, pointer =&gt; &quot;none&quot; };
for my $col (1 .. length($seq1)) {
    $matrix[0][$col] = { score =&gt; $GAP * $col, pointer =&gt; &quot;left&quot; };
}
for my $row (1 .. length($seq2)) {
    $matrix[$row][0] = { score =&gt; $GAP * $row, pointer =&gt; &quot;up&quot; };
}

# fill
for (my $i = 1; $i &lt;= length($seq2); $i++) {
    for (my $j = 1; $j &lt;= length($seq1); $j++) {
        # Row $i corresponds to a residue of $seq2, column $j to one of $seq1.
        my $letter1 = substr($seq1, $j-1, 1);
        my $letter2 = substr($seq2, $i-1, 1);

        # Matches and mismatches are both scored from the substitution matrix,
        # so no equality test is needed here.
        my $pair_score = $BLOSUM50{$letter1}{$letter2};
        die &quot;No substitution score for $letter1/$letter2 in $smfile\n&quot;
            unless defined $pair_score;

        my $diagonal_score = $matrix[$i-1][$j-1]{score} + $pair_score;
        my $up_score       = $matrix[$i-1][$j]{score} + $GAP;
        my $left_score     = $matrix[$i][$j-1]{score} + $GAP;

        # Choose the best score. The strict comparisons mean that on a tie
        # diagonal wins over up and left, and up wins over left.
        my ($best_score, $best_pointer) = ($diagonal_score, &quot;diagonal&quot;);
        if ($up_score &gt; $best_score) {
            ($best_score, $best_pointer) = ($up_score, &quot;up&quot;);
        }
        if ($left_score &gt; $best_score) {
            ($best_score, $best_pointer) = ($left_score, &quot;left&quot;);
        }
        $matrix[$i][$j]{score}   = $best_score;
        $matrix[$i][$j]{pointer} = $best_pointer;
    }
}

# trace-back

# The three strings are built back to front and reversed once the walk ends.
my $align1 = &quot;&quot;;
my $align2 = &quot;&quot;;
my $descrstr = &quot;&quot;;

# Start at the last cell of the matrix and follow the pointers until the
# &quot;none&quot; pointer of the first cell is reached.
my $j = length($seq1);
my $i = length($seq2);

until ($matrix[$i][$j]{pointer} eq &quot;none&quot;) {
    my $move = $matrix[$i][$j]{pointer};

    if ($move eq &quot;diagonal&quot;) {
        # Both sequences contribute a residue; mark identity or substitution.
        my $res1 = substr($seq1, $j-1, 1);
        my $res2 = substr($seq2, $i-1, 1);
        $align1 .= $res1;
        $align2 .= $res2;
        $descrstr .= ($res1 eq $res2) ? &quot;|&quot; : &quot;.&quot;;
        $i--;
        $j--;
    }
    elsif ($move eq &quot;left&quot;) {
        # Residue of the first sequence against a gap in the second.
        $align1 .= substr($seq1, $j-1, 1);
        $align2 .= &quot;-&quot;;
        $descrstr .= &quot; &quot;;
        $j--;
    }
    elsif ($move eq &quot;up&quot;) {
        # Gap in the first sequence against a residue of the second.
        $align1 .= &quot;-&quot;;
        $align2 .= substr($seq2, $i-1, 1);
        $descrstr .= &quot; &quot;;
        $i--;
    }
}

$_ = scalar reverse($_) for ($align1, $align2, $descrstr);

# print matrices:
print &quot;\n\n&quot;;

# One output line per matrix row, each score followed by a tab.
for my $row (0 .. length($seq2)) {
    for my $col (0 .. length($seq1)) {
        printf(&quot;%2.1f\t&quot;, $matrix[$row][$col]{score});
    }
    print &quot;\n&quot;;
}
print &quot;\n\n&quot;;

# The alignment: first sequence, match line, second sequence.
print &quot;$align1\n$descrstr\n$align2\n&quot;;

__END__
# Entries for the BLOSUM50 matrix at a scale of ln(2)/3.0.
Find matrix at http://bioinformaticsonline.com/file/view/27455/blosum50-matrix</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27325/parse-a-fasta-file-with-perl</guid>
	<pubDate>Fri, 13 May 2016 05:00:18 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27325/parse-a-fasta-file-with-perl</link>
	<title><![CDATA[Parse a Fasta file with Perl]]></title>
	<description><![CDATA[<code>#!/usr/bin/env perl

# Usage:  fastaRead.pl data.fa

use strict;
use warnings;

# Require the input file name up front instead of failing on an undefined value.
die &quot;Usage: $0 data.fa\n&quot; unless @ARGV;
my $filename = $ARGV[0];
my $sequence = &#039;&#039;;
open my $fileH, &quot;&lt;&quot;, $filename or die &quot;could not open $filename: $!\n&quot;;
while (my $line = &lt;$fileH&gt;) {
    chomp $line;
    if ($line =~ /^&gt;/) {
        print &quot;this line is a header: $line\n&quot;;
    }
    else {
        print &quot;this line contains sequence data: $line\n&quot;;
        # Concatenate everything from the file into a single var; sequence
        # lines of every record end up in the same string.
        $sequence .= $line;
    }
}
close $fileH;</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27320/perl-program-to-implement-sliding-window</guid>
	<pubDate>Fri, 13 May 2016 04:28:24 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27320/perl-program-to-implement-sliding-window</link>
	<title><![CDATA[Perl program to implement sliding window !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl -w

# The second input file and the comparison margin come from the command line.
die &quot;Usage: $0 &lt;second file&gt; &lt;margin&gt;\n&quot; unless @ARGV == 2;
my ($file2, $margin) = @ARGV;

my $filename = &#039;data.txt&#039;;
open(my $TR, &#039;&lt;:encoding(UTF-8)&#039;, $filename)
  or die &quot;Could not open file &#039;$filename&#039; $!&quot;;

# Map the second tab-separated column of the first file to its fourth column.
my %hash;
while (my $line1 = &lt;$TR&gt;)
{
    chomp($line1);
    my @ar = split(/\t/, $line1);
    next unless defined $ar[1] and defined $ar[3];   # line has too few columns
    $hash{$ar[1]} = $ar[3];
}
close $TR;

open my $SC, &quot;&lt;&quot;, $file2 or die &quot;Could not open file &#039;$file2&#039; $!&quot;;
while (my $line2 = &lt;$SC&gt;)
{
    # Without the chomp the value column would keep its newline.
    chomp($line2);
    my ($id, $val) = split /\t/, $line2;
    next unless defined $val;
    my $val_file1 = $hash{$id};
    next unless defined $val_file1;   # id does not occur in the first file
    # True when the two values differ by strictly less than the margin.
    if ( $val &gt; $val_file1 - $margin and $val &lt; $val_file1 + $margin) {
        # print out something
    }
}
close $SC;</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27313/find-and-replace-ambiguous-characters-in-fasta-file-with-perl-and-bioperl</guid>
	<pubDate>Fri, 13 May 2016 03:20:09 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27313/find-and-replace-ambiguous-characters-in-fasta-file-with-perl-and-bioperl</link>
	<title><![CDATA[Find and replace ambiguous characters in fasta file with Perl and Bioperl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl -w

my $usage=&quot;\nUsage: $0 [-h] [-m char] [fastaFileName1 ...]\n&quot;.
    &quot;  -h: help\n&quot;.
    &quot;  -m: missing character\n&quot;.
    &quot;Print out the name of sequences with characters other than ATGC-.\n&quot;.
    &quot;If -m is specified, the ambiguous characters are repleced with the\n&quot;.
    &quot;specified character.  e.g. -m &#039;?&#039; will place ? to the ambigous characters.\n&quot; .
    &quot;If multiple files are given, sequences in all files are marged.  If no \n&quot;.
    &quot;argument is given, it will take STDIN as the input\n&quot;;

our($opt_h, $opt_m);

use Bio::SeqIO;

use Getopt::Std;
getopts(&#039;hm:&#039;) || die &quot;$usage\n&quot;;
die &quot;$usage\n&quot; if (defined($opt_h));

my $format = &quot;fasta&quot;;
my @seqArr = ();

# With no file arguments, a single &quot;-&quot; is used as the input name.
@ARGV = (&#039;-&#039;) unless @ARGV;
# Test definedness so that a file literally named &quot;0&quot; does not end the loop.
while (defined(my $file = shift)) {
    my $seqio_obj = Bio::SeqIO-&gt;new(-file =&gt; $file, -format =&gt; $format);
    while (my $seq = $seqio_obj-&gt;next_seq()) {
        push(@seqArr, $seq);
    }
}

#@seqArr = sort { $a-&gt;id() cmp $b-&gt;id() } @seqArr;

# Report each sequence that has characters outside ATGC- on STDERR and, when
# -m was given, substitute the replacement character in the stored sequence.
foreach my $s (@seqArr) {
    my $thisSeq = $s-&gt;seq();
    my $ambig = AmbiguousChar($thisSeq);
    if ($ambig ne &quot;&quot;) {
        print STDERR $s-&gt;id(), &quot;\t$ambig\n&quot;;
        if (defined($opt_m)) {
            $thisSeq = ReplaceAmbiguousChar($thisSeq, $opt_m);
            $s-&gt;seq($thisSeq);
        }
    }
}

# Sequences are written to STDOUT only when -m was given.
if (defined($opt_m)) {
    # -fh is the Bio::SeqIO argument that takes an already-open filehandle.
    my $seqOut = Bio::SeqIO-&gt;new(-fh =&gt; \*STDOUT, -format =&gt; $format);
    foreach my $s (@seqArr) {
        $seqOut-&gt;write_seq($s);
    }
}
exit;


sub AmbiguousChar {
    # Return what is left of the given string once every A, T, G, C, hyphen
    # and whitespace character has been removed. The match is case-sensitive,
    # so lowercase letters are returned as well.
    my ($residues) = @_;
    $residues =~ s/[ATGC\s-]+//g;
    return $residues;
}

sub ReplaceAmbiguousChar {
    # Return a copy of the first argument in which every character other than
    # A, T, G, C or hyphen has been replaced by the second argument.
    my $string = shift;
    my $char   = shift;
    (my $cleaned = $string) =~ s/[^ATGC-]/$char/g;
    return $cleaned;
}</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27312/blast-result-parser-with-perl-and-bioperl</guid>
	<pubDate>Fri, 13 May 2016 03:15:06 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27312/blast-result-parser-with-perl-and-bioperl</link>
	<title><![CDATA[Blast result parser with Perl and Bioperl]]></title>
	<description><![CDATA[<code>#!/usr/local/bin/perl

#
#	Dr. Xiaodong Bai
#	It may be freely distributed under GNU General Public License.
#	This script will parse a NCBI blastx output file and output the top N hits of each blast search result.
#	For each hit, the following results are reported:
#	accession number, length, description, E value, bit score, query frame, query start, query end, hit start, hit end, positives, and identical
# 	The results are tab-delimited and ready for import into a spreadsheet program for browsing and further analysis.
#

use strict;
use warnings;
use Bio::SearchIO;

# Usage information
die &quot;Usage: $0 &lt;BLAST-report-file&gt; &lt;number-of-top-hits&gt; &lt;output-file&gt;\n&quot; if (@ARGV != 3);

my ($infile,$numHits,$outfile) = @ARGV;
print &quot;Parsing the BLAST result ...&quot;;
my $in = Bio::SearchIO-&gt;new(-format =&gt; &#039;blast&#039;, -file =&gt; $infile);
# Three-argument open: the mode is fixed, so characters in the user-supplied
# output name cannot alter it.
open (OUT, &quot;&gt;&quot;, $outfile) or die &quot;Cannot open $outfile: $!&quot;;

# print the header info for tab-delimited columns
print OUT &quot;query_name\tquery_length\taccession_number\tlength\tdescription\tE value\tbit score\tframe\tquery_start\t&quot;;
print OUT &quot;query_end\thit_start\thit_end\tpositives\tidentical\n&quot;;

# extract the information for each result in turn
while ( my $result = $in-&gt;next_result ) {
	# the name of the query sequence
   	print OUT $result-&gt;query_name . &quot;\t&quot;;

        # the length of the query sequence
    	print OUT $result-&gt;query_length;

        # output &quot;No hits found&quot; if there are no hits
    	if ( $result-&gt;num_hits == 0 ) {
		print OUT &quot;\tNo hits found\n&quot;;
    	} else {
		my $count = 0;

                # process each hit in turn
		while (my $hit = $result-&gt;next_hit) {
			print OUT &quot;\t&quot; if ($count &gt; 0);
                        # get the accession numbers of the hits
			print OUT &quot;\t&quot; . $hit-&gt;accession . &quot;\t&quot;;
                        # get the lengths of the hit sequences
                        print OUT $hit-&gt;length . &quot;\t&quot;;
                        # get the description of the hit sequences
			print OUT $hit-&gt;description . &quot;\t&quot;;
                        # get the E value of the hit
			print OUT $hit-&gt;significance . &quot;\t&quot;;
                        #get the bit score of the hit
			print OUT $hit-&gt;bits . &quot;\t&quot;;

                        my $hspcount = 0;

                        # process every HSP of this hit
			while (my $hsp = $hit-&gt;next_hsp) {
                        	print OUT &quot;\t\t\t\t\t\t\t&quot;, if ($hspcount &gt; 0);
                        	# get the frame of the query sequence
				print OUT $hsp-&gt;query-&gt;frame . &quot;\t&quot;;
                                # get the start and the end of the query sequence in the alignment
				print OUT $hsp-&gt;start(&#039;query&#039;) . &quot;\t&quot; . $hsp-&gt;end(&#039;query&#039;). &quot;\t&quot;;
                                # get the start and the end of the hit sequence in the alignment
				print OUT $hsp-&gt;start(&#039;hit&#039;) . &quot;\t&quot; . $hsp-&gt;end(&#039;hit&#039;) . &quot;\t&quot;;
                                # get the similarity value
				printf OUT &quot;%.1f&quot; , ($hsp-&gt;frac_conserved * 100);
				print OUT &quot;%\t&quot;;
                                # get the identity value
				printf OUT &quot;%.1f&quot; , ($hsp-&gt;frac_identical * 100);
		       		print OUT &quot;%\n&quot;;
                                $hspcount++;
                        }
			$count++;

                        # flow control for the number of hits needed
			last if ($count == $numHits);
		}
    	}
}
close OUT;
print &quot; DONE!!!\n&quot;;</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>

</channel>
</rss>