<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Perl script to calculate N50]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/30092/perl-script-to-calculate-n50?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/30092/perl-script-to-calculate-n50?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/30092/perl-script-to-calculate-n50</guid>
	<pubDate>Fri, 09 Dec 2016 04:14:04 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/30092/perl-script-to-calculate-n50</link>
	<title><![CDATA[Perl script to calculate N50]]></title>
	<description><![CDATA[<code>#! /usr/local/bin/perl5.8.3 -w

###
### Script Name : calcN50.pl
### Description : This script calculates the N50 of a collection of sequences in a fasta file
### Input       : A fasta sequence file and the minimum length of sequence to consider it for the N50 calculation
### Output      : None.  The N50 value as well as the number of sequences &gt;= this value is sent to STDOUT
###

use strict;
use Getopt::Long;
use Bio::SeqIO;

my $fasta_file;
my $filter_length;
GetOptions(&quot;fasta_file=s&quot; =&gt; \$fasta_file, &quot;length_filter=i&quot; =&gt; \$filter_length);
die &quot;Usage: $0 [options]
\t\t-f /path/to/fasta/file [Required]
\t\t-l filter seq less than length (0 if no filter is to be done)\n\n&quot; if !$fasta_file || !defined($filter_length) || $filter_length !~ /\d+/;

###
### Get the length of each sequence:
###
my $in = Bio::SeqIO-&gt;new(-file =&gt; $fasta_file, -format =&gt; &#039;fasta&#039;);
my @seq_lengths = ();
while (my $seq_obj = $in-&gt;next_seq){
	next if length($seq_obj-&gt;seq) &lt; $filter_length;
	push @seq_lengths, length($seq_obj-&gt;seq);
}



###
### Determine the N50:
###
my @vals;
my $sum = 0;
for my $val (@seq_lengths){
    chomp $val;
    $sum += $val;
    push(@vals, $val);
}

my @sorted = sort { $a &lt;=&gt; $b } @vals;

my $runningSum = 0;
my $N50 = 0;
foreach my $v (@sorted)
{
    $runningSum += $v;
    if($runningSum &gt;= ($sum / 2))
    {
	$N50 = $v;
	last;
    }
}


###
### Determine the number of sequences &gt;= than the N50 in length:
###
my $count = 0;
foreach my $v (@sorted)
{
    if($v &gt;= $N50)
    {
	$count++;
    }
}


printf(&quot;N50: $N50 Count &gt;= N50: $count\n&quot;);</code>]]></description>
	<dc:creator>Poonam Mahapatra</dc:creator>
</item>

</channel>
</rss>