Our Sponsors



Download BioinformaticsOnline(BOL) Apps in your chrome browser.




Calculate Dinucleotide Frequency with Perl

  • Public
By Rahul Nayak 2331 days ago
#!/usr/bin/perl -w use strict; my ($genome, $head, $tail); my (%mono_nt, %di_nt); $/ = ">"; open my $fasta, '<', $ARGV[0] or die $!; while (<$fasta>) { chomp; s/\r//g; s/^\s*|\s*$//; if (/.+?\n(.+)/s) { (my $seq = $1) =~ s/\n//g; $genome .= uc $seq; $head = uc substr($seq, 0, 1); $di_nt{"$tail$head"}-- if $tail; $tail = uc substr($seq, -1); } } close $fasta; my $len = length $genome; for my $i (0..$len-2) { my $each_mono_nt = substr($genome, $i, 1); my $each_di_nt = substr($genome, $i, 2); $mono_nt{$each_mono_nt}++; $di_nt{$each_di_nt}++; } $mono_nt{$tail}++; print "-"x30, "\nSingle nucleotide frequency:\n"; for my $nt (sort keys %mono_nt) {print "$nt\t", $mono_nt{$nt} / $len, "\n";} print "\n", "-"x30, "\nDinucleotide frequency:\nDinucleotide\tObs. freq.\tExp. freq.\n"; for my $nt_pair (sort keys %di_nt) { my ($first_nt, $second_nt) = split //, $nt_pair; print "$nt_pair\t", $di_nt{$nt_pair} / ($len-1), "\t", $mono_nt{$first_nt} * $mono_nt{$second_nt} /$len /$len, "\n"; }