Our Sponsors



Download BioinformaticsOnline(BOL) Apps in your chrome browser.




Count GC Content in nucleotide sequence with Perl

  • Public
By Radha Agarkar 2897 days ago
#!/usr/bin/perl -w ### Usage: get_gc_content.pl <fasta file> ### #--------------------------------------------------------------------------------------------------------------------------- #Deal with passed parameters #--------------------------------------------------------------------------------------------------------------------------- if ($#ARGV == -1) { usage(); exit; } $fasta_file = $ARGV[0]; $out_file = "gc_out.txt"; unless ( open(IN, "$fasta_file") ) { print "Got a bad fasta file: $fasta_file\n\n"; exit; } unless ( open(OUT, ">$out_file") ) { print "Couldn't create $out_file\n"; exit; } print "Parameters:\nfasta file = $fasta_file\noutput file = $out_file\n\n"; #--------------------------------------------------------------------------------------------------------------------------- #The main event #--------------------------------------------------------------------------------------------------------------------------- print OUT "ID\t% GCContent\tTotal Count\tG Count\tC Count\tA Count\tT Count\n"; $seq = ""; while (<IN>) { chomp; if (/^>/) { #finish up previous line. if (length($seq) > 0) { &process_it; } #start new line. $id = $_; $id =~ s/^>(.+?)\s.+$/$1/g; print OUT "$id\t"; } else { $seq = $seq . $_; } } #finish up last line. &process_it; close(IN); close(OUT); sub usage { $0 get_gc_content.pl <fasta file> } sub process_it { @letters = split(//, $seq); $gccount = 0; $totalcount = 0; $acount = 0; $tcount = 0; $gcount = 0; $ccount = 0; foreach $i (@letters) { if (lc($i) =~ /[a-z]/) { $totalcount++; } if (lc($i) eq "g" || lc($i) eq "c") { $gccount++; } if (lc($i) eq "a") { $acount++; } if (lc($i) eq "t") { $tcount++; } if (lc($i) eq "g") { $gcount++; } if (lc($i) eq "c") { $ccount++; } } if ($totalcount > 0) { $gccontent = (100 * $gccount) / $totalcount; } else { $gccontent = 0; } print OUT "$gccontent\t$totalcount\t$gcount\t$ccount\t$acount\t$tcount\n"; $seq = ""; }