Our Sponsors



Download the BioinformaticsOnline (BOL) apps in your Chrome browser.




Calculate A/T/G/C percentages in parallel with Perl

  • Public
By Jit 2861 days ago
#!/usr/bin/perl
# testParallel.pl - report the base composition of every record in a
# multi-FASTA file, analysing the records in parallel child processes.
#
# usage: perl testParallel.pl <multi fasta infile>
#
# Output: one tab-separated line per sequence, in no guaranteed order
# (hash key order, and children finish independently):
#   id, number of A/C/G/T bases, A+T count, G+C count, GC percentage
use strict;
use warnings;
use Parallel::ForkManager;
use Bio::SeqIO;

# Upper bound on the number of children running at the same time.
my $max_procs = 5;

my $infile = $ARGV[0];
die "usage: perl $0 <multi fasta infile>\n"
    unless defined $infile && length $infile;

# Load every record up front; each forked child gets its own copy of this
# hash and looks its sequence up by id.
my %sequences;
my $seqio = Bio::SeqIO->new(-file => $infile, -format => 'fasta');
while (my $seqobj = $seqio->next_seq) {
    my $id  = $seqobj->display_id;    # hash key
    my $seq = $seqobj->seq;           # hash value
    $sequences{$id} = $seq;
}

my @names = keys %sequences;

my $pm = Parallel::ForkManager->new($max_procs);

# Report children that did not exit cleanly. $ident is the sequence id that
# was handed to start().
$pm->run_on_finish(
    sub {
        my ($pid, $exit_code, $ident) = @_;
        warn "** $ident (PID $pid) exited with code $exit_code\n"
            if $exit_code;
    }
);

# Debug hooks kept from the original script; uncomment the prints to trace
# the life cycle of the children.
$pm->run_on_start(
    sub {
        my ($pid, $ident) = @_;
        #print "** $ident started, pid: $pid\n";
    }
);

$pm->run_on_wait(
    sub {
        #print "** Have to wait for one children ...\n"
    },
    0.5
);

NAMES:
foreach my $name (@names) {
    # start() returns the child's PID (true) in the parent and 0 in the
    # child, so only the child falls through to the analysis below.
    $pm->start($name) and next NAMES;

    checkATCG($name);

    # Exit status 0 means success. The loop index must not be used here:
    # it would flag every child but the first as failed and wraps past 255.
    $pm->finish(0);
}

print "Waiting for Children...\n";
$pm->wait_all_children;
print "Everybody is out of the pool!\n";

# checkATCG($name)
#
# Print the base composition of the sequence stored under $name in the
# file-level %sequences hash, as one tab-separated line:
#
#   name, A+C+G+T count, A+T count, G+C count, GC percentage
#
# Counting is case-insensitive, so soft-masked (lowercase) bases are
# included. Characters other than A/C/G/T (N, gaps, IUPAC codes) are left
# out of every column. A sequence with no A/C/G/T bases reports a GC
# percentage of 0 instead of dividing by zero.
#
# The trailing ':' after the percentage is kept from the original output
# format so existing consumers of the output keep working.
sub checkATCG {
    my ($name) = @_;

    my $dna = $sequences{$name};
    $dna = '' unless defined $dna;

    # tr/// with an empty replacement list counts matching characters
    # without modifying the string.
    my $count_a = ($dna =~ tr/Aa//);
    my $count_c = ($dna =~ tr/Cc//);
    my $count_g = ($dna =~ tr/Gg//);
    my $count_t = ($dna =~ tr/Tt//);

    my $total = $count_a + $count_c + $count_g + $count_t;
    my $at    = $count_a + $count_t;
    my $gc    = $count_g + $count_c;

    my $gc_percent = $total ? ($gc / $total) * 100 : 0;

    print "$name\t$total\t$at\t$gc\t$gc_percent:\n";
    return;
}