Alternative content
#!/usr/bin/perl
use strict;
use warnings;
use Parallel::ForkManager;
use Bio::SeqIO;
# Driver: parse the FASTA file named on the command line, then process every
# sequence that has no "<name>.out" file yet, using a fixed pool of forked
# worker processes.
@ARGV or die "Usage: $0 <genome.fasta>\n";
my ($sequence_data_ref) = parse_genome_files($ARGV[0]);
my %genome = %{$sequence_data_ref};

my $n_processes = 4;
my $pm = Parallel::ForkManager->new( $n_processes );

# Sorted so that the slot a sequence lands in does not depend on hash order.
my @chr_sets = sort keys %genome;

for my $i ( 1 .. $n_processes ) {
    # Parent: start() returns the child pid, so move on and fork the next worker.
    $pm->start and next;

    # Child: worker $i handles indices $i-1, $i-1+N, $i-1+2N, ... so that each
    # sequence belongs to exactly one worker. (Testing "$count % $i == 0"
    # instead makes worker 1 match every sequence and the other workers match
    # overlapping subsets, so several processes can pick the same sequence.)
    for ( my $idx = $i - 1; $idx < @chr_sets; $idx += $n_processes ) {
        my $entry = $genome{ $chr_sets[$idx] };

        # Skip sequences whose output file is already present.
        next if output_exists( $entry->{name} );
        start_new_XFOIL_instance( $entry->{name}, $entry->{nuc_seq} );
    }
    $pm->finish;
}
$pm->wait_all_children;
# output_exists($chr_set)
# Returns the result of the -f (plain file) test on "<chr_set>.out".
sub output_exists {
    my ($chr_set) = @_;
    my $out_file = $chr_set . '.out';
    return ( -f $out_file );
}
# start_new_XFOIL_instance($chr_set, $chr_seq)
# Prints a start message for $chr_set, hands "<chr_set>.out" and the sequence
# to touch(), then prints a completion message.
sub start_new_XFOIL_instance {
    my $chr_set = shift;
    my $chr_seq = shift;

    print "starting XFOIL instance with parameters $chr_set!\n";

    my $out_file = "$chr_set.out";
    touch( $out_file, $chr_seq );

    print "finished run with parameters $chr_set!\n";
}
# touch($fn, $seq)
# Creates (or truncates) the output file $fn and runs AUGUSTUS with
# --outfile=$fn on the given sequence.
#
#   $fn  - path of the output file to produce.
#   $seq - either the path of an existing query file, or a raw nucleotide
#          string; a raw string is written to a temporary FASTA file first,
#          because the augustus command line takes a query file name.
#
# Returns 1 when augustus exits with status 0, 0 otherwise (a warning is
# printed). Dies if the output file or the temporary FASTA file cannot be
# written.
sub touch {
    my ($fn, $seq) = @_;

    # Create the (empty) output file up front so that it exists as soon as
    # work on this sequence begins.
    open my $out_fh, '>', $fn or die "Cannot create '$fn': $!";
    close $out_fh or die "Cannot close '$fn': $!";

    my $query = $seq;
    my $tmp_fasta;    # declared here so the temp file outlives the augustus run
    unless ( defined $seq && length $seq && -f $seq ) {
        require File::Temp;

        # Use the output name without its ".out" suffix as the FASTA record id.
        ( my $record_id = $fn ) =~ s/\.out\z//;

        $tmp_fasta = File::Temp->new( SUFFIX => '.fa' );
        print {$tmp_fasta} ">$record_id\n", ( defined $seq ? $seq : '' ), "\n"
            or die "Cannot write temporary FASTA file: $!";
        close $tmp_fasta or die "Cannot close temporary FASTA file: $!";
        $query = $tmp_fasta->filename;
    }

    # List form of system(): arguments reach augustus verbatim, with no shell
    # interpreting characters in the file names.
    my @cmd = (
        'augustus',
        '--species=caenorhabditis',
        "--outfile=$fn",
        $query,
        '--AUGUSTUS_CONFIG_PATH=/home/urbe/Tools/Alienomics_v1.1/augustus.2.5.5/config',
    );
    my $status = system @cmd;
    if ( $status != 0 ) {
        my $reason = $status == -1
            ? "could not be started: $!"
            : 'exited with status ' . ( $status >> 8 );
        warn "augustus $reason while producing '$fn'\n";
        return 0;
    }
    return 1;
}
# parse_genome_files($file)
# Reads every record of the FASTA file $file with Bio::SeqIO, copies each
# accepted record to "genomeRES.fa", and collects per-record data.
#
# Returns a reference to a hash keyed by the record's display id; each value
# is a hash with the keys:
#   status, problem_desc - "OK" and "-" for every record at present
#   nuc_seq              - the sequence string
#   len                  - the sequence length
#   gc                   - percentage of G/C characters (0 for an empty record)
#   name                 - the display id again
# Records sharing a display id overwrite one another in the returned hash.
sub parse_genome_files {
    my $file = shift;
    my %sequence_data;

    my $file_content = Bio::SeqIO->new( -format => 'fasta', -file => $file );
    my $out_content  = Bio::SeqIO->newFh( -format => 'fasta', -file => ">genomeRES.fa" );

    while ( my $gene_info = $file_content->next_seq() ) {
        my $sequence         = $gene_info->seq();
        my $accession_number = $gene_info->display_id;
        my $len              = $gene_info->length;

        # tr/// with an empty replacement list only counts. The search list is
        # a plain character set, so no "|" separator belongs in it.
        my $GCcount = defined $sequence ? ( $sequence =~ tr/GCgc// ) : 0;

        # Guard against zero-length records, which would divide by zero.
        my $GCcontent = $len ? ( $GCcount / $len ) * 100 : 0;

        $sequence_data{$accession_number}{status}       = "OK";    # everybody starts fine
        $sequence_data{$accession_number}{problem_desc} = "-";     # everybody starts fine

        # Add check points here <<<<<<
        if ( $sequence_data{$accession_number}{status} eq "OK" ) {
            $sequence_data{$accession_number}{nuc_seq} = $sequence;
            $sequence_data{$accession_number}{len}     = $len;
            $sequence_data{$accession_number}{gc}      = $GCcontent;
            $sequence_data{$accession_number}{name}    = $accession_number;
            print {$out_content} $gene_info;
        }
    }
    return ( \%sequence_data );
}