Perl script to run SATSUMA in a loop

Jit — Tue, 26 Jun 2018 04:56:08 -0500
#!/usr/bin/perl -w
use strict;
use File::Temp qw(tempfile);

# Usage: perl 1by1.pl [flip] [ids]
#
# Runs the SATSUMA pipeline (SatsumaSynteny -> BlockDisplaySatsuma ->
# ChromosomePaint) once per FASTA record, one sequence at a time.
#
#   1st argument: when it is exactly "flip", the records of $tarfile are used
#                 one by one as the query against $queryfile. Any other value
#                 (or none) takes the records of $queryfile and aligns each
#                 of them against $tarfile.
#   2nd argument: when it is exactly "ids", only records whose id is listed
#                 in palindrome_ids.txt (one id per line) are processed.
#
# Results go to RESULT_OUT_FLIP_<id>/ or RESULT_OUT_NORMAL_<id>/.

# User needs to set the input file names and tool location here
my $seqfile = "";                       # chosen below from the run mode
my $queryfile = "genome.fasta";         # query genome
my $tarfile = "renamedAdinetaV2.fa";    # target genome
my $satsumaLoc = "/home/urbe/Tools/SATSUMA/satsuma-code-0"; # SATSUMA install dir
my $maxSize = 5000;     # despite the name: records SHORTER than this are skipped

# NOTE(review): $resolution and $dotsize are not used anywhere in this
# script; ChromosomePaint is called with literal "-s 5000 -d 5000" below.
my $resolution = 5000;
my $dotsize = 1;
my $cpu = 40;
my $sleepSecs = 600;    # pause after each SatsumaSynteny run
my @ids;

# Missing command line arguments are treated as "not requested" instead of
# being compared while undefined.
my $flip   = (defined $ARGV[0] && $ARGV[0] eq "flip") ? 1 : 0;
my $useIds = (defined $ARGV[1] && $ARGV[1] eq "ids")  ? 1 : 0;

# Run an external program directly (no shell), so that characters such as
# "|" or ";" in a FASTA id cannot change the command.
# Returns 1 when the program exited with status 0, otherwise warns and
# returns 0.
sub run_tool {
    my @cmd = @_;
    my $rc = system { $cmd[0] } @cmd;
    return 1 if $rc == 0;
    warn "Command failed (status $?): @cmd\n";
    return 0;
}

# Run an external program directly (no shell) and save everything it prints
# on STDOUT into $outFile. Returns true only if the program exited with
# status 0 and the output file was written without error.
sub run_to_file {
    my ($outFile, @cmd) = @_;

    my $out;
    unless (open $out, '>', $outFile) {
        warn "Cannot write '$outFile': $!\n";
        return 0;
    }

    my $pipe;
    unless (open $pipe, '-|', @cmd) {
        warn "Cannot run '$cmd[0]': $!\n";
        close $out;
        return 0;
    }

    # Copy in fixed-size chunks; read() does not depend on the record
    # separator, which the main program changes for FASTA parsing.
    my $buf;
    while (read($pipe, $buf, 65536)) {
        print {$out} $buf;
    }

    my $ranOk = close $pipe;    # false when the child exited non-zero
    my $status = $?;
    my $wroteOk = close $out;
    my $writeErr = $!;

    warn "Command failed (status $status): @cmd\n" unless $ranOk;
    warn "Error writing '$outFile': $writeErr\n" unless $wroteOk;
    return ($ranOk && $wroteOk) ? 1 : 0;
}

# Store the ids (read line by line: the record separator is still "\n" here)
if ($useIds) {
    my $idFile = "palindrome_ids.txt";
    open my $handle, '<', $idFile
        or die "Cannot open id file '$idFile': $!\n";
    chomp(@ids = <$handle>);
    close $handle;
}
my %params = map { $_ => 1 } @ids;
#foreach (sort keys %params) { print "$_ : $params{$_}\n"; }

# The file that is split into single sequences, the genome each sequence is
# aligned against, and the prefix of the result directory.
$seqfile = $flip ? $tarfile : $queryfile;
my $target = $flip ? $queryfile : $tarfile;
my $outPrefix = $flip ? "RESULT_OUT_FLIP_" : "RESULT_OUT_NORMAL_";

local $/ = "\n>";  # read by FASTA record

open my $fasta, '<', $seqfile
    or die "Cannot open FASTA file '$seqfile': $!\n";

while (my $record = <$fasta>) {
    chomp $record;  # strips the trailing "\n>" that starts the next record

    # parse ID as first word in FASTA header
    my ($id) = $record =~ /^>*(\S+)/;
    next unless defined $id;    # empty or whitespace-only record

    my $seq = $record;
    $seq =~ s/^>*.+\n//;  # remove FASTA header
    $seq =~ s/\n//g;      # remove endlines

    next if length($seq) < $maxSize; # Size check

    if ($useIds) {
        next unless exists $params{$id};
        print "$id Working on it\n";
    }

    # Write the single sequence to a temporary FASTA file. The file is
    # removed when $tmp_fh goes out of scope (UNLINK option).
    my $tmp_fh = File::Temp->new( UNLINK => 1 );
    my $tmpName = $tmp_fh->filename;
    print $tmp_fh ">$id\n$seq\n";
    # Close to flush the buffer: the external tools open the file by name
    # and would otherwise see an empty or truncated sequence.
    close $tmp_fh
        or die "Cannot write temporary FASTA file '$tmpName': $!\n";

    # NOTE(review): $id is used verbatim in directory and file names; an id
    # containing "/" would need sanitising.
    my $outDir = $outPrefix . $id;
    my $syntenyFile = "$outDir/$id-out.synteny";

    my $aligned = run_tool(
        "$satsumaLoc/SatsumaSynteny",
        '-q', $tmpName, '-t', $target, '-o', $outDir,
        '-m', 32, '-ni', 10, '-n', $cpu, '-chain_only',
    );

    #let me sleep -- I am doing this because it messed up with other running processes
    print "Sorry guy m tired let me sleep a while :)\n";
    sleep($sleepSecs);

    # Without an alignment there is nothing to display or paint.
    next unless $aligned;

    #plot blocksynteny file
    my $displayed = run_to_file(
        $syntenyFile,
        "$satsumaLoc/BlockDisplaySatsuma",
        '-i', "$outDir/satsuma_summary.chained.out",
        '-t', $target, '-q', $tmpName,
    );
    next unless $displayed;

    #chromosome paint
    print "Painting chromosomes\n";
    run_tool(
        "$satsumaLoc/ChromosomePaint",
        '-i', $syntenyFile, '-o', "$outDir/$id-out.cpaint",
        '-s', 5000, '-d', 5000,
    );
}

close $fasta;

__END__
SatsumaSynteny
##################################################################
# -q : query fasta sequence
# -t : target fasta sequence
# -o : output directory
# -l : minimum alignment length (def=0)
# -t_chunk : target chunk size (def=4096)
# -q_chunk : query chunk size (def=4096)
# -t_chunk_seed : target chunk size (seed) (def=8192)
# -q_chunk_seed : query chunk size (seed) (def=8192)
# -n : number of CPUs (def=1)
# -ni : number of initial search blocks (def=-1)
# -lsf : submit jobs to LSF (def=0)
# -nosubmit : do not run jobs (def=0)
# -nowait : do not wait for jobs (def=0)
# -chain_only : only chain the matches (def=0)
# -refine_only : only refine the matches (def=0)
# -min_prob : minimum probability to keep match (def=0.99999)
# -proteins : align in protein space (def=0)
# -cutoff : signal cutoff (def=1.8)
# -cutoff_seed : signal cutoff (seed) (def=3)
# -m : number of jobs per block (def=8)
# -resume : resumes w/ the output of a previous run (xcorr*data) (def=)
# -seed : loads seeds and runs from there (xcorr*data) (def=)
# -pixel : number of blocks per pixel (def=24)
# -nofilter : do not pre-filter seeds (slower runtime) (def=0)
# -dups : allow for duplications in the query sequence (def=0)
#####################################################################################################################################
BOL: Perl script to run SATSUMA in loop !

Perl script to run SATSUMA in a loop