#!/usr/bin/perl -w
use strict;
use File::Temp qw(tempfile);
# Usage perl 1by1.pl for SATSUMA analysis
# User need to set the reference multifasta file name here
my $seqfile="";
my $queryfile = "genome.fasta"; # Ur query genome
my $tarfile = "renamedAdinetaV2.fa"; #Ur target file
my $satsumaLoc="/home/urbe/Tools/SATSUMA/satsuma-code-0"; # Location of ur SATSUMA
my $maxSize = 5000;
my $resolution = 5000;
my $dotsize = 1;
my $cpu=40;
my @ids;
#Store the ids
if ($ARGV[1] eq "ids") {
my $idFile="palindrome_ids.txt";
open my $handle, '<', $idFile;
chomp(@ids = <$handle>);
close $handle;
}
my %params = map { $_ => 1 } @ids;
#foreach (sort keys %params) { print "$_ : $params{$_}\n"; }
if ($ARGV[0] eq "flip") {$seqfile = $tarfile;} else { $seqfile = $queryfile;}
local $/ = "\n>"; # read by FASTA record
open FASTA, $seqfile;
while (<FASTA>) {
chomp;
my $seq = $_;
my ($id) = $seq =~ /^>*(\S+)/; # parse ID as first word in FASTA header
$seq =~ s/^>*.+\n//; # remove FASTA header
$seq =~ s/\n//g; # remove endlines
next if length($seq) < $maxSize; # Size check
if ($ARGV[1] eq "ids") { if (exists($params{$id})) { print "$id Working on it\n"; } else { next; } }
# remove the file when the reference goes away with the UNLINK option
my $tmp_fh = new File::Temp( UNLINK => 1 );
print $tmp_fh ">$id\n$seq\n";
if ($ARGV[0] eq "flip") {
#FLIPPED
#print "$id\n$seq\n";
my $mySS="$satsumaLoc/SatsumaSynteny -q $tmp_fh -t $queryfile -o RESULT_OUT_FLIP_$id -m 32 -ni 10 -n $cpu -chain_only";
system ("$mySS");
#let me sleep -- I am doing this because it messed up with other running processes
print "Sorry guy m tired let me sleep a while :)\n";
sleep(600);
#plot blocksynteny file
my $myBDS="$satsumaLoc/BlockDisplaySatsuma -i RESULT_OUT_FLIP_$id/satsuma_summary.chained.out -t $queryfile -q $tmp_fh > RESULT_OUT_FLIP_$id/$id-out.synteny";
system ("$myBDS");
#chromosome paint
print "Painting chromosomes\n";
my $myCP="$satsumaLoc/ChromosomePaint -i RESULT_OUT_FLIP_$id/$id-out.synteny -o RESULT_OUT_FLIP_$id/$id-out.cpaint -s 5000 -d 5000";
system ("$myCP");
}
else {
#NORMAL
#print "$id\n$seq\n";
my $mySS="$satsumaLoc/SatsumaSynteny -q $tmp_fh -t $tarfile -o RESULT_OUT_NORMAL_$id -m 32 -ni 10 -n $cpu -chain_only";
system ("$mySS");
#let me sleep -- I am doing this because it messed up with other running processes
print "Sorry guy m tired let me sleep a while :)\n";
sleep(600);
#plot blocksynteny file
my $myBDS="$satsumaLoc/BlockDisplaySatsuma -i RESULT_OUT_NORMAL_$id/satsuma_summary.chained.out -t $tarfile -q $tmp_fh > RESULT_OUT_NORMAL_$id/$id-out.synteny";
system ("$myBDS");
#chromosome paint
print "Painting chromosomes\n";
my $myCP="$satsumaLoc/ChromosomePaint -i RESULT_OUT_NORMAL_$id/$id-out.synteny -o RESULT_OUT_NORMAL_$id/$id-out.cpaint -s 5000 -d 5000";
system ("$myCP");
}
}
close FASTA;
__END__
SatsumaSynteny
##################################################################
# -q : query fasta sequence
# -t : target fasta sequence
# -o : output directory
# -l : minimum alignment length (def=0)
# -t_chunk : target chunk size (def=4096)
# -q_chunk : query chunk size (def=4096)
# -t_chunk_seed : target chunk size (seed) (def=8192)
# -q_chunk_seed : query chunk size (seed) (def=8192)
# -n : number of CPUs (def=1)
# -ni : number of initial search blocks (def=-1)
# -lsf : submit jobs to LSF (def=0)
# -nosubmit : do not run jobs (def=0)
# -nowait : do not wait for jobs (def=0)
# -chain_only : only chain the matches (def=0)
# -refine_only : only refine the matches (def=0)
# -min_prob : minimum probability to keep match (def=0.99999)
# -proteins : align in protein space (def=0)
# -cutoff : signal cutoff (def=1.8)
# -cutoff : signal cutoff (seed) (def=3)
# -m : number of jobs per block (def=8)
# -resume : resumes w/ the output of a previous run (xcorr*data) (def=)
# -seed : loads seeds and runs from there (xcorr*data) (def=)
#-pixel : number of blocks per pixel (def=24)
# -nofilter : do not pre-filter seeds (slower runtime) (def=0)
# –dups : allow for duplications in the query sequence (def=0)
#####################################################################################################################################