<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Perl script to run SATSUMA in loop !]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/37054/perl-script-to-run-satsuma-in-loop?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/37054/perl-script-to-run-satsuma-in-loop?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/37054/perl-script-to-run-satsuma-in-loop</guid>
	<pubDate>Tue, 26 Jun 2018 04:56:08 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/37054/perl-script-to-run-satsuma-in-loop</link>
	<title><![CDATA[Perl script to run SATSUMA in loop !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl -w
use strict;
use File::Temp qw(tempfile);

# Usage: perl 1by1.pl [flip] [ids]
#   1st argument flip : split the target file into single sequences and use
#                       each one as the SATSUMA query; any other value (or no
#                       argument) splits the query genome instead.
#   2nd argument ids  : only process sequences whose ID is listed, one per
#                       line, in palindrome_ids.txt.

# Settings the user is expected to edit before running
my $seqfile=&quot;&quot;;  # multi-FASTA file that gets split; chosen from the mode further down
my $queryfile = &quot;genome.fasta&quot;; # Ur query genome
my $tarfile = &quot;renamedAdinetaV2.fa&quot;; #Ur target file
my $satsumaLoc=&quot;/home/urbe/Tools/SATSUMA/satsuma-code-0&quot;; # Location of ur SATSUMA
my $maxSize = 5000;  # sequences shorter than this are skipped by the size check in the main loop

# NOTE(review): $resolution and $dotsize are not referenced again in the visible script
my $resolution = 5000;
my $dotsize = 1;
my $cpu=40;  # handed to SatsumaSynteny as -n
my @ids;

# Store the ids.
# The defined() test keeps a run without a second argument free of
# uninitialized-value warnings under -w.
if (defined $ARGV[1] and $ARGV[1] eq &quot;ids&quot;) {
    my $idFile=&quot;palindrome_ids.txt&quot;;
    # Fail loudly: with an unreadable ID list every sequence would be
    # skipped by the ID filter and the run would silently do nothing.
    open my $handle, &#039;&lt;&#039;, $idFile or die &quot;Cannot open $idFile: $!\n&quot;;
    chomp(@ids = &lt;$handle&gt;);
    close $handle;
    # FASTA IDs never contain whitespace, so stray blanks or CR characters
    # in the list can only prevent a match; strip them and drop empty lines.
    for my $line (@ids) {
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
    }
    @ids = grep { length } @ids;
}
# Lookup table: ID =&gt; 1 for every requested sequence
my %params = map { $_ =&gt; 1 } @ids;
#foreach (sort keys %params) { print &quot;$_ : $params{$_}\n&quot;; }

# Pick the file to split; a missing first argument means normal mode
if (defined $ARGV[0] and $ARGV[0] eq &quot;flip&quot;) {
    $seqfile = $tarfile;
}
else {
    $seqfile = $queryfile;
}

# Must stay after the ID list has been read line by line above
local $/ = &quot;\n&gt;&quot;;  # read by FASTA record

# Wrap one word in single quotes for the shell so that characters such as
# | ; * or spaces inside a FASTA ID cannot change the command line.
sub shell_quote {
    my ($word) = @_;
    $word =~ s/&#039;/&#039;\\&#039;&#039;/g;
    return &quot;&#039;$word&#039;&quot;;
}

# Run one external command and warn when it reports a non-zero status.
# The pipeline carries on afterwards; the warning only makes failures visible.
sub run_step {
    my ($label, $command) = @_;
    my $status = system($command);
    warn &quot;$label returned wait status $status\n&quot; if $status != 0;
    return $status;
}

# Evaluate the command-line switches once; missing arguments count as not set
# so no uninitialized-value warnings are raised under -w.
my $flipRun = (defined $ARGV[0] and $ARGV[0] eq &quot;flip&quot;);
my $idsOnly = (defined $ARGV[1] and $ARGV[1] eq &quot;ids&quot;);

# The two modes differ only in the label used for the result directory and in
# the unsplit file passed as -t: the query genome when flipped, else the target.
my $modeLabel = $flipRun ? &quot;FLIP&quot; : &quot;NORMAL&quot;;
my $wholeFile = $flipRun ? $queryfile : $tarfile;

open my $fasta, &#039;&lt;&#039;, $seqfile or die &quot;Cannot open FASTA file $seqfile: $!\n&quot;;
while (my $record = &lt;$fasta&gt;) {
    chomp $record;  # strips the record separator, not just a newline
    my $seq = $record;
    my ($id) = $seq =~ /^&gt;*(\S+)/;  # parse ID as first word in FASTA header
    $seq =~ s/^&gt;*.+\n//;  # remove FASTA header
    $seq =~ s/\n//g;  # remove endlines
    next if length($seq) &lt; $maxSize; # Size check
    next unless defined $id;  # no usable header, nothing to name the outputs after

    if ($idsOnly) {
        next unless exists($params{$id});
        print &quot;$id Working on it\n&quot;;
    }

    # Single-sequence FASTA file; removed when the object goes out of scope.
    # The object turns into its file name when interpolated into a string.
    my $tmp_fh = File::Temp-&gt;new( UNLINK =&gt; 1 );
    print $tmp_fh &quot;&gt;$id\n$seq\n&quot;;
    # Make sure the sequence is on disk before the external tools read it;
    # perl usually flushes before forking but that is not guaranteed everywhere.
    $tmp_fh-&gt;flush;

    # Result locations, quoted once for use in the shell commands below
    my $outDir   = &quot;RESULT_OUT_${modeLabel}_$id&quot;;
    my $qOutDir  = shell_quote($outDir);
    my $qChained = shell_quote(&quot;$outDir/satsuma_summary.chained.out&quot;);
    my $qSynteny = shell_quote(&quot;$outDir/$id-out.synteny&quot;);
    my $qPaint   = shell_quote(&quot;$outDir/$id-out.cpaint&quot;);

    run_step(&quot;SatsumaSynteny&quot;,
        &quot;$satsumaLoc/SatsumaSynteny -q $tmp_fh -t $wholeFile -o $qOutDir -m 32 -ni 10 -n $cpu -chain_only&quot;);

    #let me sleep -- I am doing this because it messed up with other running processes
    print &quot;Sorry guy m tired let me sleep a while :)\n&quot;;
    sleep(600);

    #plot blocksynteny file
    run_step(&quot;BlockDisplaySatsuma&quot;,
        &quot;$satsumaLoc/BlockDisplaySatsuma -i $qChained -t $wholeFile -q $tmp_fh &gt; $qSynteny&quot;);

    #chromosome paint
    print &quot;Painting chromosomes\n&quot;;
    run_step(&quot;ChromosomePaint&quot;,
        &quot;$satsumaLoc/ChromosomePaint -i $qSynteny -o $qPaint -s 5000 -d 5000&quot;);
}

close $fasta;

__END__
SatsumaSynteny
##################################################################
# -q : query fasta sequence
# -t : target fasta sequence
# -o : output directory
# -l : minimum alignment length (def=0)
# -t_chunk : target chunk size (def=4096)
# -q_chunk : query chunk size (def=4096)
# -t_chunk_seed : target chunk size (seed) (def=8192)
# -q_chunk_seed : query chunk size (seed) (def=8192)
# -n : number of CPUs (def=1)
# -ni : number of initial search blocks (def=-1)
# -lsf : submit jobs to LSF (def=0)
# -nosubmit : do not run jobs (def=0)
# -nowait : do not wait for jobs (def=0)
# -chain_only : only chain the matches (def=0)
# -refine_only : only refine the matches (def=0)
# -min_prob : minimum probability to keep match (def=0.99999)
# -proteins : align in protein space (def=0)
# -cutoff : signal cutoff (def=1.8)
# -cutoff_seed : signal cutoff (seed) (def=3)
# -m : number of jobs per block (def=8)
# -resume : resumes w/ the output of a previous run (xcorr*data) (def=)
# -seed : loads seeds and runs from there (xcorr*data) (def=)
# -pixel : number of blocks per pixel (def=24)
# -nofilter : do not pre-filter seeds (slower runtime) (def=0)
# -dups : allow for duplications in the query sequence (def=0)
#####################################################################################################################################</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>

</channel>
</rss>