X BOL wishing you a very and Happy New year

Alternative content

Our Sponsors



Download BioinformaticsOnline(BOL) Apps in your chrome browser.




Perl script to run SATSUMA in loop !

  • Public
By Jit 2378 days ago
#!/usr/bin/perl -w use strict; use File::Temp qw(tempfile); # Usage perl 1by1.pl for SATSUMA analysis # User need to set the reference multifasta file name here my $seqfile=""; my $queryfile = "genome.fasta"; # Ur query genome my $tarfile = "renamedAdinetaV2.fa"; #Ur target file my $satsumaLoc="/home/urbe/Tools/SATSUMA/satsuma-code-0"; # Location of ur SATSUMA my $maxSize = 5000; my $resolution = 5000; my $dotsize = 1; my $cpu=40; my @ids; #Store the ids if ($ARGV[1] eq "ids") { my $idFile="palindrome_ids.txt"; open my $handle, '<', $idFile; chomp(@ids = <$handle>); close $handle; } my %params = map { $_ => 1 } @ids; #foreach (sort keys %params) { print "$_ : $params{$_}\n"; } if ($ARGV[0] eq "flip") {$seqfile = $tarfile;} else { $seqfile = $queryfile;} local $/ = "\n>"; # read by FASTA record open FASTA, $seqfile; while (<FASTA>) { chomp; my $seq = $_; my ($id) = $seq =~ /^>*(\S+)/; # parse ID as first word in FASTA header $seq =~ s/^>*.+\n//; # remove FASTA header $seq =~ s/\n//g; # remove endlines next if length($seq) < $maxSize; # Size check if ($ARGV[1] eq "ids") { if (exists($params{$id})) { print "$id Working on it\n"; } else { next; } } # remove the file when the reference goes away with the UNLINK option my $tmp_fh = new File::Temp( UNLINK => 1 ); print $tmp_fh ">$id\n$seq\n"; if ($ARGV[0] eq "flip") { #FLIPPED #print "$id\n$seq\n"; my $mySS="$satsumaLoc/SatsumaSynteny -q $tmp_fh -t $queryfile -o RESULT_OUT_FLIP_$id -m 32 -ni 10 -n $cpu -chain_only"; system ("$mySS"); #let me sleep -- I am doing this because it messed up with other running processes print "Sorry guy m tired let me sleep a while :)\n"; sleep(600); #plot blocksynteny file my $myBDS="$satsumaLoc/BlockDisplaySatsuma -i RESULT_OUT_FLIP_$id/satsuma_summary.chained.out -t $queryfile -q $tmp_fh > RESULT_OUT_FLIP_$id/$id-out.synteny"; system ("$myBDS"); #chromosome paint print "Painting chromosomes\n"; my $myCP="$satsumaLoc/ChromosomePaint -i RESULT_OUT_FLIP_$id/$id-out.synteny -o RESULT_OUT_FLIP_$id/$id-out.cpaint -s 5000 -d 5000"; system ("$myCP"); } else { #NORMAL #print "$id\n$seq\n"; my $mySS="$satsumaLoc/SatsumaSynteny -q $tmp_fh -t $tarfile -o RESULT_OUT_NORMAL_$id -m 32 -ni 10 -n $cpu -chain_only"; system ("$mySS"); #let me sleep -- I am doing this because it messed up with other running processes print "Sorry guy m tired let me sleep a while :)\n"; sleep(600); #plot blocksynteny file my $myBDS="$satsumaLoc/BlockDisplaySatsuma -i RESULT_OUT_NORMAL_$id/satsuma_summary.chained.out -t $tarfile -q $tmp_fh > RESULT_OUT_NORMAL_$id/$id-out.synteny"; system ("$myBDS"); #chromosome paint print "Painting chromosomes\n"; my $myCP="$satsumaLoc/ChromosomePaint -i RESULT_OUT_NORMAL_$id/$id-out.synteny -o RESULT_OUT_NORMAL_$id/$id-out.cpaint -s 5000 -d 5000"; system ("$myCP"); } } close FASTA; __END__ SatsumaSynteny ################################################################## # -q : query fasta sequence # -t : target fasta sequence # -o : output directory # -l : minimum alignment length (def=0) # -t_chunk : target chunk size (def=4096) # -q_chunk : query chunk size (def=4096) # -t_chunk_seed : target chunk size (seed) (def=8192) # -q_chunk_seed : query chunk size (seed) (def=8192) # -n : number of CPUs (def=1) # -ni : number of initial search blocks (def=-1) # -lsf : submit jobs to LSF (def=0) # -nosubmit : do not run jobs (def=0) # -nowait : do not wait for jobs (def=0) # -chain_only : only chain the matches (def=0) # -refine_only : only refine the matches (def=0) # -min_prob : minimum probability to keep match (def=0.99999) # -proteins : align in protein space (def=0) # -cutoff : signal cutoff (def=1.8) # -cutoff : signal cutoff (seed) (def=3) # -m : number of jobs per block (def=8) # -resume : resumes w/ the output of a previous run (xcorr*data) (def=) # -seed : loads seeds and runs from there (xcorr*data) (def=) #-pixel : number of blocks per pixel (def=24) # -nofilter : do not pre-filter seeds (slower runtime) (def=0) # –dups : allow for duplications in the query sequence (def=0) #####################################################################################################################################