Our Sponsors



Download BioinformaticsOnline(BOL) Apps in your chrome browser.




  • BioScripts
  • Jit
  • Perl script to find the distance beetween all the contigs and scaffolds

Perl script to find the distance beetween all the contigs and scaffolds

  • Public
By Jit 2132 days ago
#!/usr/bin/perl use strict; use warnings; use Bio::SeqIO; $| = 1; #Script to see the distance beetween all the contigs and scaffolds #Usage: perl clustalReads.pl genome.fa > HammingDist.txt #Dependancy: MAFFT my ($refSeq, $SeqIds) = fastafile2hash($ARGV[0]); my $tmpFile="tmpOutfile"; foreach my $chr (@$SeqIds) { my $cnt=0; my $value=scalar(@$SeqIds); do{{ $value=$value-1; my $ofh = write_fh($tmpFile); next if $chr eq @$SeqIds[$cnt]; print "$chr\t@$SeqIds[$cnt]\t"; print $ofh ">$chr\n$refSeq->{$chr}{seq}\n>@$SeqIds[$cnt]\n$refSeq->{@$SeqIds[$cnt]}{seq}\n"; close $ofh or die "Could not close file '$tmpFile' $!"; $cnt++; system ("mafft --retree 1 --maxiterate 0 --thread 40 $tmpFile > MAFFT_out"); #usage: perl clustalRead.pl MAFFT_out my ($seqRef, $ids) = fastafile2hash('MAFFT_out'); my ($fval, $sval) = trimMSA($seqRef->{$ids->[0]}{seq}, $seqRef->{$ids->[1]}{seq}); my $seq1 = substr $seqRef->{$ids->[0]}{seq}, $fval, ($seqRef->{$ids->[0]}{len}-$sval)-$fval; my $seq2 = substr $seqRef->{$ids->[1]}{seq}, $fval, ($seqRef->{$ids->[1]}{len}-$sval)-$fval; my $val = hammingDist($seq1, $seq2); print "$val\n"; }} until($value<=0); } #calculate the hamming distance sub hammingDist { return ($_[0] ^ $_[1]) =~ tr/\001-\255//; } #Remove '-' from both end sub trimMSA { my ($seq1, $seq2) = @_; my ($substring1) = $seq1 =~ /^.{0}(.*?)[a-zA-Z_]/s; my ($substring2) = $seq2 =~ /^.{0}(.*?)[a-zA-Z_]/s; my $trimLen1=length($substring1); my $trimLen2=length($substring2); my $val; #print "$trimLen1 <= $trimLen2\n"; if ($trimLen1 >= $trimLen2) {$val = $trimLen1; } elsif ($trimLen1 <= $trimLen2) {$val = $trimLen2; } my $seq1_rev = reverse $seq1; my $seq2_rev = reverse $seq2; my ($substring1_rev) = $seq1_rev =~ /^.{0}(.*?)[a-zA-Z_]/s; my ($substring2_rev) = $seq2_rev =~ /^.{0}(.*?)[a-zA-Z_]/s; my $trimLen1_rev=length($substring1_rev); my $trimLen2_rev=length($substring2_rev); my $val_rev; if ($trimLen1_rev >= $trimLen2_rev) {$val_rev = $trimLen1_rev; } elsif ($trimLen1_rev <= $trimLen2_rev) {$val_rev = $trimLen2_rev; } return $val, $val_rev; } #Store fasta file to hash sub fastafile2hash { my $fastafile = shift @_; my %sequences; my $fh = &read_fh($fastafile); my $seqid; my @allIds; while (my $line = <$fh>) { if ($line =~ /^>(\S+)(.*)/) { $seqid = $1; $sequences{$seqid}{desc} = $2; push @allIds, $seqid; } else { chomp $line; $sequences{$seqid}{seq} .= $line; $sequences{$seqid}{len} += length $line; $sequences{$seqid}{gc} += ($line =~ tr/gcGC/gcGC/); $line =~ s/[^atgc]/N/ig; $sequences{$seqid}{nonatgc} += ($line =~ tr/N/N/); } } close $fh; return \%sequences, \@allIds; } sub read_fh { my $filename = shift @_; my $filehandle; if ($filename =~ /gz$/) { open $filehandle, "gunzip -dc $filename |" or die $!; } else { open $filehandle, "<$filename" or die $!; } return $filehandle; } sub write_fh { my $filename = shift @_; my $filehandle; open $filehandle, ">$filename" or die $!; return $filehandle; }