<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Perl script to find the distance beetween all the contigs and scaffolds]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/37053/perl-script-to-find-the-distance-beetween-all-the-contigs-and-scaffolds?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/37053/perl-script-to-find-the-distance-beetween-all-the-contigs-and-scaffolds?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/37053/perl-script-to-find-the-distance-beetween-all-the-contigs-and-scaffolds</guid>
	<pubDate>Tue, 26 Jun 2018 04:40:48 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/37053/perl-script-to-find-the-distance-beetween-all-the-contigs-and-scaffolds</link>
	<title><![CDATA[Perl script to find the distance beetween all the contigs and scaffolds]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;
use Bio::SeqIO;

$| = 1;

#Script to see the distance beetween all the contigs and scaffolds
#Usage: perl clustalReads.pl genome.fa &gt; HammingDist.txt
#Dependancy: MAFFT

my ($refSeq, $SeqIds) = fastafile2hash($ARGV[0]);
my $tmpFile=&quot;tmpOutfile&quot;;
foreach my $chr (@$SeqIds) {
	my $cnt=0;
	my $value=scalar(@$SeqIds);
	do{{ 		
		$value=$value-1;
		my $ofh = write_fh($tmpFile);
		next if $chr eq @$SeqIds[$cnt];
		print &quot;$chr\t@$SeqIds[$cnt]\t&quot;; 
		print $ofh &quot;&gt;$chr\n$refSeq-&gt;{$chr}{seq}\n&gt;@$SeqIds[$cnt]\n$refSeq-&gt;{@$SeqIds[$cnt]}{seq}\n&quot;;
		close $ofh or die &quot;Could not close file &#039;$tmpFile&#039; $!&quot;;
		$cnt++;
		system (&quot;mafft --retree 1 --maxiterate 0 --thread 40 $tmpFile &gt; MAFFT_out&quot;);
		#usage: perl clustalRead.pl MAFFT_out

		my ($seqRef, $ids) = fastafile2hash(&#039;MAFFT_out&#039;);

		my ($fval, $sval) = trimMSA($seqRef-&gt;{$ids-&gt;[0]}{seq}, $seqRef-&gt;{$ids-&gt;[1]}{seq});
		my $seq1  = substr $seqRef-&gt;{$ids-&gt;[0]}{seq}, $fval, ($seqRef-&gt;{$ids-&gt;[0]}{len}-$sval)-$fval;
		my $seq2  = substr $seqRef-&gt;{$ids-&gt;[1]}{seq}, $fval, ($seqRef-&gt;{$ids-&gt;[1]}{len}-$sval)-$fval;

		my $val = hammingDist($seq1, $seq2);
		print &quot;$val\n&quot;;
		}} until($value&lt;=0);
}


#calculate the hamming distance
sub hammingDist {
	return ($_[0] ^ $_[1]) =~ tr/\001-\255//;
}

#Remove &#039;-&#039; from both end
sub trimMSA {
    my ($seq1, $seq2) = @_;
    my ($substring1) = $seq1 =~ /^.{0}(.*?)[a-zA-Z_]/s;
    my ($substring2) = $seq2 =~ /^.{0}(.*?)[a-zA-Z_]/s;
    my $trimLen1=length($substring1); my $trimLen2=length($substring2);
    my $val;
#print &quot;$trimLen1 &lt;= $trimLen2\n&quot;;
    if ($trimLen1 &gt;= $trimLen2) {$val = $trimLen1; } elsif ($trimLen1 &lt;= $trimLen2) {$val = $trimLen2; }
    
    my $seq1_rev = reverse $seq1; my $seq2_rev = reverse $seq2;

    my ($substring1_rev) = $seq1_rev =~ /^.{0}(.*?)[a-zA-Z_]/s;
    my ($substring2_rev) = $seq2_rev =~ /^.{0}(.*?)[a-zA-Z_]/s;
    my $trimLen1_rev=length($substring1_rev); my $trimLen2_rev=length($substring2_rev);
    my $val_rev;
    if ($trimLen1_rev &gt;= $trimLen2_rev) {$val_rev = $trimLen1_rev; } elsif ($trimLen1_rev &lt;= $trimLen2_rev) {$val_rev = $trimLen2_rev; }
    
    return $val, $val_rev;
}


#Store fasta file to hash
sub fastafile2hash {
    my $fastafile = shift @_;
    my %sequences;
    my $fh = &amp;read_fh($fastafile);
    my $seqid; my @allIds;
    while (my $line = &lt;$fh&gt;) {
        if ($line =~ /^&gt;(\S+)(.*)/) {
            $seqid = $1;
            $sequences{$seqid}{desc} = $2;
	    push @allIds, $seqid;
        }
        else {
            chomp $line;
            $sequences{$seqid}{seq}     .= $line;
            $sequences{$seqid}{len}     += length $line;
            $sequences{$seqid}{gc}      += ($line =~ tr/gcGC/gcGC/);
            $line =~ s/[^atgc]/N/ig;
            $sequences{$seqid}{nonatgc} += ($line =~ tr/N/N/);
        }
    }
    close $fh;
    return \%sequences, \@allIds;
}


sub read_fh {
    my $filename = shift @_;
    my $filehandle;
    if ($filename =~ /gz$/) {
        open $filehandle, &quot;gunzip -dc $filename |&quot; or die $!;
    }
    else {
        open $filehandle, &quot;&lt;$filename&quot; or die $!;
    }
    return $filehandle;
}

sub write_fh {
    my $filename = shift @_;
    my $filehandle;
    open $filehandle, &quot;&gt;$filename&quot; or die $!;
    return $filehandle;
}</code>]]></description>
	<dc:creator>Jit</dc:creator>
</item>

</channel>
</rss>