<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Perl script for chi-squared test !]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/44283/perl-script-for-chi-squared-test?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/44283/perl-script-for-chi-squared-test?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44283/perl-script-for-chi-squared-test</guid>
	<pubDate>Tue, 21 Mar 2023 03:53:45 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44283/perl-script-for-chi-squared-test</link>
	<title><![CDATA[Perl script for chi-squared test !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
#
# chidi.pl 
#
# A script to perform a chi-squared test of the dinucleotide frequencies of two FASTA files
# Last updated by: $Author$
# Last updated on: $Date$

use strict;
use warnings;
use Getopt::Long;
use FAlite;



# sanity checks
die &quot;Usage: chidi.pl &lt;file1&gt; &lt;file2&gt;\n&quot; if (!$ARGV[1]);

my @dinucs = qw (AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT);

# hashes for obersered and expected dinucleotide frequencies of both files

my %file1_ob;
my %file2_ob;
my %file1_ex;
my %file2_ex;
								
############################################################
# Read sequence file 1
############################################################

open(FILE,&quot;$ARGV[0]&quot;) || die &quot;Can&#039;t open $ARGV[0]\n&quot;;
my $fasta = new FAlite(\*FILE);

# loop through each sequence in file 1
while(my $entry = $fasta-&gt;nextEntry) {	
	my $seq = uc($entry-&gt;seq);
	# to count dinucleotides, loop through sequence, take 2 bp and increment the hash counter
	foreach my $i (0..length($seq)){
	    my $tmp = substr($seq,$i,2);		
		$file1_ob{$tmp}++;
	}
}
close(FILE);


############################################################
# Read sequence file 2
############################################################

open(FILE,&quot;$ARGV[1]&quot;) || die &quot;Can&#039;t open $ARGV[1]\n&quot;;
$fasta = new FAlite(\*FILE);

# loop through each sequence in file 1
while(my $entry = $fasta-&gt;nextEntry) {	
	my $seq = uc($entry-&gt;seq);
	# to count dinucleotides, loop through sequence, take 2 bp and increment the hash counter
	foreach my $i (0..length($seq)){
	    my $tmp = substr($seq,$i,2);		
		$file2_ob{$tmp}++;
	}
}
close(FILE);


############################################################
# Perform chi-squared test
############################################################

# need total of all counts in both sequences, plus totals of &#039;rows&#039; in chi-square table

my $total;
my $row1;
my $row2;

foreach my $di (@dinucs){
	$row1  += $file1_ob{$di};
	$row2  += $file2_ob{$di};
	$total += ($file1_ob{$di} + $file2_ob{$di});
}


# now calculate expected values

foreach my $di (@dinucs){
	# calculate (column total * row total) / $total
	$file1_ex{$di} = sprintf(&quot;%.2f&quot;,(($file1_ob{$di}+$file2_ob{$di}) * $row1) / $total);
	$file2_ex{$di} = sprintf(&quot;%.2f&quot;,(($file1_ob{$di}+$file2_ob{$di}) * $row2) / $total);	
}

# now calculate chi-squared values
my ($chi1,$chi2);
my $chi_total;
print &quot;\tObs1\tExp2\t\tChi1\tObs2\tExp2\t\tChi2\n&quot;;
foreach my $di (@dinucs){
	$chi1 = sprintf(&quot;%.2f&quot;,(($file1_ob{$di} - $file1_ex{$di})**2)/$file1_ex{$di});
	$chi2 = sprintf(&quot;%.2f&quot;,(($file2_ob{$di} - $file2_ex{$di})**2)/$file2_ex{$di});	
	print &quot;$di\t$file1_ob{$di}\t$file1_ex{$di}\t$chi1\t$file2_ob{$di}\t$file2_ex{$di}\t$chi2\n&quot;;

	$chi_total += ($chi1+$chi2);
}

printf  &quot;Chi squared value = %6.2f\n&quot;, $chi_total;
				
print &quot;Significance level at 5% = 25.00\n&quot;;
print &quot;Significance level at 1% = 30.58\n&quot;;


exit(0);</code>]]></description>
	<dc:creator>Neel</dc:creator>
</item>

</channel>
</rss>