#Find patterns forming clumps in a string.
#Given: A string Genome, and integers k, L, and t.
#Return: All distinct k-mers forming (L, t)-clumps in Genome.
use strict;
use warnings;
# Driver: tally how often each distinct k-mer occurs in the genome string and
# print the k-mers whose tally equals the requested threshold.
# NOTE(review): the file header describes (L, t)-clumps, but no window length L
# is applied here -- occurrences are counted over the whole string, and the
# threshold is matched exactly (==) rather than "at least t". Confirm that this
# is the intended behaviour.
my %myHash;    # k-mer => number of occurrences in $string
my $string="CGGACTCGACAGATGTGAAGAAATGTGAAGACTGAGTGAAGAGAAGAGGAAACACGACACGACATTGCGACATAATGTACGAATGTAATGTGCCTATGGC";
my $clump=4;   # occurrence count a k-mer must have to be printed
my $kmer=5;    # k-mer length
# Visit every start position that leaves room for a full k-mer; the range is
# empty when the string is shorter than $kmer.
for my $start (0 .. length($string) - $kmer) {
    my $candidate = substr $string, $start, $kmer;
    # A repeated k-mer always yields the same count, so count it only once.
    next if exists $myHash{$candidate};
    $myHash{$candidate} = kmerMatch($string, $candidate, $kmer);
}
# Print every k-mer whose count matches the threshold. Keys are sorted so the
# output order is identical on every run (hash order is not stable).
foreach my $name (sort keys %myHash) {
    print "$name " if $myHash{$name} == $clump;
}
# Count how many $kmer-length windows of $string are equal to $myStr,
# sliding one character at a time (overlapping occurrences are counted).
#   $string - text to scan
#   $myStr  - pattern compared with each window using string equality
#   $kmer   - window length
# Returns the number of matching windows; 0 when $string is shorter than $kmer.
sub kmerMatch {
    my ($string, $myStr, $kmer)=@_;
    my $count=0;
    # The last full window starts at length - $kmer. The bound has to follow
    # $kmer: a fixed 4 skips the trailing windows whenever $kmer < 4 and
    # compares truncated tail windows whenever $kmer > 4.
    for my $start (0 .. length($string) - $kmer) {
        $count++ if substr($string, $start, $kmer) eq $myStr;
    }
    return $count;
}