use strict;
use warnings;
# Palindrome scanner for FASTA-style text.
#
# Usage: perl SCRIPT FILE [THRESHOLD]
#
# For every record in FILE the header line and the joined sequence are
# printed, followed by one tab-separated line per reported palindrome:
#
#   start  midpoint  end  palindrome  length
#
# Offsets are 0-based positions in the joined sequence; "end" is the offset
# just past the palindrome and "midpoint" is start plus half the length,
# rounded up. Only palindromes strictly longer than THRESHOLD characters
# are reported.

# Threshold used when none is supplied.
use constant DEFAULT_THRESHOLD => 100;

# find_palindromes($data, $threshold)
#
# Scans $data at every offset and returns a list of array refs
# [start, midpoint, end, text, length], one for each offset at which the
# pattern matched a palindrome strictly longer than $threshold
# (default DEFAULT_THRESHOLD). Matching is case-insensitive and works on
# \w characters only.
sub find_palindromes {
    my ($data, $threshold) = @_;
    $threshold = DEFAULT_THRESHOLD unless defined $threshold;

    my @rows;

    # The lookahead consumes nothing, so //g retries at every offset and
    # overlapping palindromes are all seen. Group 1 is the palindrome
    # itself: (?1) recurses into it and \g{-1} is the character captured
    # just before the recursion, which must reappear on the far side.
    while ($data =~ / (?= ( (?: (\w) (?1) \g{-1} | \w? ) ) ) /gix) {
        my ($start, $palindrome) = ($-[0], $1);
        my $len = length $palindrome;
        next if $len <= $threshold;

        my $mid = $start + int(($len + 1) / 2);
        push @rows, [$start, $mid, $start + $len, $palindrome, $len];
    }
    return @rows;
}

# report_record($out, $header, $data, $threshold)
#
# Prints the header and sequence of one record to the handle $out,
# followed by its palindrome lines.
sub report_record {
    my ($out, $header, $data, $threshold) = @_;

    print {$out} "$header\n$data\n";
    for my $row (find_palindromes($data, $threshold)) {
        print {$out} join("\t", @{$row}), "\n";
    }
    return;
}

# process_fasta($in, $out, $threshold)
#
# Reads records line by line from the handle $in and reports each one on
# $out. A record starts at a line beginning with '>' or at the first
# non-empty line after an empty line; its first line is the header and the
# remaining lines are concatenated into the sequence. Treating '>' as a
# delimiter keeps files without blank separator lines from collapsing
# into a single record.
sub process_fasta {
    my ($in, $out, $threshold) = @_;

    local $/ = "\n";    # read single lines whatever the caller has set

    my ($header, @chunks);
    my $flush = sub {
        return unless defined $header;
        report_record($out, $header, join('', @chunks), $threshold);
        undef $header;
        @chunks = ();
        return;
    };

    while (my $line = <$in>) {
        # Drop the line ending, including the CR of CRLF files, so that it
        # never ends up inside the sequence.
        $line =~ s/\r?\n\z//;

        $flush->() if $line eq '' or $line =~ /\A>/;
        next if $line eq '';

        if (defined $header) {
            push @chunks, $line;
        }
        else {
            $header = $line;
        }
    }
    $flush->();
    return;
}

# main($filename, $threshold)
#
# Command-line entry point: validates the arguments, opens $filename as
# UTF-8 and writes the report to STDOUT. Dies with a usage message when no
# file name is given or THRESHOLD is not a non-negative integer.
sub main {
    my ($filename, $threshold) = @_;

    my $usage = "Usage: $0 FILE [THRESHOLD]\n";
    # open() with an undefined name would quietly open an empty temporary
    # file, so a missing argument has to be rejected explicitly.
    die $usage unless defined $filename and length $filename;
    die $usage if defined $threshold and $threshold !~ /\A[0-9]+\z/;

    open(my $fh, '<:encoding(UTF-8)', $filename) or die "Could not open file '$filename' $!";

    # The input is decoded, so the output must be encoded again.
    binmode(STDOUT, ':encoding(UTF-8)') or die "Could not set encoding on STDOUT: $!";

    process_fasta($fh, \*STDOUT, $threshold);

    close($fh) or die "Could not close file '$filename' $!";
    return;
}

# Run only when executed as a script, not when loaded by other code.
main(@ARGV) unless caller;
__DATA__
>TRE|Q47404|Q47404 (409 AA) Glycosyl transferase [Escherichia coli]
MIFDASLKKLRKLFVNPIGFFRDSWFFNSKNKAEELLSPLKIKSKNIFIVAHLGQLKKAE
LFIQKFSRRSNFLIVLATKKNTEMPRLILEQMNKKLFSSYKLLFIPTEPNTFSLKKVIWF
YNVYKYIVLNSKAKDAYFMSYAQHYAIFIWLFKKNNIRCSLIEEGTGTYKTEKKKPLVNI
NFYSWIINSIILFHYPDLKFENVYGTFPNLLKEKFDAKKIFEFKTIPLVKSSTRMDNLIH
>seq1
TGAATTACTAGAAGTACTTAAAATGATGGTTGGAGGAAATATTCTTGATGATCAAATTGC
CGTTAAACTAGGATTTCTTATAAAGGAGGTTGGTAGTAAAATTCATGAAGATCATTAAGT
>TRE|Q8VRL9|Q8VRL9 (492 AA) SiaD [Neisseria meningitidis]
MLQKIRKALFHPKKFFQDSQWFATPLFSSFAPKSNLFIISTFAQLNQAHSLTKMQKLKNN
LLVILYTTQNMKMPKLIQKSVDKELFSVTYMFELPRKPGIVSPKKFLYIQRGYKKLLKTI
QPAHLYVMSFAGHYSSLLSLAKKMNITTHLVEEGTATYAPLLESFTYKPTKFEQRFVGNN
LHQKGYFDKFDILHVAFPEYAKKIFNANEYHRFFAHSGGISTSQSIAKIQDKYRISQNDY