#!/usr/bin/perl
use strict;
use warnings;
use List::Util qw(max min sum);
use Scalar::Util qw(looks_like_number);
# Usage: ./parse_blast.pl blast_result.txt
# The path of the BLAST result file is required as the first argument.
@ARGV or die "Usage: ./parse_blast.pl blast_result.txt\n";
my $blast_file    = shift @ARGV;
my @blast_entries = parse_blast($blast_file);

# Header: number of parsed entries, followed by a separator line.
print "Total entries: " . scalar(@blast_entries) . "\n";
print "---------------------------\n";

# Print detailed information for each entry: one labelled line per field,
# in this fixed order, followed by a separator line.
for my $entry (@blast_entries) {
    for my $pair (
        [ 'Query: '            => 'QUERY' ],
        [ 'Subject: '          => 'SUBJECT' ],
        [ 'Percent Identity: ' => 'PERCENT_IDENTITY' ],
        [ 'Alignment Length: ' => 'ALIGNMENT_LENGTH' ],
        [ 'E-value: '          => 'EVALUE' ],
        [ 'Bit Score: '        => 'BITSCORE' ],
    ) {
        my ($label, $key) = @$pair;
        print $label, $entry->{$key}, "\n";
    }
    print "---------------------------\n";
}

# Calculate additional statistics over all parsed entries.
my $avg_percent_identity
    = calculate_average(\@blast_entries, 'PERCENT_IDENTITY');
my $avg_alignment_length
    = calculate_average(\@blast_entries, 'ALIGNMENT_LENGTH');
my ($min_evalue, $max_evalue, $avg_evalue)
    = calculate_summary_stats(\@blast_entries, 'EVALUE');
my ($min_bitscore, $max_bitscore, $avg_bitscore)
    = calculate_summary_stats(\@blast_entries, 'BITSCORE');

# Print summary statistics.
print
    "Average Percent Identity: $avg_percent_identity\n",
    "Average Alignment Length: $avg_alignment_length\n",
    "E-value Range: $min_evalue - $max_evalue\n",
    "Average E-value: $avg_evalue\n",
    "Bit Score Range: $min_bitscore - $max_bitscore\n",
    "Average Bit Score: $avg_bitscore\n";
# Parse a tab-delimited BLAST result file into a list of hit records.
#
# Arguments:
#   $file - path of the file to read.
#
# Returns a list of hash references, one per data row, with the keys QUERY,
# SUBJECT, PERCENT_IDENTITY, ALIGNMENT_LENGTH, EVALUE and BITSCORE filled
# from the first six tab-separated columns, in that order. Any further
# columns are ignored.
#
# Blank lines and lines starting with '#' are skipped. Rows with fewer than
# six columns are skipped with a warning, so that later statistics never see
# undefined fields. Dies if the file cannot be opened.
#
# NOTE(review): the column order assumes a custom six-column report such as
# -outfmt "6 qseqid sseqid pident length evalue bitscore". The default
# 12-column tabular layout puts evalue/bitscore in columns 11 and 12 --
# confirm which format the input files actually use.
sub parse_blast {
    my ($file) = @_;
    my @columns
        = qw(QUERY SUBJECT PERCENT_IDENTITY ALIGNMENT_LENGTH EVALUE BITSCORE);

    open my $fh, '<', $file or die "Cannot open file $file: $!\n";

    my @entries;
    while (my $line = <$fh>) {
        # Strip LF and CRLF endings alike; chomp alone would leave a "\r"
        # glued to the last column of files written on Windows.
        $line =~ s/\r?\n\z//;

        next if $line =~ /^\s*$/;    # skip empty lines
        next if $line =~ /^#/;       # skip comment/header lines

        my @fields = split /\t/, $line;
        if (@fields < @columns) {
            warn "Skipping line $. of $file: expected at least "
                . scalar(@columns)
                . " tab-separated fields, found "
                . scalar(@fields) . "\n";
            next;
        }

        my %entry;
        @entry{@columns} = @fields[ 0 .. $#columns ];
        push @entries, \%entry;
    }
    close $fh;

    return @entries;
}
# Compute the arithmetic mean of one numeric field across a set of entries.
#
# Arguments:
#   $entries - array reference of hash references.
#   $field   - hash key whose values are averaged.
#
# Returns the mean of the values that look like numbers, or 0 when there are
# none (including when $entries is empty).
sub calculate_average {
    my ($entries, $field) = @_;

    # Drop missing or non-numeric values: otherwise they would be summed as
    # zeros (with warnings) and still inflate the denominator.
    my @values
        = grep { looks_like_number($_) } map { $_->{$field} } @$entries;

    return @values ? sum(@values) / @values : 0;
}
# Compute the minimum, maximum and arithmetic mean of one numeric field
# across a set of entries.
#
# Arguments:
#   $entries - array reference of hash references.
#   $field   - hash key whose values are summarised.
#
# Returns the list ($min, $max, $avg) over the values that look like
# numbers, or (0, 0, 0) when there are none.
sub calculate_summary_stats {
    my ($entries, $field) = @_;

    # Drop missing or non-numeric values so they neither trigger warnings
    # nor get treated as zeros.
    my @values
        = grep { looks_like_number($_) } map { $_->{$field} } @$entries;

    my ($min, $max, $avg) = (0, 0, 0);
    if (@values) {
        # Single linear passes instead of sorting the whole list twice.
        $min = min(@values);
        $max = max(@values);
        $avg = sum(@values) / @values;
    }

    return ($min, $max, $avg);
}