#!/usr/bin/perl
use strict;
use warnings;
# Command-line driver: parses the VCF file named by the first argument, then
# prints the total number of entries followed by a per-chromosome tally.
# Usage: ./parse_vcf.pl input.vcf
@ARGV or die "Usage: ./parse_vcf.pl input.vcf\n";

my $vcf_file    = shift @ARGV;
my @vcf_entries = parse_vcf($vcf_file);

print "Total entries: " . scalar(@vcf_entries) . "\n";
print "---------------------------\n";

# Tally how many entries were seen for each CHROM value.
my %chromosome_counts;
$chromosome_counts{ $_->{CHROM} }++ foreach @vcf_entries;

# Report the tallies in plain string-sorted order of chromosome name.
print "Chromosome counts:\n";
foreach my $name (sort keys %chromosome_counts) {
    my $seen = $chromosome_counts{$name};
    print " $name: $seen\n";
}
# parse_vcf($file)
#
# Reads the tab-delimited file at $file and returns a list of hash
# references, one per data line, in file order.
#
# Lines that are empty/whitespace-only, or whose first non-whitespace
# character is '#', are skipped (this includes '##' meta lines and the
# '#CHROM' header line).
#
# Each returned hash has the keys
#   CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLES
# filled from the columns in order.  Columns missing from a short line are
# left undef.  SAMPLES holds everything after the FORMAT column: for a
# single-sample file that is the one sample string; for a multi-sample file
# it is all sample columns, still joined by tabs (split it on /\t/ to get
# the individual samples).
#
# Dies with "Cannot open file ..." if the file cannot be opened for reading.
sub parse_vcf {
    my ($file) = @_;

    # Column names, in file order.  The last name soaks up all remaining
    # columns (see the split limit below).
    my @columns = qw/CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLES/;

    open my $fh, '<', $file or die "Cannot open file $file: $!\n";

    my @entries;
    while (my $line = <$fh>) {
        next if $line =~ /^\s*$/;    # skip empty lines
        next if $line =~ /^\s*#/;    # skip comments

        # Remove the line terminator.  A plain chomp would leave the "\r" of
        # CRLF-terminated files glued to the last field of every record.
        $line =~ s/[\r\n]+\z//;

        # Limit the split to the number of named columns so that extra
        # sample columns stay together in the final field instead of being
        # silently discarded by the hash-slice assignment.  Note that a
        # positive limit also keeps trailing empty fields (as '' rather
        # than undef).
        my @fields = split /\t/, $line, scalar @columns;

        my %entry;
        @entry{@columns} = @fields;
        push @entries, \%entry;
    }
    close $fh;

    return @entries;
}