#!/usr/bin/perl
use strict;
use warnings;
use Bio::DB::Fasta;
use Bio::SeqIO;
# Paths to the GFF annotation file and the genome FASTA file
my $gff_file = 'path/to/your/file.gff';
my $genome_fasta = 'path/to/your/genome.fasta';
# ID of the gene to extract (compared with the ID= attribute of 'gene' features)
my $gene_id_to_extract = 'your_gene_id';

# Step 1: Parse the GFF file and record where each gene lies.
# %gene_locations maps gene ID => [seqid, start, end, strand]. The seqid and
# strand are stored with the coordinates because the lookup in Step 2 happens
# after this loop, where the per-line @fields array is no longer in scope.
my %gene_locations;
open my $gff_fh, '<', $gff_file
    or die "Cannot open GFF file '$gff_file': $!";
LINE:
while (my $line = <$gff_fh>) {
    last LINE if $line =~ /^##FASTA/;       # Embedded sequence section: no features follow
    next LINE if $line =~ /^#/;             # Skip comments and directives
    $line =~ s/\r?\n\z//;                   # Strip the line ending so column 9 is clean
    my @fields = split /\t/, $line;
    next LINE if @fields < 9;               # Skip blank or malformed lines
    next LINE unless $fields[2] eq 'gene';  # Consider only gene features
    # Anchor on an attribute boundary so that keys merely ending in "ID"
    # are not mistaken for the ID attribute.
    my ($gene_id) = $fields[8] =~ /(?:^|;)\s*ID=([^;]+)/;
    next LINE unless defined $gene_id;
    $gene_locations{$gene_id} = [ @fields[ 0, 3, 4, 6 ] ];
}
close $gff_fh;

# Step 2: Extract the gene's DNA sequence from the genome
my $location = $gene_locations{$gene_id_to_extract}
    or die "Gene '$gene_id_to_extract' not found among the gene features of '$gff_file'\n";
my ($seqid, $start, $end, $strand) = @{$location};
my $db = Bio::DB::Fasta->new($genome_fasta);
# For a minus-strand gene the coordinates are passed in reverse order; per the
# Bio::DB::Fasta documentation, start > stop yields the reverse complement.
my $gene_dna_sequence = $strand eq '-'
    ? $db->seq($seqid, $end, $start)
    : $db->seq($seqid, $start, $end);
die "Could not fetch $seqid:$start-$end from '$genome_fasta'\n"
    unless defined $gene_dna_sequence && length $gene_dna_sequence;

# Step 3: Translate DNA sequence into protein sequence
# NOTE(review): this translates the entire gene span. For genes with introns
# or UTRs the CDS features would have to be joined first -- confirm that the
# input annotation makes whole-span translation meaningful.
my $gene_protein_sequence = translate_dna_to_protein($gene_dna_sequence);

# Print the protein sequence
print "Protein Sequence:\n$gene_protein_sequence\n";
# translate_dna_to_protein($dna_sequence)
#
# Translate a DNA sequence into a protein sequence.
#   $dna_sequence - nucleotide sequence as a plain string
# Returns the string obtained by wrapping the input in a Bio::Seq object
# (alphabet 'dna') and calling translate() on it with no arguments.
sub translate_dna_to_protein {
    my ($dna_sequence) = @_;
    # Load Bio::Seq explicitly: this file only pulls in Bio::DB::Fasta and
    # Bio::SeqIO, and relying on them to load Bio::Seq as a side effect is
    # fragile.
    require Bio::Seq;
    my $seq_obj = Bio::Seq->new(-seq => $dna_sequence, -alphabet => 'dna');
    return $seq_obj->translate->seq;
}