<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Perl and BioPerl script to extract protein sequences using GFF file !]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/44449/perl-and-bioperl-script-to-extract-protein-sequences-using-gff-file?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/44449/perl-and-bioperl-script-to-extract-protein-sequences-using-gff-file?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44449/perl-and-bioperl-script-to-extract-protein-sequences-using-gff-file</guid>
	<pubDate>Thu, 01 Feb 2024 01:51:00 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44449/perl-and-bioperl-script-to-extract-protein-sequences-using-gff-file</link>
	<title><![CDATA[Perl and BioPerl script to extract protein sequences using GFF file !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;
use Bio::DB::Fasta;
use Bio::SeqIO;

# Paths to your GFF file and genome FASTA file
my $gff_file = &#039;path/to/your/file.gff&#039;;
my $genome_fasta = &#039;path/to/your/genome.fasta&#039;;

# Gene ID to extract
my $gene_id_to_extract = &#039;your_gene_id&#039;;

# Step 1: Parse GFF file to get gene locations.
# Each entry keeps the sequence ID (column 1), start, end and strand, so that
# step 2 does not need @fields, which only exists inside the loop body.
my %gene_locations;
open my $gff_fh, &#039;&lt;&#039;, $gff_file or die &quot;Cannot open GFF file: $!&quot;;
while (my $line = &lt;$gff_fh&gt;) {
    # Strip the line ending; otherwise an ID that is the last attribute
    # would capture the trailing newline and never match the lookup key.
    $line =~ s/\r?\n\z//;
    next if $line =~ /^#/;  # Skip comments
    my @fields = split(/\t/, $line);
    next if @fields &lt; 9;  # Skip blank or incomplete lines
    next unless $fields[2] eq &#039;gene&#039;;  # Consider only gene features
    my ($gene_id) = $fields[8] =~ /ID=([^;]+)/;
    next unless defined $gene_id;  # Gene feature without an ID attribute
    $gene_locations{$gene_id} = {
        seqid  =&gt; $fields[0],
        start  =&gt; $fields[3],
        end    =&gt; $fields[4],
        strand =&gt; $fields[6],
    };
}
close $gff_fh;

# Step 2: Extract DNA sequence from genome
my $location = $gene_locations{$gene_id_to_extract}
    or die &quot;Gene ID &#039;$gene_id_to_extract&#039; not found in $gff_file\n&quot;;
my $db = Bio::DB::Fasta-&gt;new($genome_fasta);
my ($start, $end) = ($location-&gt;{start}, $location-&gt;{end});
# For a minus-strand gene, swap the coordinates: Bio::DB::Fasta returns the
# reverse complement when the start is greater than the stop.
($start, $end) = ($end, $start) if $location-&gt;{strand} eq &#039;-&#039;;
my $gene_dna_sequence = $db-&gt;seq($location-&gt;{seqid}, $start, $end);
die &quot;Sequence &#039;$location-&gt;{seqid}&#039; not found in $genome_fasta\n&quot;
    unless defined $gene_dna_sequence;

# Step 3: Translate DNA sequence into protein sequence
my $gene_protein_sequence = translate_dna_to_protein($gene_dna_sequence);

# Print the protein sequence
print &quot;Protein Sequence:\n$gene_protein_sequence\n&quot;;

# Subroutine to translate DNA sequence to protein sequence.
# Takes a DNA string and returns the translated amino-acid string;
# returns an empty string when there is nothing to translate.
sub translate_dna_to_protein {
    my ($dna_sequence) = @_;
    return &#039;&#039; unless defined $dna_sequence and length $dna_sequence;
    # Load Bio::Seq explicitly: the script&#039;s use lines only name
    # Bio::DB::Fasta and Bio::SeqIO, so do not rely on them pulling it in.
    require Bio::Seq;
    my $seq_obj = Bio::Seq-&gt;new(-seq =&gt; $dna_sequence, -alphabet =&gt; &#039;dna&#039;);
    my $protein_sequence = $seq_obj-&gt;translate-&gt;seq;
    return $protein_sequence;
}</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>

</channel>
</rss>