Our Sponsors



Download BioinformaticsOnline(BOL) Apps in your chrome browser.




  • BioScripts
  • Jit
  • Extract fasta sequence from a multifasta file with coordinates

Extract fasta sequence from a multifasta file with coordinates

  • Public
By Jit 2516 days ago
#!/usr/bin/perl use Bio::DB::Fasta; #USAGE perl extractFASTAwithSIZE.pl finalSample_filtered.fa 0 1000 > aaaaaa.fa my $fastaFile = shift; my $querySizeST = shift; my $querySizeED = shift; my $db = Bio::DB::Fasta->new( $fastaFile ); my @ids = $db->get_all_primary_ids; foreach my $id (@ids) { my $sequence = $db->seq($id, $querySizeST => $querySizeED); if (!defined( $sequence )) { die "Sequence $seq not found. \n" } print ">$id\n", "$sequence\n"; } __END__ use Bio::DB::Fasta; # Create database from a directory of Fasta files my $db = Bio::DB::Fasta->new('/path/to/fasta/files/'); my @ids = $db->get_all_primary_ids; # Simple access my $seqstr = $db->seq('CHROMOSOME_I', 4_000_000 => 4_100_000); my $revseq = $db->seq('CHROMOSOME_I', 4_100_000 => 4_000_000); my $length = $db->length('CHROMOSOME_I'); my $header = $db->header('CHROMOSOME_I'); my $alphabet = $db->alphabet('CHROMOSOME_I'); # Access to sequence objects. See Bio::PrimarySeqI. my $seq = $db->get_Seq_by_id('CHROMOSOME_I'); my $seqstr = $seq->seq; my $subseq = $seq->subseq(4_000_000 => 4_100_000); my $trunc = $seq->trunc(4_000_000 => 4_100_000); my $length = $seq->length; # Loop through sequence objects my $stream = $db->get_PrimarySeq_stream; while (my $seq = $stream->next_seq) { # Bio::PrimarySeqI stuff } # Filehandle access my $fh = Bio::DB::Fasta->newFh('/path/to/fasta/files/'); while (my $seq = <$fh>) { # Bio::PrimarySeqI stuff } # Tied hash access tie %sequences,'Bio::DB::Fasta','/path/to/fasta/files/'; print $sequences{'CHROMOSOME_I:1,20000'};