<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: All]]></title>
	<link>https://bioinformaticsonline.com/snippets?offset=30</link>
	<atom:link href="https://bioinformaticsonline.com/snippets?offset=30" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44450/perl-script-for-six-frame-translation</guid>
	<pubDate>Thu, 01 Feb 2024 01:52:50 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44450/perl-script-for-six-frame-translation</link>
	<title><![CDATA[Perl script for six frame translation !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;
use Bio::SeqIO;

# Six-frame translation of the first record of a nucleotide FASTA file.
# Prints one protein string per reading frame: three read from the forward
# strand and three read from the reverse-complement strand.

# Path to your input nucleotide sequence file in FASTA format
my $input_fasta = &#039;path/to/your/input.fasta&#039;;

# Step 1: Read the input FASTA file (only the first record is translated)
my $seqio = Bio::SeqIO-&gt;new(-file =&gt; $input_fasta, -format =&gt; &#039;fasta&#039;);
my $sequence = $seqio-&gt;next_seq
    or die &quot;No sequence found in $input_fasta\n&quot;;

# Step 2: Perform six-frame translation
# Frames are numbered 1..3 on the forward strand and -1..-3 on the reverse strand
my @frames = (1, 2, 3, -1, -2, -3);
foreach my $frame (@frames) {
    my $translated_seq = translate_frame($sequence, $frame);
    my $frame_type = $frame &gt; 0 ? &quot;Forward&quot; : &quot;Reverse&quot;;
    print &quot;Frame $frame_type $frame Translation:\n$translated_seq\n&quot;;
}

# Subroutine to translate a sequence in a specific frame
#   $sequence : sequence object providing translate() and revcom()
#   $frame    : 1, 2 or 3 (forward strand) or -1, -2 or -3 (reverse complement)
# Returns the translated protein sequence as a plain string.
sub translate_frame {
    my ($sequence, $frame) = @_;

    # The -frame argument of translate() is a 0-based offset (0, 1 or 2),
    # so the 1-based frame number has to be shifted down by one
    my $offset = abs($frame) - 1;

    # Negative frames are read from the reverse-complement strand
    my $template = $frame &gt; 0 ? $sequence : $sequence-&gt;revcom;

    return $template-&gt;translate(-frame =&gt; $offset)-&gt;seq;
}</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44449/perl-and-bioperl-script-to-extract-protein-sequences-using-gff-file</guid>
	<pubDate>Thu, 01 Feb 2024 01:51:00 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44449/perl-and-bioperl-script-to-extract-protein-sequences-using-gff-file</link>
	<title><![CDATA[Perl and BioPerl script to extract protein sequences using GFF file !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;
use Bio::DB::Fasta;
use Bio::Seq;
use Bio::SeqIO;

# Extract the genomic span of one gene listed in a GFF file and print its
# translation. The whole gene span is translated as-is (no exon splicing).

# Paths to your GFF file and genome FASTA file
my $gff_file = &#039;path/to/your/file.gff&#039;;
my $genome_fasta = &#039;path/to/your/genome.fasta&#039;;

# Gene ID to extract
my $gene_id_to_extract = &#039;your_gene_id&#039;;

# Step 1: Parse GFF file to get gene locations
my %gene_locations;
open my $gff_fh, &#039;&lt;&#039;, $gff_file or die &quot;Cannot open GFF file: $!&quot;;
while (my $line = &lt;$gff_fh&gt;) {
    next if $line =~ /^#/;  # Skip comments
    chomp $line;
    my @fields = split(/\t/, $line);
    # Consider only complete, nine-column gene features
    next unless @fields &gt;= 9 and $fields[2] eq &#039;gene&#039;;
    # Anchor on an attribute boundary so keys that merely end in ID are not matched
    my ($gene_id) = $fields[8] =~ /(?:^|;)\s*ID=([^;]+)/;
    next unless defined $gene_id;
    # Keep the sequence name and strand with the coordinates: they are needed
    # after the loop, where @fields is no longer in scope
    $gene_locations{$gene_id} = {
        seqid  =&gt; $fields[0],
        start  =&gt; $fields[3],
        end    =&gt; $fields[4],
        strand =&gt; $fields[6],
    };
}
close $gff_fh;

# Step 2: Extract DNA sequence from genome
my $location = $gene_locations{$gene_id_to_extract}
    or die &quot;Gene &#039;$gene_id_to_extract&#039; not found in $gff_file\n&quot;;
my $db = Bio::DB::Fasta-&gt;new($genome_fasta);
my ($start, $end) = @{$location}{qw(start end)};
# Bio::DB::Fasta returns the reverse complement when start is greater than
# end, which is what a minus-strand gene needs before translation
($start, $end) = ($end, $start) if $location-&gt;{strand} eq &#039;-&#039;;
my $gene_dna_sequence = $db-&gt;seq($location-&gt;{seqid}, $start, $end);
die &quot;Sequence &#039;$location-&gt;{seqid}&#039; not found in $genome_fasta\n&quot;
    unless defined $gene_dna_sequence;

# Step 3: Translate DNA sequence into protein sequence
my $gene_protein_sequence = translate_dna_to_protein($gene_dna_sequence);

# Print the protein sequence
print &quot;Protein Sequence:\n$gene_protein_sequence\n&quot;;

# Subroutine to translate DNA sequence to protein sequence
#   $dna_sequence : nucleotide string, already in coding orientation
# Returns the translation as a plain string.
sub translate_dna_to_protein {
    my ($dna_sequence) = @_;
    my $seq_obj = Bio::Seq-&gt;new(-seq =&gt; $dna_sequence, -alphabet =&gt; &#039;dna&#039;);
    my $protein_sequence = $seq_obj-&gt;translate-&gt;seq;
    return $protein_sequence;
}</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44448/python-script-to-extract-a-protein-sequence-from-a-genome-using-a-general-feature-format-gff-file</guid>
	<pubDate>Thu, 01 Feb 2024 01:48:49 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44448/python-script-to-extract-a-protein-sequence-from-a-genome-using-a-general-feature-format-gff-file</link>
	<title><![CDATA[Python script to extract a protein sequence from a genome using a General Feature Format (GFF) file !]]></title>
	<description><![CDATA[<code>#You typically need the corresponding genome sequence file in FASTA format.
#The GFF file contains information about the features (such as genes) in the
#genome, including their locations and annotations.

#The outline of the steps :

#Parse the GFF file to extract information about the gene locations.
#Use the gene locations to extract the corresponding DNA sequences from the genome in FASTA format.
#Translate the DNA sequences into protein sequences.

#Simple example using Python and Biopython


from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq

def extract_protein_sequence(gff_file, genome_fasta, gene_id):
    # Translate the genomic span of one gene.
    #   gff_file     : path to the GFF annotation
    #   genome_fasta : path to a FASTA file holding exactly one record
    #                  (SeqIO.read rejects files with more or fewer records)
    #   gene_id      : value of the ID attribute of the wanted gene feature
    # Returns the translation of the whole gene span (no exon splicing).
    # Raises KeyError when gene_id has no gene feature in the GFF file.

    # Step 1: Parse the GFF file
    gene_locations = {}
    with open(gff_file, &#039;r&#039;) as gff:
        for line in gff:
            if line.startswith(&#039;#&#039;):
                continue
            fields = line.rstrip(&#039;\n&#039;).split(&#039;\t&#039;)
            if len(fields) &lt; 9 or fields[2] != &#039;gene&#039;:
                continue
            # Look the ID up by key instead of assuming it is the first attribute
            attributes = dict(
                item.strip().split(&#039;=&#039;, 1)
                for item in fields[8].split(&#039;;&#039;)
                if &#039;=&#039; in item
            )
            # A separate name is used here so the gene_id argument is not overwritten
            feature_id = attributes.get(&#039;ID&#039;)
            if feature_id is not None:
                gene_locations[feature_id] = (int(fields[3]), int(fields[4]), fields[6])

    if gene_id not in gene_locations:
        raise KeyError(f&quot;Gene ID &#039;{gene_id}&#039; not found in {gff_file}&quot;)

    # Step 2: Extract DNA sequence from the genome
    genome_record = SeqIO.read(genome_fasta, &#039;fasta&#039;)
    gene_start, gene_end, strand = gene_locations[gene_id]
    # GFF coordinates are 1-based and inclusive; Python slices are 0-based, end-exclusive
    gene_dna_sequence = genome_record.seq[gene_start - 1:gene_end]
    if strand == &#039;-&#039;:
        # Minus-strand genes must be read from the opposite strand
        gene_dna_sequence = gene_dna_sequence.reverse_complement()

    # Step 3: Translate DNA sequence into protein sequence
    gene_protein_sequence = gene_dna_sequence.translate()

    return gene_protein_sequence

# Example usage
gff_file = &#039;path/to/your/file.gff&#039;
genome_fasta = &#039;path/to/your/genome.fasta&#039;
gene_id_to_extract = &#039;your_gene_id&#039;

protein_sequence = extract_protein_sequence(gff_file, genome_fasta, gene_id_to_extract)
print(protein_sequence)</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44447/bash-script-to-calculate-difference-between-column</guid>
	<pubDate>Thu, 01 Feb 2024 01:28:44 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44447/bash-script-to-calculate-difference-between-column</link>
	<title><![CDATA[Bash script to calculate difference between column !]]></title>
	<description><![CDATA[<code># Space separated: store column 2 minus column 4 in column 5

awk &#039;{ $5 = $2 - $4 } 1&#039; inputput.txt &gt; outdiff.txt

# Or with tab separation: split AND join on tabs, so that empty fields and
# fields containing spaces keep their column positions

awk &#039;BEGIN { FS = OFS = &quot;\t&quot; } { $5 = $2 - $4 } 1&#039; inputput.txt &gt; outdiff.txt</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44440/bash-script-to-convert-multi-line-fasta-to-single-line-fasta</guid>
	<pubDate>Wed, 31 Jan 2024 00:39:21 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44440/bash-script-to-convert-multi-line-fasta-to-single-line-fasta</link>
	<title><![CDATA[Bash script to convert Multi-line Fasta to Single-line Fasta !]]></title>
	<description><![CDATA[<code>#!/bin/bash
# Convert a multi-line FASTA file into single-line FASTA: every record becomes
# one header line followed by one sequence line.

# Stop at the first failing command so the success message is only printed
# when the conversion really worked
set -e

input_filename=&quot;multi_line.fasta&quot;
output_filename=&quot;single_line.fasta&quot;

# seq remembers whether sequence text is pending on the current output line,
# so a newline is written only to terminate a sequence (no leading blank line)
awk &#039;
    /^&gt;/ { if (seq) printf(&quot;\n&quot;); print; seq = 0; next }
         { printf(&quot;%s&quot;, $0); seq = 1 }
    END  { if (seq) printf(&quot;\n&quot;) }
&#039; &quot;$input_filename&quot; &gt; &quot;$output_filename&quot;

echo &quot;Successfully converted $input_filename to $output_filename in single-line FASTA format.&quot;</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44439/perl-script-to-convert-multi-line-fasta-to-single-line-fasta</guid>
	<pubDate>Wed, 31 Jan 2024 00:38:21 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44439/perl-script-to-convert-multi-line-fasta-to-single-line-fasta</link>
	<title><![CDATA[Perl script to convert Multi-line Fasta to Single-line Fasta !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

use strict;
use warnings;

# Rewrite a FASTA file so that every record is one header line followed by
# one sequence line.
#   $input_filename  : path of the multi-line FASTA file to read
#   $output_filename : path of the single-line FASTA file to write
# Dies when either file cannot be opened or the output cannot be flushed.
sub multi_to_single_line_fasta {
    my ($input_filename, $output_filename) = @_;

    open my $input_file, &#039;&lt;&#039;, $input_filename or die &quot;Error: Could not open file &#039;$input_filename&#039;: $!&quot;;
    open my $output_file, &#039;&gt;&#039;, $output_filename or die &quot;Error: Could not open file &#039;$output_filename&#039;: $!&quot;;

    my $current_sequence = &quot;&quot;;

    while (my $line = &lt;$input_file&gt;) {
        chomp $line;
        $line =~ s/\r$//;  # Tolerate files with Windows line endings
        if ($line =~ /^&gt;/) {
            # If a header line, write the previous sequence and then the new header.
            # length() is tested rather than truth, because the string &quot;0&quot; is false in Perl
            print {$output_file} $current_sequence, &quot;\n&quot; if length $current_sequence;
            print {$output_file} $line, &quot;\n&quot;;
            $current_sequence = &quot;&quot;;
        } else {
            # If a sequence line, concatenate to the current sequence
            $current_sequence .= $line;
        }
    }

    # Write the last sequence
    print {$output_file} $current_sequence, &quot;\n&quot; if length $current_sequence;

    close $input_file;
    # Buffered write errors (for example a full disk) only surface at close time
    close $output_file or die &quot;Error: Could not write file &#039;$output_filename&#039;: $!&quot;;

    print &quot;Successfully converted $input_filename to $output_filename in single-line FASTA format.\n&quot;;
}

# Example usage:
# multi_to_single_line_fasta(&#039;multi_line.fasta&#039;, &#039;single_line.fasta&#039;);</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44438/python-script-to-convert-multi-line-fasta-to-single-line-fasta</guid>
	<pubDate>Wed, 31 Jan 2024 00:37:15 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44438/python-script-to-convert-multi-line-fasta-to-single-line-fasta</link>
	<title><![CDATA[Python script to convert Multi-line Fasta to Single-line Fasta]]></title>
	<description><![CDATA[<code>def multi_to_single_line_fasta(input_filename, output_filename):
    # Rewrite a FASTA file so that every record is one header line followed
    # by one sequence line.
    #   input_filename  : path of the multi-line FASTA file to read
    #   output_filename : path of the single-line FASTA file to write
    # Prints a status message; a missing path is reported, not raised.
    try:
        with open(input_filename, &#039;r&#039;) as input_file, open(output_filename, &#039;w&#039;) as output_file:
            current_sequence = &quot;&quot;
            for line in input_file:
                if line.startswith(&#039;&gt;&#039;):
                    # If a header line, write the previous sequence and then the new header
                    if current_sequence:
                        output_file.write(current_sequence + &#039;\n&#039;)
                    output_file.write(line.strip() + &#039;\n&#039;)
                    current_sequence = &quot;&quot;
                else:
                    # If a sequence line, concatenate to the current sequence
                    current_sequence += line.strip()

            # Write the last sequence
            if current_sequence:
                output_file.write(current_sequence + &#039;\n&#039;)

        print(f&quot;Successfully converted {input_filename} to {output_filename} in single-line FASTA format.&quot;)

    except FileNotFoundError as error:
        # error.filename is the path that could not be opened: the input file,
        # or the output file when its directory does not exist
        print(f&quot;Error: File &#039;{error.filename}&#039; not found.&quot;)

# Example usage:
# multi_to_single_line_fasta(&#039;multi_line.fasta&#039;, &#039;single_line.fasta&#039;)</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44437/perl-script-to-calculate-gc-content</guid>
	<pubDate>Tue, 30 Jan 2024 05:20:10 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44437/perl-script-to-calculate-gc-content</link>
	<title><![CDATA[Perl script to calculate GC content !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

use strict;
use warnings;

# Percentage of G and C characters in a sequence string.
#   $sequence : nucleotide string, upper or lower case
# Returns a number between 0 and 100; an empty sequence yields 0.
# Every character counts towards the total, including N or gap symbols.
sub calculate_gc_content {
    my ($sequence) = @_;
    $sequence = uc($sequence);  # Convert the sequence to uppercase
    my $total_bases = length($sequence);
    return 0 unless $total_bases;  # Avoid dividing by zero on empty input
    # Assigning the match list to an empty list yields the number of matches
    my $gc_count = () = $sequence =~ /[GC]/g;
    my $gc_content = ($gc_count / $total_bases) * 100;
    return $gc_content;
}

# Example usage:
my $dna_sequence = &quot;ATGCGCTAAAGCGAGCGAAGCGCTAGATCGATCGATCGATCGATCGATCGATCGATCGATCG&quot;;
my $gc_content = calculate_gc_content($dna_sequence);
printf &quot;GC content: %.2f%%\n&quot;, $gc_content;</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44430/raku-script-to-find-ssrs-in-fastq-file</guid>
	<pubDate>Sun, 14 Jan 2024 12:05:24 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44430/raku-script-to-find-ssrs-in-fastq-file</link>
	<title><![CDATA[Raku script to find SSRs in fastq file !]]></title>
	<description><![CDATA[<code># Find every stretch of $sequence that is immediately followed by a second
# copy of itself (a tandem duplication).
# Returns a list of hashes with the keys start, end (1-based, inclusive
# coordinates of the first copy), length and sequence (the repeated unit).
sub find-ssrs(Str $sequence) {
    my @ssrs;
    my $total = $sequence.chars;

    for 1..$total -&gt; $start {
        # A unit can only repeat if two copies fit between $start and the end
        my $longest-unit = ($total - $start + 1) div 2;

        # Longest units first, as in the original reporting order
        for (1..$longest-unit).reverse -&gt; $repeat-length {
            my $end = $start + $repeat-length - 1;
            my $repeat = $sequence.substr($start - 1, $repeat-length);

            # Compare the unit with the text that directly follows it; a plain
            # string comparison cannot mistake a failed search for a hit
            if $sequence.substr($end, $repeat-length) eq $repeat {
                push @ssrs, {
                    start   =&gt; $start,
                    end     =&gt; $end,
                    length  =&gt; $repeat-length,
                    sequence =&gt; $repeat
                };
            }
        }
    }

    return @ssrs;
}

# Scan every read of a FASTQ file and print the repeats found in it.
# The file is assumed to use four lines per record.
sub process-fastq-file(Str $filename) {
    my $fh = open $filename, :r;

    # Each FASTQ record is four lines: header, sequence, separator, quality
    for $fh.lines.rotor(4, :partial).kv -&gt; $record-index, @record {
        # Skip a truncated final record that has no sequence line
        next if @record.elems &lt; 2;

        my $sequence = @record[1];
        # Line of the sequence within the file (the second line of its record)
        my $line-number = $record-index * 4 + 2;

        my @ssrs = find-ssrs($sequence);

        if @ssrs {
            say &quot;SSRs found in sequence at line $line-number:&quot;;
            for @ssrs -&gt; $ssr {
                say &quot;  Start: $ssr&lt;start&gt;, End: $ssr&lt;end&gt;, Length: $ssr&lt;length&gt;, Sequence: $ssr&lt;sequence&gt;&quot;;
            }
        }
    }

    $fh.close;
}

# Replace &#039;your_fastq_file.fastq&#039; with the path to your FASTQ file
process-fastq-file(&#039;your_fastq_file.fastq&#039;);</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44429/raku-script-to-calculate-gc-content</guid>
	<pubDate>Sun, 14 Jan 2024 11:56:51 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44429/raku-script-to-calculate-gc-content</link>
	<title><![CDATA[Raku script to calculate GC content !]]></title>
	<description><![CDATA[<code># Percentage of G and C characters (either case) in a sequence string.
# Every character counts towards the total; an empty sequence yields 0.
sub calculate-gc-content(Str $sequence) {
    my $total-bases = $sequence.chars;

    # Avoid dividing by zero on empty input
    return 0 unless $total-bases;

    my $gc-count = $sequence.comb(/&lt;[GCgc]&gt;/).elems;

    return $gc-count / $total-bases * 100;
}

my $dna_sequence = &quot;ATGCGCTAAAGCGCGCGCCTTACGCGCGCGCGC&quot;;
my $gc_content = calculate-gc-content($dna_sequence);

say &quot;DNA Sequence: $dna_sequence&quot;;
say &quot;GC Content: $gc_content%&quot;;</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>

</channel>
</rss>