<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Owner]]></title>
	<link>https://bioinformaticsonline.com/snippets/owner/lege?offset=20</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/owner/lege?offset=20" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44454/raku-script-to-find-microsatellites-in-dna-fragments</guid>
	<pubDate>Thu, 01 Feb 2024 02:00:27 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44454/raku-script-to-find-microsatellites-in-dna-fragments</link>
	<title><![CDATA[Raku script to find microsatellites in DNA fragments !]]></title>
	<description><![CDATA[<code># Return the distinct repeat units, $min-repeat-length to $max-repeat-length bases long,
# that occur at least $min-repeat-count times in a row somewhere in $sequence.
sub find-microsatellites($sequence, $min-repeat-length = 2, $max-repeat-length = 6, $min-repeat-count = 3) {
    my @microsatellites;

    # Raku binds the loop variable with a pointy block; &quot;for my $x (...)&quot; is Perl syntax
    for $min-repeat-length..$max-repeat-length -&gt; $repeat-length {
        # Only start positions that leave room for $min-repeat-count copies of the unit
        for ^($sequence.chars - $repeat-length * $min-repeat-count + 1) -&gt; $i {
            my $substring = $sequence.substr($i, $repeat-length);

            # &quot;x&quot; repeats the string, giving the tandem run to look for
            if $sequence.contains($substring x $min-repeat-count) {
                @microsatellites.push($substring);
            }
        }
    }

    return @microsatellites.unique;
}

# Example usage
my $genome-sequence = &quot;ATCGATCGATCGATCGATCG&quot;;
my @result = find-microsatellites($genome-sequence);

say &quot;Microsatellites found: &quot;, @result;</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44453/python-script-to-find-repeats-in-the-dna-sequence</guid>
	<pubDate>Thu, 01 Feb 2024 01:57:25 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44453/python-script-to-find-repeats-in-the-dna-sequence</link>
	<title><![CDATA[Python script to find repeats in the DNA sequence !]]></title>
	<description><![CDATA[<code>from collections import Counter

def find_repeats(sequence, min_repeat_length=3):
    &quot;&quot;&quot;Return the substrings of length min_repeat_length that occur more than once.

    Occurrences are counted at every start position, so overlapping copies
    (for example AAA inside AAAA) are included; str.count() would miss them
    because it only counts non-overlapping matches. The result keeps the
    order in which each substring first appears in the sequence.
    &quot;&quot;&quot;
    kmers = [sequence[i:i + min_repeat_length]
             for i in range(len(sequence) - min_repeat_length + 1)]

    # Counter is a dict, so iteration follows first-insertion order (Python 3.7+)
    counts = Counter(kmers)
    return [kmer for kmer, count in counts.items() if count &gt; 1]

# Example usage
genome_sequence = &quot;ATCGATCGATCGATCG&quot;
result = find_repeats(genome_sequence)

print(&quot;Repeats found:&quot;, result)</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44452/raku-script-to-find-repeats-in-sequences</guid>
	<pubDate>Thu, 01 Feb 2024 01:56:36 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44452/raku-script-to-find-repeats-in-sequences</link>
	<title><![CDATA[Raku script to find repeats in sequences !]]></title>
	<description><![CDATA[<code># Return every substring of length $min-repeat-length that occurs more than once
# in $sequence, in order of first appearance.
sub find-repeats($sequence, $min-repeat-length = 3) {
    my @repeats;

    for ^($sequence.chars - $min-repeat-length + 1) -&gt; $i {
        my $substring = $sequence.substr($i, $min-repeat-length);

        # .contains only answers True/False, so it can never be &gt; 1;
        # count the match positions (overlapping ones included) instead
        if $sequence.indices($substring, :overlap).elems &gt; 1 &amp;&amp; $substring !~~ any(@repeats) {
            @repeats.push($substring);
        }
    }

    return @repeats;
}

# Example usage
my $genome-sequence = &quot;ATCGATCGATCGATCG&quot;;
my @result = find-repeats($genome-sequence);

say &quot;Repeats found: &quot;, @result;</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44451/python-script-for-six-frame-translation-of-sequences</guid>
	<pubDate>Thu, 01 Feb 2024 01:54:14 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44451/python-script-for-six-frame-translation-of-sequences</link>
	<title><![CDATA[Python script for six frame translation of sequences !]]></title>
	<description><![CDATA[<code>from Bio import SeqIO
from Bio.Seq import Seq

def translate_frame(sequence, frame):
    &quot;&quot;&quot;Translate sequence in one reading frame.

    frame is 1, 2 or 3 for the forward strand and -1, -2 or -3 for the
    reverse complement; abs(frame) - 1 leading bases are skipped.
    &quot;&quot;&quot;
    if frame &gt; 0:
        translated_seq = sequence[frame-1:].translate()
    else:
        reverse_complement = sequence.reverse_complement()
        translated_seq = reverse_complement[abs(frame)-1:].translate()

    return translated_seq

def six_frame_translation(fasta_file):
    &quot;&quot;&quot;Print the six-frame translation of every record in a FASTA file.&quot;&quot;&quot;
    records = list(SeqIO.parse(fasta_file, &quot;fasta&quot;))

    for record in records:
        print(f&quot;Sequence ID: {record.id}&quot;)
        # range(1, 7) never produced a negative frame, so the reverse strand
        # was never translated; list the three forward and three reverse frames
        for frame in (1, 2, 3, -1, -2, -3):
            protein_sequence = translate_frame(record.seq, frame)
            frame_type = &quot;Forward&quot; if frame &gt; 0 else &quot;Reverse&quot;
            print(f&quot;Frame {frame_type} {abs(frame)} Translation:\n{protein_sequence}\n&quot;)

# Replace &#039;path/to/your/input.fasta&#039; with the actual path to your input nucleotide sequence in FASTA format
input_fasta = &#039;path/to/your/input.fasta&#039;
six_frame_translation(input_fasta)</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44450/perl-script-for-six-frame-translation</guid>
	<pubDate>Thu, 01 Feb 2024 01:52:50 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44450/perl-script-for-six-frame-translation</link>
	<title><![CDATA[Perl script for six frame translation !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;
use Bio::SeqIO;

# Path to your input nucleotide sequence file in FASTA format
my $input_fasta = &#039;path/to/your/input.fasta&#039;;

# Step 1: Read the input FASTA file (only the first record is translated)
my $seqio = Bio::SeqIO-&gt;new(-file =&gt; $input_fasta, -format =&gt; &#039;fasta&#039;);
my $sequence = $seqio-&gt;next_seq;

# Step 2: Perform six-frame translation
# Frames 1..3 are the forward strand, -1..-3 the reverse complement
my @frames = (1, 2, 3, -1, -2, -3);
foreach my $frame (@frames) {
    my $translated_seq = translate_frame($sequence, $frame);
    my $frame_type = $frame &gt; 0 ? &quot;Forward&quot; : &quot;Reverse&quot;;
    print &quot;Frame $frame_type $frame Translation:\n$translated_seq\n&quot;;
}

# Subroutine to translate a sequence in a specific frame
# $frame is 1, 2, 3 (forward) or -1, -2, -3 (reverse complement)
sub translate_frame {
    my ($sequence, $frame) = @_;

    # BioPerl counts frames from 0 (valid values 0, 1, 2), so convert the
    # 1-based frame number into the number of leading bases to skip
    my $offset = abs($frame) - 1;

    my $translated_seq;
    if ($frame &gt; 0) {
        $translated_seq = $sequence-&gt;translate(-frame =&gt; $offset)-&gt;seq;
    } else {
        # If frame is negative, reverse and complement the sequence before translation
        my $revcomp_seq = $sequence-&gt;revcom;
        $translated_seq = $revcomp_seq-&gt;translate(-frame =&gt; $offset)-&gt;seq;
    }

    return $translated_seq;
}</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44449/perl-and-bioperl-script-to-extract-protein-sequences-using-gff-file</guid>
	<pubDate>Thu, 01 Feb 2024 01:51:00 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44449/perl-and-bioperl-script-to-extract-protein-sequences-using-gff-file</link>
	<title><![CDATA[Perl and BioPerl script to extract protein sequences using GFF file !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
use strict;
use warnings;
use Bio::DB::Fasta;
use Bio::Seq;
use Bio::SeqIO;

# Paths to your GFF file and genome FASTA file
my $gff_file = &#039;path/to/your/file.gff&#039;;
my $genome_fasta = &#039;path/to/your/genome.fasta&#039;;

# Gene ID to extract
my $gene_id_to_extract = &#039;your_gene_id&#039;;

# Step 1: Parse GFF file to get gene locations (sequence ID, start, end)
# NOTE: the strand column is not read, so the forward-strand sequence is always used
my %gene_locations;
open my $gff_fh, &#039;&lt;&#039;, $gff_file or die &quot;Cannot open GFF file: $!&quot;;
while (&lt;$gff_fh&gt;) {
    next if /^#/;  # Skip comments
    chomp;         # Otherwise an ID at the end of the line keeps its newline
    my @fields = split(/\t/, $_);
    next unless @fields &gt;= 9 &amp;&amp; $fields[2] eq &#039;gene&#039;;  # Consider only gene features
    my ($gene_id) = $fields[8] =~ /ID=([^;]+)/;
    next unless defined $gene_id;  # Skip gene features without an ID attribute
    # Store the sequence ID (column 1) as well: @fields is not visible after this loop
    $gene_locations{$gene_id} = [$fields[0], $fields[3], $fields[4]];
}
close $gff_fh;

# Step 2: Extract DNA sequence from genome
die &quot;Gene ID $gene_id_to_extract not found in $gff_file\n&quot;
    unless exists $gene_locations{$gene_id_to_extract};
my $db = Bio::DB::Fasta-&gt;new($genome_fasta);
my ($seq_id, $start, $end) = @{$gene_locations{$gene_id_to_extract}};
my $gene_dna_sequence = $db-&gt;seq($seq_id, $start, $end);

# Step 3: Translate DNA sequence into protein sequence
my $gene_protein_sequence = translate_dna_to_protein($gene_dna_sequence);

# Print the protein sequence
print &quot;Protein Sequence:\n$gene_protein_sequence\n&quot;;

# Subroutine to translate DNA sequence to protein sequence
# Takes the DNA as a plain string and returns the protein as a plain string
sub translate_dna_to_protein {
    my ($dna_sequence) = @_;
    my $seq_obj = Bio::Seq-&gt;new(-seq =&gt; $dna_sequence, -alphabet =&gt; &#039;dna&#039;);
    my $protein_sequence = $seq_obj-&gt;translate-&gt;seq;
    return $protein_sequence;
}</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44448/python-script-to-extract-a-protein-sequence-from-a-genome-using-a-general-feature-format-gff-file</guid>
	<pubDate>Thu, 01 Feb 2024 01:48:49 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44448/python-script-to-extract-a-protein-sequence-from-a-genome-using-a-general-feature-format-gff-file</link>
	<title><![CDATA[Python script to extract a protein sequence from a genome using a General Feature Format (GFF) file !]]></title>
	<description><![CDATA[<code>#You typically need the corresponding genome sequence file in FASTA format. The GFF file contains information about the
#features (such as genes) in the genome, including their locations and annotations.

#The outline of the steps :

#Parse the GFF file to extract information about the gene locations.
#Use the gene locations to extract the corresponding DNA sequences from the genome in FASTA format.
#Translate the DNA sequences into protein sequences.

#Simple example using Python and Biopython


from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq

def extract_protein_sequence(gff_file, genome_fasta, gene_id):
    &quot;&quot;&quot;Return the translation of the gene gene_id located through a GFF file.

    gff_file     -- path to the GFF annotation; only rows whose type is gene are used
    genome_fasta -- path to the genome FASTA, read as a single record
    gene_id      -- value of the first attribute (normally ID=...) of the wanted gene

    Raises KeyError when gene_id is not among the gene features.
    The strand column is not read, so the forward-strand sequence is translated.
    &quot;&quot;&quot;
    # Step 1: Parse the GFF file
    gene_locations = {}
    with open(gff_file, &#039;r&#039;) as gff:
        for line in gff:
            if not line.startswith(&#039;#&#039;):
                fields = line.strip().split(&#039;\t&#039;)
                # Blank or truncated rows have fewer than nine columns
                if len(fields) &gt;= 9 and fields[2] == &#039;gene&#039;:
                    # Use a separate name: assigning to gene_id here overwrote the
                    # requested ID, so the last gene in the file was always returned
                    feature_id = fields[8].split(&#039;;&#039;)[0].split(&#039;=&#039;)[1]
                    gene_locations[feature_id] = (int(fields[3]), int(fields[4]))

    # Step 2: Extract DNA sequence from the genome
    genome_record = SeqIO.read(genome_fasta, &#039;fasta&#039;)
    gene_start, gene_end = gene_locations[gene_id]
    # GFF coordinates are 1-based and inclusive; Python slices are 0-based and end-exclusive
    gene_dna_sequence = genome_record.seq[gene_start - 1:gene_end]

    # Step 3: Translate DNA sequence into protein sequence
    gene_protein_sequence = gene_dna_sequence.translate()

    return gene_protein_sequence

# Example usage
gff_file = &#039;path/to/your/file.gff&#039;
genome_fasta = &#039;path/to/your/genome.fasta&#039;
gene_id_to_extract = &#039;your_gene_id&#039;

protein_sequence = extract_protein_sequence(gff_file, genome_fasta, gene_id_to_extract)
print(protein_sequence)</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44447/bash-script-to-calculate-difference-between-column</guid>
	<pubDate>Thu, 01 Feb 2024 01:28:44 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44447/bash-script-to-calculate-difference-between-column</link>
	<title><![CDATA[Bash script to calculate difference between column !]]></title>
	<description><![CDATA[<code>Space Separated

# Column 5 = column 2 minus column 4; assigning a field makes awk rebuild the line with OFS (one space by default)
awk &#039;{ $5 = $2 - $4; print }&#039; inputput.txt &gt; outdiff.txt

Or with tab separation:

# Same calculation, with the rebuilt line joined by tabs
awk -v OFS=&quot;\t&quot; &#039;{ $5 = $2 - $4; print }&#039; inputput.txt &gt; outdiff.txt</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44430/raku-script-to-find-ssrs-in-fastq-file</guid>
	<pubDate>Sun, 14 Jan 2024 12:05:24 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44430/raku-script-to-find-ssrs-in-fastq-file</link>
	<title><![CDATA[Raku script to find SSRs in fastq file !]]></title>
	<description><![CDATA[<code># Report every stretch of $sequence that is immediately followed by a second copy of itself.
# Each hit is a hash with 1-based start and end positions, the unit length and the unit itself.
sub find-ssrs(Str $sequence) {
    my @ssrs;

    # Positions are 1-based; begin at 1 so the first base is a candidate start too
    for 1..$sequence.chars -&gt; $start {
        # Try the longest unit first, shrinking down to a single base
        for $sequence.chars...$start -&gt; $end {
            my $repeat-length = $end - $start + 1;
            my $repeat = $sequence.substr($start - 1, $repeat-length);

            # .index returns Nil when the unit is absent, and Nil == 0 holds,
            # so every candidate used to match; test the prefix explicitly
            if $sequence.substr($end).starts-with($repeat) {
                push @ssrs, %(
                    start   =&gt; $start,
                    end     =&gt; $end,
                    length  =&gt; $repeat-length,
                    sequence =&gt; $repeat
                );
            }
        }
    }

    return @ssrs;
}

# Run find-ssrs on the sequence line of every four-line record in a FASTQ file
# and print the hits.
sub process-fastq-file(Str $filename) {
    my $fh = open $filename, :r;

    my $record-count = 0;
    # Raku file handles have no .readline; .get returns the next line without
    # its newline, or Nil at end of file. The header line is read and discarded.
    while $fh.get.defined {
        $record-count++;
        my $sequence = $fh.get // &#039;&#039;;

        # Skipping the next two lines (comment and quality lines)
        $fh.get;
        $fh.get;

        my @ssrs = find-ssrs($sequence);

        if @ssrs {
            # The sequence is the second line of its four-line record
            my $line-number = ($record-count - 1) * 4 + 2;
            say &quot;SSRs found in sequence at line $line-number:&quot;;
            for @ssrs -&gt; $ssr {
                say &quot;  Start: $ssr&lt;start&gt;, End: $ssr&lt;end&gt;, Length: $ssr&lt;length&gt;, Sequence: $ssr&lt;sequence&gt;&quot;;
            }
        }
    }

    $fh.close;
}

# Replace &#039;your_fastq_file.fastq&#039; with the path to your FASTQ file
process-fastq-file(&#039;your_fastq_file.fastq&#039;);</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44429/raku-script-to-calculate-gc-content</guid>
	<pubDate>Sun, 14 Jan 2024 11:56:51 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44429/raku-script-to-calculate-gc-content</link>
	<title><![CDATA[Raku script to calculate GC content !]]></title>
	<description><![CDATA[<code># Percentage of G and C bases (upper or lower case) among all characters of $sequence.
# Returns 0 for an empty sequence.
sub calculate-gc-content(Str $sequence) {
    my $total-bases = $sequence.chars;

    # Nothing to count, and dividing by zero would fail
    return 0 unless $total-bases;

    my $gc-count = $sequence.comb(/&lt;[GCgc]&gt;/).elems;

    return $gc-count / $total-bases * 100;
}

my $dna_sequence = &quot;ATGCGCTAAAGCGCGCGCCTTACGCGCGCGCGC&quot;;
my $gc_content = calculate-gc-content($dna_sequence);

say &quot;DNA Sequence: $dna_sequence&quot;;
say &quot;GC Content: $gc_content%&quot;;</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>

</channel>
</rss>