<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: All]]></title>
	<link>https://bioinformaticsonline.com/snippets?offset=40</link>
	<atom:link href="https://bioinformaticsonline.com/snippets?offset=40" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44340/r-script-for-circos-plot</guid>
	<pubDate>Tue, 11 Jul 2023 01:41:03 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44340/r-script-for-circos-plot</link>
	<title><![CDATA[R script for Circos plot !]]></title>
	<description><![CDATA[<code>#!/usr/bin/env Rscript
library(RCircos)

# usage: Rscript make_circos.r &lt;sv table&gt; &lt;sample name&gt; &lt;gene label table&gt; &lt;cnv data&gt; &lt;out&gt;

# parse args
args = commandArgs(trailingOnly=TRUE)
sv.file &lt;- args[1]
sample.name &lt;- args[2]
gene.label.file &lt;- args[3]
cnv.file &lt;- args[4]
out.file &lt;- args[5]
# TMP &lt;- Sys.getenv(&quot;TMP_DIR&quot;) 
# tmp.bed = paste0(TMP ,&quot;/&quot; , sample.name, &quot;_bkpts.bed&quot;)
tmp.bed = paste0(sample.name, &quot;_bkpts.bed&quot;)

# load prereq data
data(UCSC.HG19.Human.CytoBandIdeogram)

# set core parameters
chr.exclude &lt;- NULL;
cyto.info &lt;- UCSC.HG19.Human.CytoBandIdeogram;
tracks.inside &lt;- 10;
tracks.outside &lt;- 5;
RCircos.Set.Core.Components(cyto.info, chr.exclude, tracks.inside, tracks.outside);
rcircos.params &lt;- RCircos.Get.Plot.Parameters();
rcircos.params$text.size &lt;- 1
RCircos.Reset.Plot.Parameters(rcircos.params)
rcircos.cyto &lt;- RCircos.Get.Plot.Ideogram();
rcircos.position &lt;- RCircos.Get.Plot.Positions();
RCircos.List.Plot.Parameters()

link.data &lt;- tryCatch(read.table(sv.file, sep = &#039;,&#039;, stringsAsFactors = F, header = T), error=function(e) data.frame())
                    
if (nrow(link.data) != 0) {
  
  link.data &lt;- transform(link.data,
                         chromStart = as.numeric(chromStart),
                         chromEnd = as.numeric(chromEnd),
                         chromStart.1 = as.numeric(chromStart.1),
                         chromEnd.1 = as.numeric(chromEnd.1))
  
  # write a bed file of all breakpoints to intersect with gene label table
  bkpts.1 &lt;- link.data[c(&quot;Chromosome&quot;, &quot;chromStart&quot;, &quot;chromEnd&quot;)]
  bkpts.2 &lt;- link.data[c(&quot;Chromosome.1&quot;, &quot;chromStart.1&quot;, &quot;chromEnd.1&quot;)]
  colnames(bkpts.2) &lt;- colnames(bkpts.1)
  write.table(rbind(bkpts.1, bkpts.2), tmp.bed, sep = &#039;\t&#039;, quote = F, col.names = F, row.names = F)
  
  # only keep labels that fall within an event
  print(paste0(&quot;bedtools intersect -wb -a &quot;, tmp.bed, &quot; -b &quot;, gene.label.file))
  gene.labels &lt;- system(paste0(&quot;bedtools intersect -wb -a &quot;, tmp.bed, &quot; -b &quot;, gene.label.file), intern = T)
  gene.labels &lt;- data.frame(do.call(&#039;rbind&#039;, strsplit(gene.labels, &#039;\t&#039;, fixed=TRUE)), stringsAsFactors = F)
  if (nrow(gene.labels) &gt; 0) {
    gene.labels &lt;- gene.labels[,4:ncol(gene.labels)]
    
    # deduplicate labels
    gene.labels &lt;- gene.labels[!duplicated(gene.labels),]
    colnames(gene.labels) &lt;- c(&quot;Chromosome&quot;, &quot;chromStart&quot;, &quot;chromEnd&quot;, &quot;gene&quot;)
    gene.labels &lt;- transform(gene.labels,
                             chromStart = as.numeric(chromStart),
                             chromEnd = as.numeric(chromEnd))
  }
  
  # make the plot
  png(file=out.file, height=3000, width=3000, res = 500)
  RCircos.Set.Plot.Area()
  RCircos.Chromosome.Ideogram.Plot()
  track.num &lt;- 2
  RCircos.Link.Plot(link.data, track.num, TRUE)
  title(sample.name, line=-1)
  
  # label the genes
  if (nrow(gene.labels) &gt; 0) {
    name.col &lt;- 4
    side &lt;- &quot;out&quot;
    track.num &lt;- 1
    RCircos.Gene.Connector.Plot(gene.labels, track.num, side);
    track.num &lt;- 2
    RCircos.Gene.Name.Plot(gene.labels, name.col, track.num, side);
  }
    
  # remove intermediate file
  system(paste0(&quot;rm -f &quot;, tmp.bed))
  
} else {
  # make empty plot
  png(file=out.file, height=3000, width=3000, res = 500)
  RCircos.Set.Plot.Area()
  RCircos.Chromosome.Ideogram.Plot()
  title(sample.name, line=-1)
}
                    
# parse cnv data
cnv = tryCatch(read.csv(cnv.file, stringsAsFactors = F), error=function(e) data.frame())
               
if (nrow(cnv) != 0) {
    colnames(cnv) &lt;- c(&quot;Chromosome&quot;, &quot;chromStart&quot;, &quot;chromEnd&quot;, &quot;cnv&quot;)
    cnv$Chromosome &lt;- paste0(&#039;chr&#039;, cnv$Chromosome)
    cnv$GeneName &lt;- &quot;gene&quot;
    cnv &lt;- cnv[, c(&quot;Chromosome&quot;, &quot;chromStart&quot;, &quot;chromEnd&quot;, &quot;GeneName&quot;, &quot;cnv&quot;)]
}
                    
# add CNV heatmap track
if (nrow(cnv) != 0) {
  RCircos.Heatmap.Plot(cnv, data.col = 5, track.num = 1, side = &quot;in&quot;)
}
                   
dev.off()

#-------- DATA FORMAT ------
chr1	11869	14412	DDX11L1
chr1	14363	29806	WASH7P
chr1	29554	31109	MIR1302-10
chr1	34554	36081	FAM138A
chr1	52473	54936	OR4G4P
chr1	62948	63887	OR4G11P
chr1	69091	70008	OR4F5
chr1	131025	134836	CICP27
chr1	134901	139379	AL627309.1
chr1	157784	157887	RNU6-1100P
chr1	227615	267253	AP006222.2
chr1	228292	228775	AP006222.1
chr1	317720	453948	RP4-669L17.10
chr1	326096	328112	RP4-669L17.8
chr1	329431	332236	CICP7
chr1	334126	334305	RP4-669L17.4
chr1	367640	368634	OR4F29
chr1	379105	379467	WBP1LP7</code>]]></description>
	<dc:creator>Abhi</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44286/download-lumpy-skin-disease-data</guid>
	<pubDate>Wed, 22 Mar 2023 05:12:12 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44286/download-lumpy-skin-disease-data</link>
	<title><![CDATA[Download lumpy skin disease data !]]></title>
	<description><![CDATA[<code>Location

https://www.ncbi.nlm.nih.gov/sra?linkname=bioproject_sra_all&amp;from_uid=880745


The raw genome sequence data from the 2022 outbreak in India is available in the SRA Project PRJNA880745</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44283/perl-script-for-chi-squared-test</guid>
	<pubDate>Tue, 21 Mar 2023 03:53:45 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44283/perl-script-for-chi-squared-test</link>
	<title><![CDATA[Perl script for chi-squared test !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl
#
# chidi.pl 
#
# A script to perform a chi-squared test of the dinucleotide frequencies of two FASTA files
# Last updated by: $Author$
# Last updated on: $Date$

use strict;
use warnings;
use Getopt::Long;
use FAlite;



# sanity checks
die &quot;Usage: chidi.pl &lt;file1&gt; &lt;file2&gt;\n&quot; if (!$ARGV[1]);

my @dinucs = qw (AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT);

# hashes for obersered and expected dinucleotide frequencies of both files

my %file1_ob;
my %file2_ob;
my %file1_ex;
my %file2_ex;
								
############################################################
# Read sequence file 1
############################################################

open(FILE,&quot;$ARGV[0]&quot;) || die &quot;Can&#039;t open $ARGV[0]\n&quot;;
my $fasta = new FAlite(\*FILE);

# loop through each sequence in file 1
while(my $entry = $fasta-&gt;nextEntry) {	
	my $seq = uc($entry-&gt;seq);
	# to count dinucleotides, loop through sequence, take 2 bp and increment the hash counter
	foreach my $i (0..length($seq)){
	    my $tmp = substr($seq,$i,2);		
		$file1_ob{$tmp}++;
	}
}
close(FILE);


############################################################
# Read sequence file 2
############################################################

open(FILE,&quot;$ARGV[1]&quot;) || die &quot;Can&#039;t open $ARGV[1]\n&quot;;
$fasta = new FAlite(\*FILE);

# loop through each sequence in file 1
while(my $entry = $fasta-&gt;nextEntry) {	
	my $seq = uc($entry-&gt;seq);
	# to count dinucleotides, loop through sequence, take 2 bp and increment the hash counter
	foreach my $i (0..length($seq)){
	    my $tmp = substr($seq,$i,2);		
		$file2_ob{$tmp}++;
	}
}
close(FILE);


############################################################
# Perform chi-squared test
############################################################

# need total of all counts in both sequences, plus totals of &#039;rows&#039; in chi-square table

my $total;
my $row1;
my $row2;

foreach my $di (@dinucs){
	$row1  += $file1_ob{$di};
	$row2  += $file2_ob{$di};
	$total += ($file1_ob{$di} + $file2_ob{$di});
}


# now calculate expected values

foreach my $di (@dinucs){
	# calculate (column total * row total) / $total
	$file1_ex{$di} = sprintf(&quot;%.2f&quot;,(($file1_ob{$di}+$file2_ob{$di}) * $row1) / $total);
	$file2_ex{$di} = sprintf(&quot;%.2f&quot;,(($file1_ob{$di}+$file2_ob{$di}) * $row2) / $total);	
}

# now calculate chi-squared values
my ($chi1,$chi2);
my $chi_total;
print &quot;\tObs1\tExp2\t\tChi1\tObs2\tExp2\t\tChi2\n&quot;;
foreach my $di (@dinucs){
	$chi1 = sprintf(&quot;%.2f&quot;,(($file1_ob{$di} - $file1_ex{$di})**2)/$file1_ex{$di});
	$chi2 = sprintf(&quot;%.2f&quot;,(($file2_ob{$di} - $file2_ex{$di})**2)/$file2_ex{$di});	
	print &quot;$di\t$file1_ob{$di}\t$file1_ex{$di}\t$chi1\t$file2_ob{$di}\t$file2_ex{$di}\t$chi2\n&quot;;

	$chi_total += ($chi1+$chi2);
}

printf  &quot;Chi squared value = %6.2f\n&quot;, $chi_total;
				
print &quot;Significance level at 5% = 25.00\n&quot;;
print &quot;Significance level at 1% = 30.58\n&quot;;


exit(0);</code>]]></description>
	<dc:creator>Neel</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44260/r-script-to-covert-and-export-html-page-to-png</guid>
	<pubDate>Tue, 14 Mar 2023 07:03:42 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44260/r-script-to-covert-and-export-html-page-to-png</link>
	<title><![CDATA[R script to covert and export html page to png]]></title>
	<description><![CDATA[<code># Library
library(streamgraph)
# Create data:
data &lt;- data.frame(
  year=rep(seq(1990,2016) , each=10),
  name=rep(letters[1:10] , 27),
  value=sample( seq(0,1,0.0001) , 270)
)
# Start with a classic stream graph. It is supposed to open in a browser
streamgraph(data, key=&quot;name&quot;, value=&quot;value&quot;, date=&quot;year&quot;)
# Copy the URL of the html window you get
# load webshot library
library(webshot)

#install phantom:
webshot::install_phantomjs()
# Make a webshot in pdf : high quality but can not choose printed zone
webshot(&quot;paste_your_html_here.html&quot; , &quot;output.pdf&quot;, delay = 0.2)

# Make a webshot in png : Low quality - but you can choose shape
webshot(&quot;paste_your_html_here&quot; , &quot;output.png&quot;, delay = 0.2 , cliprect = c(440, 0, 1000, 10))</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44222/raku-script-to-find-palindrome-in-genomes</guid>
	<pubDate>Tue, 07 Mar 2023 14:15:17 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44222/raku-script-to-find-palindrome-in-genomes</link>
	<title><![CDATA[Raku script to find palindrome in genomes !]]></title>
	<description><![CDATA[<code>sub is-palindrome(Str $str) returns Bool {
    $str.=uc; # convert to uppercase
    $str.=subst:g/\s+//; # remove any spaces
    return $str eq $str.flip;
}

sub find-palindromes(Str $dna, Int $min-length, Int $max-length) {
    for $min-length..$max-length -&gt; $length {
        for 0..^$dna.chars - $length -&gt; $pos {
            my $substring = $dna.substr($pos, $length);
            if is-palindrome($substring) {
                say &quot;Palindrome found at position $pos: $substring&quot;;
            }
        }
    }
}

# Example usage
my $dna = &quot;GGATCCATGGCCTAGG&quot;; # example DNA sequence
find-palindromes($dna, 3, 8); # find palindromes with length between 3 and 8</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44221/perl-script-to-find-edit-distance-between-two-sequences</guid>
	<pubDate>Tue, 07 Mar 2023 14:11:00 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44221/perl-script-to-find-edit-distance-between-two-sequences</link>
	<title><![CDATA[Perl script to find edit distance between two sequences !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

use strict;
use warnings;

sub edit_distance {
    my ($s1, $s2) = @_;

    my $len1 = length($s1);
    my $len2 = length($s2);

    my @dp;
    for (my $i = 0; $i &lt;= $len1; $i++) {
        for (my $j = 0; $j &lt;= $len2; $j++) {
            $dp[$i][$j] = 0;
        }
    }

    for (my $i = 0; $i &lt;= $len1; $i++) {
        $dp[$i][0] = $i;
    }

    for (my $j = 0; $j &lt;= $len2; $j++) {
        $dp[0][$j] = $j;
    }

    for (my $i = 1; $i &lt;= $len1; $i++) {
        for (my $j = 1; $j &lt;= $len2; $j++) {
            my $cost = substr($s1, $i-1, 1) eq substr($s2, $j-1, 1) ? 0 : 1;
            $dp[$i][$j] = min($dp[$i-1][$j]+1, $dp[$i][$j-1]+1, $dp[$i-1][$j-1]+$cost);
        }
    }

    return $dp[$len1][$len2];
}

sub min {
    my $min = shift @_;
    foreach (@_) {
        $min = $_ if $_ &lt; $min;
    }
    return $min;
}

# Example usage
my $seq1 = &quot;ACGTAGCTAGCTGACTGAC&quot;;
my $seq2 = &quot;CGTAGCTAGCTGACAGCTA&quot;;
my $distance = edit_distance($seq1, $seq2);
print &quot;The edit distance between $seq1 and $seq2 is $distance.\n&quot;;</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44212/perl-script-to-find-inverted-repeats</guid>
	<pubDate>Tue, 07 Mar 2023 06:25:23 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44212/perl-script-to-find-inverted-repeats</link>
	<title><![CDATA[Perl script to find inverted repeats !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

use strict;
use warnings;

use Bio::SeqIO;
use Bio::Tools::Run::RepeatMasker;

my $genome_file = &quot;genome.fasta&quot;;

# read genome sequence
my $seqio = Bio::SeqIO-&gt;new(-file =&gt; $genome_file, -format =&gt; &quot;fasta&quot;);
my $seqobj = $seqio-&gt;next_seq();
my $seq = $seqobj-&gt;seq();

# run RepeatMasker
my $rm = Bio::Tools::Run::RepeatMasker-&gt;new();
my $rm_report = $rm-&gt;run($genome_file);

# parse RepeatMasker output
while (my $rm_result = $rm_report-&gt;next_result()) {
    my $rm_match = $rm_result-&gt;repeat_consensus();
    my $rm_class = $rm_result-&gt;repeat_class();
    my $rm_start = $rm_result-&gt;start();
    my $rm_end = $rm_result-&gt;end();
    my $rm_strand = $rm_result-&gt;strand();
    
    if ($rm_class eq &quot;Inverted&quot;) {
        my $rm_seq = substr($seq, $rm_start-1, $rm_end-$rm_start+1);
        if ($rm_strand eq &quot;-&quot;) {
            $rm_seq = reverse_complement($rm_seq);
        }
        print &quot;Inverted repeat found at positions $rm_start-$rm_end: $rm_seq\n&quot;;
    }
}

sub reverse_complement {
    my ($seq) = @_;
    $seq = reverse($seq);
    $seq =~ tr/ACGTacgt/TGCAtgca/;
    return $seq;
}</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/44170/identify-genome-wide-synteny-with-lastz-alignment</guid>
	<pubDate>Mon, 05 Dec 2022 04:52:48 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/44170/identify-genome-wide-synteny-with-lastz-alignment</link>
	<title><![CDATA[Identify genome-wide synteny with LASTZ alignment]]></title>
	<description><![CDATA[<code>#This is the walkstrough how to identifiy genome-wide synteny markers based on LASTZ alignment.

Step1：Mask the repeat sequences for both genomes and chromosomes.

RepeatMasker -pa 40 -nolow -norna -gff -xmall -lib custom.TE.lib_for_rice.fa AAChr1.txt RepeatMasker -pa 40 -nolow -norna -gff -xmall -lib custom.TE.lib_for_FF.fa FFChr1.txt

Step2: Alignment using LASTZ and Chain/Net

lastz AAChr1.txt FFChr1.txt K=2200 L=6000 Y=3400 E=30 H=0 O=400 T=1 --format=axt --out=chr01.axt axtChain -linearGap=medium chr01.axt AAChr1.txt FFChr1.txt chr01.axt.chain chainPreNet chr01.axt.chain AAChr1.txt.sizes FFChr1.txt.sizes chr01.chain.filter chainNet chr01.chain.filter -minSpace=1 AAChr1.txt.sizes FFChr1.txt.sizes chr1.chain.filter.tnet chr1.chain.filter.qnet netSyntenic chr1.chain.filter.tnet chr1.chain.filter.tnet.synnet netToAxt chr1.chain.filter.tnet.synnet chr1.chain.filter.tnet.synnet chr01.chain.filter AAChr1.txt FFChr1.txt chr1.chain.filter.tnet.synnet.axt axtSort chr1.chain.filter.tnet.synnet.axt chr1.chain.filter.tnet.synnet.Sort.axt axtToMaf chr1.chain.filter.tnet.synnet.axt AAChr1.txt.sizes FFChr1.txt.sizes -tPrefix=target. -qPrefix=query. chr1.chain.filter.tnet.synnet.axt.maf

Step 3: Get syntenic markers

perl Maf2rawsynteny.pl chr1.chain.filter.tnet.synnet.axt.maf target.AAChr1 query.FFChr1 
perl Get_synteny.pl -i chr1.chain.filter.tnet.synnet.axt.maf -n 0 -m 3 -t target.AAChr1 -q query.FFChr1 -o syn.final.out

@ https://github.com/yiliao1022/Centromere_synteny_project</code>]]></description>
	<dc:creator>BioStar</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/43992/perl-script-to-read-the-next-line-of-a-file</guid>
	<pubDate>Mon, 17 Oct 2022 23:19:56 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/43992/perl-script-to-read-the-next-line-of-a-file</link>
	<title><![CDATA[Perl script to read the next line of a file !]]></title>
	<description><![CDATA[<code>my $line = &lt;$fileHandler&gt;;
while(1) { # keep looping until I say so
    my $nextLine = &lt;$fileHandler&gt;;

    if ($line =~ m/&gt;/ || !defined $nextLine) {
        ### Do the stuff
    }
    ### Do any other stuff;

    last unless defined $nextLine;
    $line = $nextLine;
}</code>]]></description>
	<dc:creator>Abhi</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/43965/extract-the-mapped-and-unmapped-reads</guid>
	<pubDate>Fri, 23 Sep 2022 06:18:33 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/43965/extract-the-mapped-and-unmapped-reads</link>
	<title><![CDATA[Extract the mapped and unmapped reads !]]></title>
	<description><![CDATA[<code>PROCESSORS=20

#Single_End_Layout:
samtools view --threads $PROCESSORS -b -F 4 in.bam &gt; mapped.bam
samtools view --threads $PROCESSORS -b -f 4 in.bam &gt; unmapped.bam

#Paired_End_Layout
samtools view --threads $PROCESSORS -b -f 2 in.bam &gt; mapped.bam
samtools view --threads $PROCESSORS -b -F 2 in.bam &gt; unmapped.bam</code>]]></description>
	<dc:creator>Abhi</dc:creator>
</item>

</channel>
</rss>