<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Simulate the reads !]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/43556/simulate-the-reads?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/43556/simulate-the-reads?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/43556/simulate-the-reads</guid>
	<pubDate>Wed, 20 Oct 2021 04:52:40 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/43556/simulate-the-reads</link>
	<title><![CDATA[Simulate the reads !]]></title>
	<description><![CDATA[<code># make reference for randomreads.sh
# randomreads.sh part of BBTools/BBMap https://sourceforge.net/projects/bbmap/
/genetics/elbers/bbmap-38.86/randomreads.sh build=1 \
seed=1 \
ref=GCA_003401745.1_ASM340174v1_genomic.fna_upper.diploid.fasta.gz \
illuminanames=t addslash=t \
pacbio=t pbmin=0.13 pbmax=0.17 \
reads=100 paired=f \
gaussianlength=t \
minlength=1000 midlength=20000 maxlength=100000 \
out=/dev/null




# make 60x haploid coverage for Illumina reads
/genetics/elbers/bbmap-38.86/randomreads.sh build=1 \
ref=GCA_003401745.1_ASM340174v1_genomic.fna_upper.diploid.fasta.gz \
illuminanames=t addslash=t \
coverage=30 paired=t maxinsert=550 mininsert=450 \
out1=illumina1.fastq.gz out2=illumina2.fastq.gz &gt; random_reads_illumina.log 2&gt;&amp;1




# interleave the paired-end reads
# reformat.sh part of BBTools/BBMap https://sourceforge.net/projects/bbmap/
/genetics/elbers/bbmap-38.86/reformat.sh \
in=illumina1.fastq.gz in2=illumina2.fastq.gz out=illumina.int.fastq 2&gt;/dev/null




# use KmerGenie 1.7051 to get an idea of k-mer with that produces longest N50
# http://kmergenie.bx.psu.edu/
mkdir -p /genetics/elbers/test/fly2/kmergenie-illumina-raw-reads

cd /genetics/elbers/test/fly2/kmergenie-illumina-raw-reads
/genetics/elbers/kmergenie-1.7051/kmergenie ../illumina.int.fastq \
&gt; kmergenie-illumina-raw-reads.log 2&gt;&amp;1
rm ../illumina.int.fastq

k=`grep &quot;^best k:&quot; \
kmergenie-illumina-raw-reads.log | grep -Po &quot;\d+&quot;` 
echo &quot;best k=${k}&quot;




# make 30x haploid coverage for PacBio CLR reads
# error rate from 13 - 15 % minimum 1000bp midlength 20000bp maximum 30000bp
cd /genetics/elbers/test/fly2

/genetics/elbers/bbmap-38.86/randomreads.sh build=1 \
ow=t seed=1 \
ref=GCA_003401745.1_ASM340174v1_genomic.fna_upper.diploid.fasta.gz \
illuminanames=t addslash=t \
pacbio=t pbmin=0.13 pbmax=0.15 \
coverage=15 paired=f \
gaussianlength=t \
minlength=1000 midlength=20000 maxlength=30000 \
out=pacbio.fastq.gz &gt; random_reads_pacbio.log 2&gt;&amp;1



# make 30x haploid coverage for PacBio reads for Hifi reads
# error rate from 1 - 0.1 % minimum 9000bp midlength 10000bp max 12000bp
/genetics/elbers/bbmap-38.86/randomreads.sh build=1 \
ow=t seed=1 \
ref=GCA_003401745.1_ASM340174v1_genomic.fna_upper.diploid.fasta.gz \
illuminanames=t addslash=t \
pacbio=t pbmin=0.001 pbmax=0.01 \
coverage=15 paired=f \
gaussianlength=t \
minlength=9000 midlength=10000 maxlength=12000 \
out=hifi.fastq.gz &gt; random_reads_pacbio_hifi.log 2&gt;&amp;1</code>]]></description>
	<dc:creator>Surabhi Chaudhary</dc:creator>
</item>

</channel>
</rss>