# make reference for randomreads.sh
# randomreads.sh part of BBTools/BBMap https://sourceforge.net/projects/bbmap/
/genetics/elbers/bbmap-38.86/randomreads.sh build=1 \
seed=1 \
ref=GCA_003401745.1_ASM340174v1_genomic.fna_upper.diploid.fasta.gz \
illuminanames=t addslash=t \
pacbio=t pbmin=0.13 pbmax=0.17 \
reads=100 paired=f \
gaussianlength=t \
minlength=1000 midlength=20000 maxlength=100000 \
out=/dev/null
# make 60x haploid coverage for Illumina reads
/genetics/elbers/bbmap-38.86/randomreads.sh build=1 \
ref=GCA_003401745.1_ASM340174v1_genomic.fna_upper.diploid.fasta.gz \
illuminanames=t addslash=t \
coverage=30 paired=t maxinsert=550 mininsert=450 \
out1=illumina1.fastq.gz out2=illumina2.fastq.gz > random_reads_illumina.log 2>&1
# interleave the paired-end reads
# reformat.sh part of BBTools/BBMap https://sourceforge.net/projects/bbmap/
/genetics/elbers/bbmap-38.86/reformat.sh \
in=illumina1.fastq.gz in2=illumina2.fastq.gz out=illumina.int.fastq 2>/dev/null
# use KmerGenie 1.7051 to get an idea of k-mer with that produces longest N50
# http://kmergenie.bx.psu.edu/
mkdir -p /genetics/elbers/test/fly2/kmergenie-illumina-raw-reads
cd /genetics/elbers/test/fly2/kmergenie-illumina-raw-reads
/genetics/elbers/kmergenie-1.7051/kmergenie ../illumina.int.fastq \
> kmergenie-illumina-raw-reads.log 2>&1
rm ../illumina.int.fastq
k=`grep "^best k:" \
kmergenie-illumina-raw-reads.log | grep -Po "\d+"`
echo "best k=${k}"
# make 30x haploid coverage for PacBio CLR reads
# error rate from 13 - 15 % minimum 1000bp midlength 20000bp maximum 30000bp
cd /genetics/elbers/test/fly2
/genetics/elbers/bbmap-38.86/randomreads.sh build=1 \
ow=t seed=1 \
ref=GCA_003401745.1_ASM340174v1_genomic.fna_upper.diploid.fasta.gz \
illuminanames=t addslash=t \
pacbio=t pbmin=0.13 pbmax=0.15 \
coverage=15 paired=f \
gaussianlength=t \
minlength=1000 midlength=20000 maxlength=30000 \
out=pacbio.fastq.gz > random_reads_pacbio.log 2>&1
# make 30x haploid coverage for PacBio reads for Hifi reads
# error rate from 1 - 0.1 % minimum 9000bp midlength 10000bp max 12000bp
/genetics/elbers/bbmap-38.86/randomreads.sh build=1 \
ow=t seed=1 \
ref=GCA_003401745.1_ASM340174v1_genomic.fna_upper.diploid.fasta.gz \
illuminanames=t addslash=t \
pacbio=t pbmin=0.001 pbmax=0.01 \
coverage=15 paired=f \
gaussianlength=t \
minlength=9000 midlength=10000 maxlength=12000 \
out=hifi.fastq.gz > random_reads_pacbio_hifi.log 2>&1