Extract FASTA sequences with IDs listed in another file!
# IDs are in test.txt - one ID per line
# Sequences are in test.fa
grep -w -A 2 -f test.txt test.fa --no-group-separator
# seqtk
seqtk subseq test.fa test.txt
# faSomeRecords
faSomeRecords in.fa listFile out.fa
# seqkit
seqkit grep -n -f list.txt sequences.fas > newfile2.fas
944 days ago
Bash script to split a multi-FASTA file!
# Using awk, we can easily split a file (multi.fa) into chunks of size N (here, N=500) with the following one-liner:
awk 'BEGIN {n=0;} /^>/ {if(n%500==0){file=sprin...
878 days ago
878 days ago
Command line to print disk usage in a Linux terminal!
# Print disk usage - perl
du -h |perl -e'%h=map{/.\s/;99**(ord$&&7)-$`,$_}`du -h`;die@h{sort%h}'
# Bash
du -k * | sort -nr | cut -f2 | xargs -d '\n' du -sh
# Base
du -scBM | sort -n
# More
du -s * | sort -rn | cut -f2- | xargs -d "\n" du -sh
869 days ago
866 days ago
Multiline FASTA to single-line FASTA!
perl -pe '$. > 1 and /^>/ ? print "\n" : chomp' in.fasta > out.fasta
850 days ago
Bash command to count reads in a FASTQ file!
#ref fastq file #ref1_1.fq)
echo $(( $(wc -l...
847 days ago
847 days ago
Bash command to explore the GenBank assembly summary!
wget https://ftp.ncbi.nlm.nih.gov/genomes/genbank/assembly_summary_genbank.txt
pip3 install csvkit
csvcut -t -K 1 -c 'excluded_from_refseq' assembly_summary_genbank.txt \
  | tail -n +2 | tr ";" "\n" \
  | sed -e 's/^ //' -e 's/ $//' | grep -v '""' \
  | sort | uniq -c | sort -nr
823 days ago
Bash script to convert multiline FASTA to single-line FASTA!
# file.fa is a multiline FASTA file
awk '/^>/ {printf("\n%s\n",$0);next; } { printf("%s",$0);} END {printf("\n");}' < file.fa
821 days ago