Transpose the file coordinates and plot dendrogram in R
#Save this as tr.awk { for (i=1; i<=NF; ... (2660 days ago)
Download the genome from NCBI using bash script/command
...efseq/fungi/assembly_summary.txt' | awk '{FS="\t"} !/^#/ {print $20}...nk/bacteria/assembly_summary.txt' | awk '{FS="\t"} !/^#/ {print $20}...seq/archaea/assembly_summary.txt' | awk '{FS="\t"} !/^#/ {print $20}...nvertebrate/assembly_summary.txt' | awk '{FS="\t"} !/^#/ {print $20}...2575 days ago
Unzip all the genome file and remove all fasta header except first one
#!/bin/bash gzip -d *.gz FILES=$(pwd)/* for f in $FILES do echo "Processing $f file..." if [[ $f =~ \.fna$ ]]; then awk ' /^>/ && FNR > 1 {next} {print $0} ' $f | s...2574 days ago
Download the gff files from NCBI using bash script/command
...efseq/fungi/assembly_summary.txt' | awk '{FS="\t"} !/^#/ {print $20}...nk/bacteria/assembly_summary.txt' | awk '{FS="\t"} !/^#/ {print $20}...seq/archaea/assembly_summary.txt' | awk '{FS="\t"} !/^#/ {print $20}...nvertebrate/assembly_summary.txt' | awk '{FS="\t"} !/^#/ {print $20}...2566 days ago
Download genomes in batch from NCBI
curl 'ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/bacteria/assembly_summary.txt' | awk '{FS="\t"} !/^#/ {print $20}' | sed -r 's|(ftp://ftp.ncbi.nlm.nih.gov/genomes/all/)(GCA/)([0-9]{3}/)([0-9]{3}/)([0-9]{3}/)(GCA_.+)|\1\2\3\4\5\6/\6_genomic.fna.gz|' > genomic_file (2306 days ago)
893 days ago
2252 days ago
2252 days ago
Bash oneliner to extract all ids from a multifasta file
#List of ids - one per line in allIds.txt $ awk 'BEGIN{while((getline<"allIds.txt")>0)l[">"$1]=1}/^>/{f=!l[$1]}f' seq.fa # You can switch f=!l[$1] to f=l[$1] depending on whether you want to exclude or extract the listed ids (1581 days ago)
2047 days ago