-
Notifications
You must be signed in to change notification settings - Fork 2
Useful PostgresQL Commands
pg_dump -U gtdb -h watson.ace.uq.edu.au -T "genes_in_genomes" -T "genes_unique" -T "pfam_" -T "tigrfam_" gtdb_release80 > ~/gtdb_bk/gtdb_r80_20170926.sql
Those following commands create 2 temporary tables then display the presence of markers for both Bacterial and Archaeal sets:
CREATE TEMPORARY TABLE temp_bacterial AS (select genome_id,count(*) as count_bacterial_count from aligned_markers where evalue is not NULL and marker_id in (SELECT marker_id from marker_set_contents WHERE set_id = 1) GROUP BY genome_id);
CREATE TEMPORARY TABLE temp_archaea AS (select genome_id,count(*) as count_archaea from aligned_markers where evalue is not NULL and marker_id in (SELECT marker_id from marker_set_contents WHERE set_id = 2) GROUP BY genome_id);
select ta.genome_id,round(count_archaea::numeric/(SELECT count(*) from marker_set_contents WHERE set_id = 2 GROUP BY set_id)100,2) as percent_archaea, round(count_bacterial_count::numeric/(SELECT count() from marker_set_contents WHERE set_id = 1 GROUP BY set_id)*100,2) as percent_bacterial, gtdb_domain from temp_archaea as ta FULL OUTER JOIN temp_bacterial tb on tb.genome_id=ta.genome_id LEFT JOIN genomes g on g.id = ta.genome_id LEFT JOIN metadata_taxonomy as mt on mt.id = g.id WHERE g.id in (SELECT id from genomes where genome_source_id not in (2,3)) ORDER BY gtdb_domain;
Displays the number of NCBI genomes and SRA genomes with gtdb_domain equals to "d__Bacteria"
SELECT * from (SELECT 1 as lvl,'PHYLA' as rank,count(distinct(gtdb_phylum)) from metadata_taxonomy LEFT join genomes using(id) where (genome_source_id != 1 or id in (SELECT genome_id from genome_list_contents where list_id = 479)) and gtdb_domain like 'd__Bacteria' and gtdb_phylum is not NULL UNION SELECT 2 as lvl,'CLASS' as rank,count(distinct(gtdb_class)) from metadata_taxonomy LEFT join genomes using(id) where (genome_source_id != 1 or id in (SELECT genome_id from genome_list_contents where list_id = 479)) and gtdb_domain like 'd__Bacteria' and gtdb_class is not NULL UNION SELECT 3 as lvl,'ORDER' as rank,count(distinct(gtdb_order)) from metadata_taxonomy LEFT join genomes using(id) where (genome_source_id != 1 or id in (SELECT genome_id from genome_list_contents where list_id = 479)) and gtdb_domain like 'd__Bacteria' and gtdb_order is not NULL UNION SELECT 4 as lvl,'FAMILY' as rank,count(distinct(gtdb_family)) from metadata_taxonomy LEFT join genomes using(id) where (genome_source_id != 1 or id in (SELECT genome_id from genome_list_contents where list_id = 479)) and gtdb_domain like 'd__Bacteria' and gtdb_family is not NULL UNION SELECT 5 as lvl,'GENUS' as rank,count(distinct(gtdb_genus)) from metadata_taxonomy LEFT join genomes using(id) where (genome_source_id != 1 or id in (SELECT genome_id from genome_list_contents where list_id = 479)) and gtdb_domain like 'd__Bacteria' and gtdb_genus is not NULL UNION SELECT 6 as lvl,'SPECIES' as rank,count(distinct(gtdb_species)) from metadata_taxonomy LEFT join genomes using(id) where (genome_source_id != 1 or id in (SELECT genome_id from genome_list_contents where list_id = 479)) and gtdb_domain like 'd__Bacteria' and gtdb_species is not NULL) as t order by lvl;