Skip to content

Commit

Permalink
Merge pull request #90 from galaxyproject/hunter/89-update-organism-list
Browse files Browse the repository at this point in the history
chore: update organisms list for 2024-09-20 (#89)
  • Loading branch information
NoopDog authored Sep 23, 2024
2 parents 5efec54 + 85efa7b commit e33d5db
Show file tree
Hide file tree
Showing 3 changed files with 594 additions and 6 deletions.
12 changes: 6 additions & 6 deletions files/build-genomes-files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
import requests

# Google Sheets "gviz" endpoint requesting the GenomeDataTypes_Summary.csv
# sheet as CSV (tqx=out:csv).
GENOMES_SOURCE_URL = "https://docs.google.com/spreadsheets/d/1NRfTvebPl6zJ0l9tCqBtq6YCrwV6_XDBlheq3L5HcvQ/gviz/tq?tqx=out:csv&sheet=GenomeDataTypes_Summary.csv"
# NOTE(review): ASSEMBLIES_URL is assigned twice in a row; as written, the
# second assignment (assemblyList.json) is the one that takes effect. This
# looks like the before/after pair of a diff -- confirm the first assignment
# should be dropped.
ASSEMBLIES_URL = "https://hgdownload.soe.ucsc.edu/hubs/BRC/assembly.list.json"
ASSEMBLIES_URL = "https://hgdownload.soe.ucsc.edu/hubs/BRC/assemblyList.json"

# Path the result table is written to as tab-separated text.
OUTPUT_PATH = "files/source/genomes.tsv"

def get_duplicate_ids(genomes_df):
    """Return the assembly IDs that appear more than once in ``genomes_df``.

    The IDs are read from the "Genome Version/Assembly ID" column. The
    returned list follows the ordering produced by ``value_counts`` (most
    frequent first) and is empty when every ID is unique.
    """
    id_counts = genomes_df["Genome Version/Assembly ID"].value_counts()
    # Keep only the IDs whose occurrence count exceeds one.
    repeated = id_counts[id_counts > 1]
    return list(repeated.index)

def get_num_unmatched_assemblies(assemblies_df, result_df):
    """Count the distinct ``asmId`` values in ``assemblies_df`` missing from ``result_df``."""
    known_ids = set(assemblies_df["asmId"])
    missing_ids = known_ids.difference(result_df["asmId"])
    return len(missing_ids)
def get_unmatched_assemblies(assemblies_df, result_df):
    """Return the set of ``asmId`` values in ``assemblies_df`` missing from ``result_df``."""
    known_ids = set(assemblies_df["asmId"])
    return known_ids.difference(result_df["asmId"])

def build_genomes_files():
print("Building files")
Expand All @@ -30,9 +30,9 @@ def build_genomes_files():

result_df = gen_bank_merge_df.combine_first(ref_seq_merge_df).dropna(subset=["ucscBrowser"])

num_unmatched_assemblies = get_num_unmatched_assemblies(assemblies_df, result_df)
if (num_unmatched_assemblies != 0):
print(f"{num_unmatched_assemblies} assemblies had no matches and are omitted")
unmatched_assemblies = get_unmatched_assemblies(assemblies_df, result_df)
if (len(unmatched_assemblies) != 0):
print(f"Omitted {len(unmatched_assemblies)} assemblies that had no matches: {", ".join(unmatched_assemblies)}")

result_df.to_csv(OUTPUT_PATH, index=False, sep="\t")

Expand Down
Loading

0 comments on commit e33d5db

Please sign in to comment.