Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Polars load fix #39

Merged
merged 2 commits into from
Oct 11, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/meds_etl/omop.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ def extract_metadata(path_to_src_omop_dir: str, path_to_decompressed_dir: str, v
# from (concept ID -> concept code) and (concept ID -> concept name)
print("Generating metadata from OMOP `concept` table")
for concept_file in tqdm(itertools.chain(*get_table_files(path_to_src_omop_dir, "concept")),
- total=len(get_table_files(path_to_src_omop_dir, "concept")[0]),
+ total=len(get_table_files(path_to_src_omop_dir, "concept")[0]) + len(get_table_files(path_to_src_omop_dir, "concept")[1]),
desc="Generating metadata from OMOP `concept` table"):
# Note: Concept table is often split into gzipped shards by default
if verbose:
Expand Down Expand Up @@ -493,7 +493,7 @@ def extract_metadata(path_to_src_omop_dir: str, path_to_decompressed_dir: str, v
# Include map from custom concepts to normalized (ie standard ontology)
# parent concepts, where possible, in the code_metadata dictionary
for concept_relationship_file in tqdm(itertools.chain(*get_table_files(path_to_src_omop_dir, "concept_relationship")),
- total=len(get_table_files(path_to_src_omop_dir, "concept_relationship")),
+ total=len(get_table_files(path_to_src_omop_dir, "concept_relationship")[0]) + len(get_table_files(path_to_src_omop_dir, "concept_relationship")[1]),
desc="Generating metadata from OMOP `concept_relationship` table"):
with load_file(path_to_decompressed_dir, concept_relationship_file) as f:
# This table has `concept_id_1`, `concept_id_2`, `relationship_id` columns
Expand Down Expand Up @@ -522,7 +522,7 @@ def extract_metadata(path_to_src_omop_dir: str, path_to_decompressed_dir: str, v
datasets: List[str] = []
dataset_versions: List[str] = []
for cdm_source_file in tqdm(itertools.chain(*get_table_files(path_to_src_omop_dir, "cdm_source")),
- total=get_table_files(path_to_src_omop_dir, "cdm_source"),
+ total=len(get_table_files(path_to_src_omop_dir, "cdm_source")[0]) + len(get_table_files(path_to_src_omop_dir, "cdm_source")[1]),
desc="Extracting dataset metadata"):
with load_file(path_to_decompressed_dir, cdm_source_file) as f:
cdm_source = read_polars_df(f.name)
Expand Down
Loading