Skip to content

Commit

Permalink
🎨 Polish
Browse files Browse the repository at this point in the history
Signed-off-by: zethson <lukas.heumos@posteo.net>
  • Loading branch information
Zethson committed Nov 18, 2024
1 parent 56412c1 commit 53b99d7
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions lamin_utils/_map_synonyms.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,9 @@ def map_synonyms(
# __agg__ is a column of identifiers based on case_sensitive
df["__agg__"] = to_str(df[field], case_sensitive=case_sensitive)

# Get both exact matches and synonyms
# field_map is {"__agg__": field_value} for mappable values
field_map = pd.merge(mapped_df, df, on="__agg__").set_index("__agg__")[field]

# Always check synonyms for unmatched terms
unmapped_terms = set(mapped_df["__agg__"]) - set(field_map.index)
if unmapped_terms:
syn_map = explode_aggregated_column_to_map(
Expand All @@ -96,7 +95,9 @@ def map_synonyms(
)

if not case_sensitive:
# lowercase the synonym keys so they match the case-insensitive identifiers
syn_map.index = syn_map.index.str.lower()
# TODO: allow returning duplicated entries
syn_map = syn_map[syn_map.index.drop_duplicates()]

# Only keep synonym mappings for terms not found in field_map
Expand All @@ -113,6 +114,7 @@ def map_synonyms(
logger.info(f"standardized {n_mapped}/{n_input} terms")

if return_mapper:
# return only the terms that were mapped to a different value
mapper = mapped[~mapped.isna()].to_dict()
mapper = {k: v for k, v in mapper.items() if k != v}
if keep is False:
Expand All @@ -124,6 +126,7 @@ def map_synonyms(
else:
return mapper
else:
# returns a list in the input order with synonyms replaced
mapped_list = (
mapped.infer_objects(copy=False).fillna(mapped_df["orig_ids"]).tolist()
)
Expand Down

0 comments on commit 53b99d7

Please sign in to comment.