Skip to content

Commit

Permalink
curation: add grant agreement number checks; docstrings fix
Browse files Browse the repository at this point in the history
  • Loading branch information
yashlamba committed Dec 17, 2024
1 parent 504e0cd commit 0bb8b66
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 11 deletions.
12 changes: 9 additions & 3 deletions site/zenodo_rdm/curation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
award_acronym_in_additional_description,
award_acronym_in_description,
award_acronym_in_title,
community_name_award_acronym,
award_number_in_additional_description,
award_number_in_description,
community_data_award_acronym,
contains_high_conf_keywords,
contains_low_conf_keywords,
eu_community_request,
Expand All @@ -36,7 +38,9 @@
"award_acronym_in_additional_description": award_acronym_in_additional_description,
"eu_community_request": eu_community_request,
"eu_subcommunity_declined_request": eu_subcommunity_declined_request,
"community_name_award_acronym": community_name_award_acronym,
"community_data_award_acronym": community_data_award_acronym,
"award_number_in_additional_description": award_number_in_additional_description,
"award_number_in_description": award_number_in_description,
}
"""Rules to run for EU Curation."""

Expand All @@ -53,7 +57,9 @@
"award_acronym_in_additional_description": 0,
"eu_community_request": False,
"eu_subcommunity_declined_request": False,
"community_name_award_acronym": 0,
"community_data_award_acronym": 0,
"award_number_in_additional_description": 0,
"award_number_in_description": 0,
}
"""Rule scores for EU Curation (bool value implies direct approval/decline)."""

Expand Down
44 changes: 36 additions & 8 deletions site/zenodo_rdm/curation/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,22 @@
from invenio_search.engine import dsl


def _award_acronym_number_in_text(award, text):
def _award_acronym_in_text(award, text):
"""Check for award acronym in text (case-insensitive, whitespace-separated words)."""
if award.get("acronym") and (award.get("acronym").lower() in text.lower().split()):
return True
if award.get("number") and (award.get("number") in text):
return False


def _award_number_in_text(award, text):
    """Check whether the award number occurs in the given text.

    The number is matched as a standalone digit run: a hit that is merely
    part of a longer digit sequence (for example inside a DOI suffix or a
    longer identifier) is not counted, which avoids false positives from a
    plain substring search. Letters or punctuation directly around the
    number (``GA101003456``, ``(101003456)``) still match.

    :param award: Award dict; its ``number`` value may be a string or an int.
    :param text: Text to search in.
    :returns: ``True`` if the number is found, ``False`` otherwise, including
        when the award has no number or the text is empty.
    """
    import re  # local import so the helper does not depend on module-level imports

    number = award.get("number")
    if not number or not text:
        return False
    # Digit look-arounds reject matches embedded in a longer run of digits.
    pattern = rf"(?<!\d){re.escape(str(number))}(?!\d)"
    return re.search(pattern, text) is not None


def _get_ec_awards(record):
"""Get all EC funded awards of record."""
award_service = current_service_registry.get("awards")
awards = []
funding = record.metadata.get("funding", [])
Expand All @@ -42,7 +48,17 @@ def award_acronym_in_description(record):
if description := record.metadata.get("description"):
awards = _get_ec_awards(record)
for award in awards:
if _award_acronym_number_in_text(award, description):
if _award_acronym_in_text(award, description):
return True
return False


def award_number_in_description(record):
    """Tell whether any EC award number of the record appears in its description.

    Records without a (non-empty) description never match, and no award
    lookup is performed for them.
    """
    description = record.metadata.get("description")
    if not description:
        return False
    return any(
        _award_number_in_text(ec_award, description)
        for ec_award in _get_ec_awards(record)
    )

Expand All @@ -53,7 +69,7 @@ def award_acronym_in_title(record):

awards = _get_ec_awards(record)
for award in awards:
if _award_acronym_number_in_text(award, title):
if _award_acronym_in_text(award, title):
return True
return False

Expand Down Expand Up @@ -154,7 +170,19 @@ def award_acronym_in_additional_description(record):

awards = _get_ec_awards(record)
for award in awards:
if _award_acronym_number_in_text(award, record_data):
if _award_acronym_in_text(award, record_data):
return True
return False


def award_number_in_additional_description(record):
    """Check if an EU award number occurs in the record's additional descriptions.

    All ``description`` values under ``metadata["additional_descriptions"]``
    are joined into one text, which is then searched for each EC award number.

    :param record: Record exposing a ``metadata`` mapping.
    :returns: ``True`` if any EC award number is found, ``False`` otherwise.
    """
    additional_descriptions = record.metadata.get("additional_descriptions", [])
    # ``or ""`` also covers entries whose description is explicitly ``None``,
    # which would otherwise make ``join`` raise ``TypeError``.
    record_data = " ".join(
        [x.get("description") or "" for x in additional_descriptions]
    )
    # Nothing to search in: skip the award lookup, as the plain-description
    # rule does when the description is missing.
    if not record_data.strip():
        return False

    awards = _get_ec_awards(record)
    return any(_award_number_in_text(award, record_data) for award in awards)

Expand Down Expand Up @@ -224,8 +252,8 @@ def eu_subcommunity_declined_request(record):
return False


def community_name_award_acronym(record):
"""Check if award acronym in community name."""
def community_data_award_acronym(record):
"""Check if award acronym in community data."""
comm_text = ""
for comm in record.parent.communities:
comm_text += comm.metadata.get("title", "")
Expand All @@ -234,6 +262,6 @@ def community_name_award_acronym(record):
if comm_text:
awards = _get_ec_awards(record)
for award in awards:
if _award_acronym_number_in_text(award, comm_text):
if _award_acronym_in_text(award, comm_text):
return True
return False

0 comments on commit 0bb8b66

Please sign in to comment.