diff --git a/site/zenodo_rdm/curation/tasks.py b/site/zenodo_rdm/curation/tasks.py index db997b85..aafbf694 100644 --- a/site/zenodo_rdm/curation/tasks.py +++ b/site/zenodo_rdm/curation/tasks.py @@ -32,7 +32,7 @@ def _send_result_email(content): """Send curation result as email.""" - subject = f"EU Record Curation Processesed {datetime.now().date()}" + subject = f"EU Record Curation Processed {datetime.now().date()}" body = RESULT_EMAIL_BODY.format(finished_at=datetime.now(timezone.utc), **content) sender = current_app.config["MAIL_DEFAULT_SENDER"] admin_email = current_app.config["APP_RDM_ADMIN_EMAIL_RECIPIENT"] @@ -44,22 +44,13 @@ def _send_result_email(content): mail_ext.send(msg) -@shared_task -def run_eu_record_curation(since): - """Run EC Curator.""" - ctx = { - "processed": 0, - "approved": 0, - "failed": 0, - "since": since, - "records_moved": [], - } - dry_run = not current_app.config.get("CURATION_ENABLE_EU_CURATOR") - curator = EURecordCurator(dry=dry_run) - created_before = (datetime.now(timezone.utc) - timedelta(days=30)).isoformat() - updated_after = (datetime.fromisoformat(since) - timedelta(hours=12)).isoformat() +def _get_eu_records_query(since): + """Get dsl query for records to be processed.""" + created_before = datetime.now(timezone.utc) - timedelta(days=30) + updated_after = datetime.fromisoformat(since) - timedelta(hours=12) - query = dsl.Q( + # Get records with EC funding and not in EU community already and not created in last 30 days + ec_funded = dsl.Q( "bool", must=[ dsl.Q("term", **{"metadata.funding.funder.id": "00k4n6c32"}), @@ -67,13 +58,7 @@ def run_eu_record_curation(since): dsl.Q( "range", created={ - "lte": created_before, - }, - ), - dsl.Q( - "range", - updated={ - "gte": updated_after, + "lte": created_before.isoformat(), }, ), ], @@ -85,14 +70,56 @@ def run_eu_record_curation(since): "EU_COMMUNITY_UUID" ) }, - ) + ), + ], + ) + + updated_after_since = dsl.Q( + "bool", + must=[ + dsl.Q( + "range", + updated={ + "gte": updated_after.isoformat(), + }, + ), + ], + ) + + # Created 31 days before (with a 6 hour buffer) + new_created = dsl.Q( + "bool", + must=[ + dsl.Q( + "range", + created={ + "gte": (created_before - timedelta(days=1, hours=6)).isoformat(), + }, + ), ], ) + + return ec_funded & (updated_after_since | new_created) + + +@shared_task +def run_eu_record_curation(since): + """Run EC Curator.""" + ctx = { + "processed": 0, + "approved": 0, + "failed": 0, + "since": since, + "records_moved": [], + } + dry_run = not current_app.config.get("CURATION_ENABLE_EU_CURATOR") + curator = EURecordCurator(dry=dry_run) + search = records_service.create_search( system_identity, records_service.record_cls, records_service.config.search, - extra_filter=query, + extra_filter=_get_eu_records_query(since), ) for item in search.scan():