From 4c95e4289f2b89376b3cee55afddca795985d4cd Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis
Date: Tue, 21 Apr 2020 14:30:49 +0200
Subject: [PATCH] Add cleaning script for empty "hasPart" entities containing only types.

---
 .../scripts/cleanups/2020/04/lxl-2963.groovy | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 whelktool/scripts/cleanups/2020/04/lxl-2963.groovy

diff --git a/whelktool/scripts/cleanups/2020/04/lxl-2963.groovy b/whelktool/scripts/cleanups/2020/04/lxl-2963.groovy
new file mode 100644
index 0000000000..367eaf08ba
--- /dev/null
+++ b/whelktool/scripts/cleanups/2020/04/lxl-2963.groovy
@@ -0,0 +1,40 @@
+/*
+ * LXL-2963: remove "empty" hasPart entries from records in the 'bib' collection.
+ *
+ * A hasPart entry counts as empty when "@type" is its only property. If no
+ * entries remain afterwards, the hasPart property itself is removed as well.
+ */
+
+PrintWriter scheduledForUpdating = getReportWriter("scheduled-updates")
+PrintWriter failedUpdating = getReportWriter("failed-updates")
+
+// Select only records whose third @graph element has a hasPart property.
+String where = "collection = 'bib' and data#>'{@graph,2,hasPart}' is not null"
+
+selectBySqlWhere(where) { data ->
+    // The query above matched on @graph[2], so that is the element to clean.
+    def work = data.graph[2]
+
+    Object hasPart = work.hasPart
+    if (!(hasPart instanceof List))
+        return
+    List parts = (List) hasPart
+
+    // removeAll(Closure) returns true only if something was removed. Entries
+    // that are not maps are kept as-is rather than failing on a cast.
+    boolean changed = parts.removeAll { part ->
+        part instanceof Map && part.size() == 1 && part.get("@type") != null
+    }
+
+    // Nothing is saved unless an entry was removed, so stop here.
+    if (!changed)
+        return
+
+    if (parts.isEmpty())
+        work.remove("hasPart")
+
+    scheduledForUpdating.println("${data.doc.getURI()}")
+    data.scheduleSave(onError: { e ->
+        failedUpdating.println("Failed to update ${data.doc.shortId} due to: $e")
+    })
+}