Skip to content

Commit

Permalink
Merge pull request #1147 from libris/feature/handle-linked-works
Browse files Browse the repository at this point in the history
Feature/handle linked works
  • Loading branch information
kwahlin authored Jan 24, 2023
2 parents e40a509 + 461624e commit 5ba1292
Show file tree
Hide file tree
Showing 11 changed files with 179 additions and 77 deletions.
17 changes: 12 additions & 5 deletions whelk-core/src/main/groovy/se/kb/libris/Normalizers.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import whelk.util.DocumentUtil.Remove
import static whelk.JsonLd.GRAPH_KEY
import static whelk.JsonLd.ID_KEY
import static whelk.JsonLd.TYPE_KEY
import static whelk.JsonLd.WORK_KEY
import static whelk.JsonLd.asList
import static whelk.util.DocumentUtil.traverse

Expand Down Expand Up @@ -195,13 +196,19 @@ class Normalizers {
}
}

/**
 * Resolves the Work entity for a document.
 *
 * Three cases, in order:
 *  1. The main entity is itself a Work -> return it.
 *  2. The main entity links its work by IRI (thing[WORK_KEY][ID_KEY]) ->
 *     load the linked document from storage and recurse into it.
 *  3. The work is embedded locally under WORK_KEY -> return the embedded map.
 *
 * @param whelk gives access to jsonld vocabulary checks and to storage for
 *              resolving linked works
 * @param doc   the document whose work is wanted
 * @return the work Map, or null when the document has no resolvable work
 */
static Map getWork(Whelk whelk, Document doc) {
    def (_record, thing) = doc.data[GRAPH_KEY]
    if (thing && isInstanceOf(whelk.jsonld, thing, 'Work')) {
        return thing
    }
    else if (thing && thing[WORK_KEY]) {
        def work = thing[WORK_KEY]
        def linked = work[ID_KEY]
        if (linked) {
            // Guard: the IRI may not resolve (deleted/missing document).
            // Without this check the recursive call would NPE on doc.data.
            def linkedDoc = whelk.storage.getDocumentByIri(linked)
            return linkedDoc ? getWork(whelk, linkedDoc) : null
        }
        if (isInstanceOf(whelk.jsonld, work, 'Work')) {
            return work
        }
    }
    return null
}
Expand Down
6 changes: 0 additions & 6 deletions whelktool/scripts/analysis/find-work-clusters.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ selectByCollection('bib') { bib ->
}
}

exit()

Map<String, List<String>> buildQuery(bib) {
def title = title(bib)

Expand Down Expand Up @@ -65,10 +63,6 @@ Map<String, List<String>> buildQuery(bib) {
return null
}

synchronized void exit() {
System.exit(0)
}

private void insertLinkedAgents(bib) {
getPathSafe(bib.doc.data, ['@graph', 1, 'instanceOf', 'contribution']).each {
if (it.agent && it.agent['@id']) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ class DisplayDoc {
}

protected String chipString(def thing) {
    // Delegate to the shared chip renderer, using the Whelk owned by the backing doc.
    return Util.chipString(thing, doc.whelk)
}

private String reproductionOfLink() {
def shortId = Util.getPathSafe(getMainEntity(), ['reproductionOf', '@id'])
?.tokenize("/#")
?.split('[#/]')
?.dropRight(1)
?.last() ?: ''

Expand Down Expand Up @@ -154,7 +154,7 @@ class DisplayDoc {
if (isInstance()) {
framed = JsonLd.frame(doc.doc.getThingIdentifiers().first(), doc.whelk.loadEmbellished(doc.doc.shortId).data)
} else {
Document copy = doc.clone()
Document copy = doc.doc.clone()
doc.whelk.embellish(copy)
framed = JsonLd.frame(doc.doc.getThingIdentifiers().first(), copy.data)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,22 +62,26 @@ class Doc {
}

/**
 * Fetches the work entity of a document and returns a detached, mutable copy.
 *
 * @throws NoWorkException when the document has no resolvable work
 */
static Map getWork(Whelk whelk, Document d) {
    def extracted = Normalizers.getWork(whelk, d)
    if (!extracted) {
        throw new NoWorkException(d.shortId)
    }
    //TODO 'marc:fieldref'
    // work.remove('@id')
    return new HashMap<>(extracted)
}

// Returns the work of a *cloned* Document, so the returned map can be
// mutated without touching this Doc's underlying data.
Map workCopy() {
return getWork(whelk, doc.clone())
}

// IRI of this document's work, read from its '@id' key (null if the work
// is local/unlinked and has no '@id').
// NOTE(review): this calls a no-arg getWork() that is not visible in this
// hunk — presumably an instance-level accessor; confirm it exists alongside
// the static getWork(Whelk, Document).
String workIri() {
getWork()['@id']
}

Map getMainEntity() {
    // Main entity is the second node of '@graph' (index 0 is the record).
    doc.data['@graph'][1]
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package datatool.scripts.mergeworks

import whelk.Document

// Common shape for the result of a work merge: the produced work document,
// the bib Docs it was derived from, and the directory its report goes to.
// Implemented by NewWork and UpdatedWork.
// NOTE(review): Groovy interfaces normally require fields to be static final;
// these look like abstract properties — confirm this compiles, or whether a
// trait/abstract class was intended.
interface MergedWork {
Document doc
Collection<Doc> derivedFrom
File reportDir
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package datatool.scripts.mergeworks

import whelk.Document
import whelk.IdGenerator

/**
 * A work record newly broken out ("utbrutet") from a set of bib instances.
 *
 * Builds a fresh Document for the merged work data, assigns it a generated id,
 * and records which Docs it was derived from plus where its report is written
 * (a 'new' subdirectory of the given report dir).
 */
class NewWork implements MergedWork {
Document doc
Collection<Doc> derivedFrom
File reportDir

NewWork(Map data, Collection<Doc> derivedFrom, File reportDir) {
this.derivedFrom = derivedFrom
// reportDir is reassigned before buildWorkDocument runs, so the report URI
// below is built from the 'new' subdirectory.
this.reportDir = new File(reportDir, 'new')
this.doc = buildWorkDocument(data)
}

// Wraps workData in a minimal Record + work @graph and replaces the TEMPID
// placeholders with a freshly generated real id.
// NOTE(review): this mutates the caller's workData map (sets '@id') and embeds
// it by reference into the new Document — confirm callers don't reuse the map.
private Document buildWorkDocument(Map workData) {
String workId = IdGenerator.generate()
def reportUri = "http://xlbuild.libris.kb.se/works/${reportDir.getPath().replace('report/', '')}/${workId}.html"

workData['@id'] = "TEMPID#it"
Document d = new Document([
"@graph": [
[
"@id" : "TEMPID",
"@type" : "Record",
"mainEntity" : ["@id": "TEMPID#it"],
"technicalNote": [[
"@type" : "TechnicalNote",
"hasNote": [[
"@type": "Note",
"label": ["Maskinellt utbrutet verk... TODO"]
]],
"uri" : [reportUri]
]
]],
workData
]
])

// Rewrites every TEMPID occurrence (record id and '#it' fragment) to the real base URI + workId.
d.deepReplaceId(Document.BASE_URI.toString() + workId)
return d
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package datatool.scripts.mergeworks

import whelk.Document

/**
 * An already-existing work record that the merge process updated.
 * Carries the checksum needed for an optimistic-locking save, and writes its
 * report to an 'updated' subdirectory of the given report dir.
 */
class UpdatedWork implements MergedWork {
Document doc
Collection<Doc> derivedFrom
File reportDir
String checksum

UpdatedWork(Document doc, Collection<Doc> derivedFrom, File reportDir, String checksum) {
this.checksum = checksum
this.reportDir = new File(reportDir, 'updated')
this.doc = doc
this.derivedFrom = derivedFrom
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class Util {
static List flatTitles(List hasTitle) {
dropSubTitles(hasTitle).collect {
def title = new TreeMap<>()
title['flatTitle'] = normalize(Doc.flatten(it, titleComponents))
title['flatTitle'] = normalize(DisplayDoc.flatten(it, titleComponents))
if (it['@type']) {
title['@type'] = it['@type']
}
Expand Down Expand Up @@ -188,6 +188,14 @@ class Util {

// Return the most common title for the best encodingLevel
static Object bestTitle(Collection<Doc> docs) {
def linkedWorkTitle = docs.findResult {
def w = it.getWork()
w['@id'] ? w['hasTitle'] : null
}
if (linkedWorkTitle) {
return linkedWorkTitle
}

def isTitle = { it.'@type' == 'Title' }
def addSource = { t, d -> t.plus(['source': [d.getMainEntity().subMap('@id')]]) }

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package datatool.scripts.mergeworks

import datatool.scripts.mergeworks.compare.Classification
import datatool.scripts.mergeworks.compare.Id
import datatool.scripts.mergeworks.compare.SameOrEmpty
import datatool.scripts.mergeworks.compare.Default
import datatool.scripts.mergeworks.compare.Extent
Expand Down Expand Up @@ -29,6 +30,7 @@ class WorkComparator {
'subject' : new Subject(),
'summary' : new StuffSet(),
'translationOf' : new TranslationOf(),
'@id' : new Id()
]

static FieldHandler DEFAULT = new Default()
Expand Down Expand Up @@ -113,7 +115,7 @@ class WorkComparator {
static Set<String> allFields(Collection<Doc> cluster) {
    // Union of every key appearing in any work across the cluster.
    cluster.collectMany { it.getWork().keySet() } as Set<String>
}

Map<String, FieldStatus> fieldStatuses(Collection<Doc> cluster) {
Expand Down
Loading

0 comments on commit 5ba1292

Please sign in to comment.