Skip to content
This repository has been archived by the owner on Jan 27, 2025. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Works like the fix function 'lookup', also using a Map. The Map is built dynamically
by querying an RDF model.
  • Loading branch information
dr0i committed Jun 21, 2022
1 parent e35f821 commit 89e7a01
Show file tree
Hide file tree
Showing 7 changed files with 398 additions and 23 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up JDK 1.8
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: 1.8
java-version: 11
- name: Grant execute permission for gradlew
run: chmod +x gradlew
- name: Build with Gradle
Expand Down
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ subprojects {
'ace': '1.3.3',
'equalsverifier': '3.8.2',
'jetty': '9.4.14.v20181114',
'jena': '4.5.0',
'jquery': '3.3.1-1',
'junit_jupiter': '5.8.2',
'junit_platform': '1.4.2',
Expand Down
2 changes: 2 additions & 0 deletions metafix/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ def passSystemProperties = {
}

dependencies {
implementation "org.apache.jena:jena-core:${versions.jena}"
implementation "org.apache.jena:jena-arq:${versions.jena}"
implementation "org.eclipse.xtext:org.eclipse.xtext:${versions.xtext}"
implementation "org.eclipse.xtext:org.eclipse.xtext.xbase:${versions.xtext}"
implementation "com.google.guava:guava:${versions.guava}"
Expand Down
79 changes: 58 additions & 21 deletions metafix/src/main/java/org/metafacture/metafix/FixMethod.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.metafacture.metafix;

import org.metafacture.metafix.api.FixFunction;
import org.metafacture.metafix.maps.RdfMap;
import org.metafacture.metamorph.api.Maps;
import org.metafacture.metamorph.maps.FileMap;

Expand Down Expand Up @@ -77,6 +78,24 @@ public void apply(final Metafix metafix, final Record record, final List<String>
metafix.putMap(params.get(0), options);
}
},
put_rdfmap {
    /**
     * Registers an {@link RdfMap} that is backed by the RDF file named in the first parameter.
     * The map is stored under the second parameter if given, otherwise under the file name itself.
     * The options "targetLanguage", "target" and "default" are forwarded to the map when present.
     */
    @Override
    public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
        final String rdfFile = params.get(0);
        // Without an explicit name the map is registered under the (unresolved) file name.
        final String mapName = params.size() > 1 ? params.get(1) : rdfFile;

        final RdfMap rdfMap = new RdfMap();
        rdfMap.setFile(metafix.resolvePath(rdfFile));

        // Only forward options that were actually supplied, so unset ones keep the map's own defaults.
        if (options.containsKey("targetLanguage")) {
            rdfMap.setTargetLanguage(options.get("targetLanguage"));
        }
        if (options.containsKey("target")) {
            rdfMap.setTarget(options.get("target"));
        }
        if (options.containsKey("default")) {
            rdfMap.setDefault(options.get("default"));
        }

        metafix.putMap(mapName, rdfMap);
    }
},
put_var {
@Override
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
Expand Down Expand Up @@ -357,27 +376,17 @@ public void apply(final Metafix metafix, final Record record, final List<String>
lookup {
@Override
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final Map<String, String> map;

if (params.size() <= 1) {
map = options;
}
else {
final String mapName = params.get(1);

if (!metafix.getMapNames().contains(mapName)) {
if (mapName.contains(".") || mapName.contains(File.separator)) {
put_filemap.apply(metafix, record, Arrays.asList(mapName), options);
}
else {
// Probably an unknown internal map? Log a warning?
}
}

map = metafix.getMap(mapName);
}

final String defaultValue = map.get(Maps.DEFAULT_MAP_KEY); // TODO: Catmandu uses 'default'
Map<String, String> map = extracted(metafix, record, params, options, KIND_OF_FILEMAP);
record.transform(params.get(0), oldValue -> {
final String newValue = map.getOrDefault(oldValue, defaultValue);
return newValue != null ? newValue : getBoolean(options, "delete") ? null : oldValue;
});
}
},
lookup_rdf {
@Override
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
Map<String, String> map = extracted(metafix, record, params, options, KIND_OF_RDFMAP);
record.transform(params.get(0), oldValue -> {
final String newValue = map.getOrDefault(oldValue, defaultValue);
return newValue != null ? newValue : getBoolean(options, "delete") ? null : oldValue;
Expand Down Expand Up @@ -488,11 +497,39 @@ public void apply(final Metafix metafix, final Record record, final List<String>
}
};

// Selector values passed to extracted(...): they decide whether an unknown, file-like map name
// is registered via put_rdfmap or via put_filemap.
public static final String KIND_OF_RDFMAP = "rdfmap";
public static final String KIND_OF_FILEMAP = "filemap";
// Matches the opening of a named capturing group, "(?<name>", and captures the group name.
private static final Pattern NAMED_GROUP_PATTERN = Pattern.compile("\\(\\?<(.+?)>");

// Option name and fallback value for the file map separator (usage is outside this excerpt).
private static final String FILEMAP_SEPARATOR_OPTION = "sep_char";
private static final String FILEMAP_DEFAULT_SEPARATOR = ",";

private static final Random RANDOM = new Random();
// NOTE(review): mutable static state. It is written by extracted(...) and read afterwards by the
// lookup/lookup_rdf lambdas, so it is shared by every lookup call in the JVM.
private static String defaultValue;

/**
 * Determines the map a lookup function operates on.
 * <p>
 * With at most one parameter the options themselves serve as the map. Otherwise the second
 * parameter names the map; if no map of that name is registered yet and the name looks like a
 * file (contains a dot or the file separator), it is registered on the fly via
 * {@code put_filemap} or {@code put_rdfmap}, depending on {@code kindOfMap}.
 * <p>
 * Side effect: stores the map's {@link Maps#DEFAULT_MAP_KEY} entry in the static
 * {@code defaultValue} field, which the calling lookup functions read afterwards.
 *
 * @param metafix   the Metafix instance holding the registered maps
 * @param record    the current record, passed through to the map registration
 * @param params    the function parameters; index 1, if present, is the map name
 * @param options   the function options; used as the map when no map name is given
 * @param kindOfMap {@link #KIND_OF_FILEMAP} or {@link #KIND_OF_RDFMAP}
 * @return the map to look values up in
 */
private static Map<String, String> extracted(Metafix metafix, Record record, List<String> params, Map<String, String> options, String kindOfMap) {
    final Map<String, String> resolved;

    if (params.size() > 1) {
        final String name = params.get(1);
        final boolean unregistered = !metafix.getMapNames().contains(name);
        final boolean looksLikeFile = name.contains(".") || name.contains(File.separator);

        if (unregistered && looksLikeFile) {
            if (kindOfMap.equals(KIND_OF_FILEMAP)) {
                put_filemap.apply(metafix, record, Arrays.asList(name), options);
            }
            else if (kindOfMap.equals(KIND_OF_RDFMAP)) {
                put_rdfmap.apply(metafix, record, Arrays.asList(name), options);
            }
        }
        // An unregistered name that does not look like a file is probably an unknown
        // internal map; no warning is logged for it (yet).

        resolved = metafix.getMap(name);
    }
    else {
        resolved = options;
    }

    defaultValue = resolved.get(Maps.DEFAULT_MAP_KEY); // TODO: Catmandu uses 'default'
    return resolved;
}

}
246 changes: 246 additions & 0 deletions metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
/*
* Copyright 2013, 2014, 2021 Deutsche Nationalbibliothek et al
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.metafacture.metafix.maps;

import org.apache.jena.rdf.model.*;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.RiotNotFoundException;
import org.apache.jena.shared.PropertyNotFoundException;
import org.metafacture.metafix.FixExecutionException;
import org.metafacture.metamorph.api.Maps;
import org.metafacture.metamorph.api.helpers.AbstractReadOnlyMap;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Provides a dynamically built {@link Map} based on RDF files. Can be one file or a comma
 * separated list of RDF files, which are loaded into a single {@link Model} on first access.
 * <p>
 * Values are not precomputed: every {@link #get(Object)} for a new key queries the model and
 * caches the outcome, so {@link #keySet()} only reflects the keys looked up so far (plus the
 * default key).
 * <p>
 * Configuration (target property, target language, default value) is held per instance, so
 * several maps with different settings can be used side by side.
 *
 * @author Markus Michael Geipel
 * @author Pascal Christoph (dr0i)
 * @see org.metafacture.metamorph.maps.FileMap
 */
public final class RdfMap extends AbstractReadOnlyMap<String, String> {
    private static final Logger LOG = LoggerFactory.getLogger(RdfMap.class);

    private final List<String> filenames = new ArrayList<>();
    /** Cache of already resolved keys; also holds the default value under the default key. */
    private final Map<String, String> map = new HashMap<>();

    private Model model;
    private boolean isUninitialized = true;

    // Per-instance configuration. These used to be static, which made every RdfMap overwrite
    // the settings of all other instances.
    private String targetLanguage = "";
    private String target;
    private String defaultReturn;

    /**
     * Creates an instance of {@link RdfMap}. The target language defaults to the empty string.
     */
    public RdfMap() {
    }

    /**
     * Loads the configured files, registers the default value and expands a prefixed target
     * (e.g. {@code skos:prefLabel}) to a full URI using the prefixes declared in the model.
     *
     * @throws FixExecutionException if no file or no target has been configured, or if a file
     *                               cannot be read
     */
    private void init() {
        loadFiles();
        if (model == null) {
            throw new FixExecutionException("rdf map: no file set");
        }
        if (target == null) {
            throw new FixExecutionException("rdf map: no target set");
        }
        map.put(Maps.DEFAULT_MAP_KEY, defaultReturn);

        final String[] prefixAndLocalName = target.split(":");
        if (prefixAndLocalName.length == 2) {
            // Only expand when the part before the colon is a prefix known to the model.
            // A full URI such as "http://example.org/p" also splits into two parts, but
            // "http" is no prefix and the target must then be left untouched.
            final String namespace = model.getNsPrefixURI(prefixAndLocalName[0]);
            if (namespace != null) {
                target = namespace + prefixAndLocalName[1];
            }
        }

        isUninitialized = false;
    }

    /**
     * Sets a comma separated list of files which provides the {@link Model}.
     *
     * @param files a comma separated list of files
     */
    public void setFiles(final String files) {
        Collections.addAll(filenames, files.split("\\s*,\\s*"));
    }

    /**
     * Sets a file which provides the {@link Model}.
     *
     * @param file the file
     */
    public void setFile(final String file) {
        filenames.add(file);
    }

    private void loadFiles() {
        filenames.forEach(this::loadFile);
    }

    /**
     * Reads one file into the model, creating the model with the first file.
     *
     * @param file the file name or URI handed to {@link RDFDataMgr}
     */
    private void loadFile(final String file) {
        try {
            if (model == null) {
                model = RDFDataMgr.loadModel(file);
            } else {
                RDFDataMgr.read(model, file);
            }
        } catch (final RiotNotFoundException e) {
            throw new FixExecutionException("rdf file: cannot read file", e);
        }
    }

    // The stream helpers below are not called by loadFile, which delegates to RDFDataMgr.
    // They open a name as file, then as classpath resource, then as URL.

    private InputStream openStream(final String file) {
        return openAsFile(file)
                .orElseGet(() -> openAsResource(file)
                        .orElseGet(() -> openAsUrl(file)
                                .orElseThrow(() -> new FixExecutionException(
                                        "File not found: " + file))));
    }

    private Optional<InputStream> openAsFile(final String file) {
        try {
            return Optional.of(new FileInputStream(file));
        } catch (final FileNotFoundException e) {
            return Optional.empty();
        }
    }

    private Optional<InputStream> openAsResource(final String file) {
        return Optional.ofNullable(Thread.currentThread()
                .getContextClassLoader().getResourceAsStream(file));
    }

    private Optional<InputStream> openAsUrl(final String file) {
        final URL url;
        try {
            url = new URL(file);
        } catch (final MalformedURLException e) {
            return Optional.empty();
        }
        try {
            return Optional.of(url.openStream());
        } catch (final IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Builds a Map dynamically by querying an RDF model based on key, the configured target
     * and an optional target language. The Map acts as a cache: the outcome of every lookup,
     * including an unsuccessful one, is stored and returned directly on the next call.
     * <p>
     * To minimize the need of parameters three different querying modes are gone through. If
     * one fails, the next is tried:
     * <ol>
     * <li>treat the key as subject URI and return the literal of the target property (in the
     * target language, if one is set);</li>
     * <li>treat the key as literal of the target property and return the URI of the subject
     * carrying it in the target language;</li>
     * <li>treat the key as literal of the target property and return the literal of the same
     * property that is tagged with the target language.</li>
     * </ol>
     * NOTE(review): if nothing is found, the literal {@link Maps#DEFAULT_MAP_KEY} is returned
     * rather than the configured default value. This is kept as is for compatibility; confirm
     * whether it is intended.
     *
     * @param key the data to be looked up
     * @return the value found, or {@link Maps#DEFAULT_MAP_KEY} if none of the modes matched
     */
    @Override
    public String get(final Object key) {
        if (isUninitialized) {
            init();
        }
        final String lookupKey = key.toString();
        if (map.containsKey(lookupKey)) {
            return map.get(lookupKey);
        }

        final Resource subject = ResourceFactory.createResource(lookupKey);
        final Property targetProperty = ResourceFactory.createProperty(target);
        String result;
        try {
            // first try to get LITERAL using SUBJECT and PROPERTY
            final Statement statement = targetLanguage != null
                    ? model.getRequiredProperty(subject, targetProperty, targetLanguage)
                    : model.getRequiredProperty(subject, targetProperty);
            result = statement.getString();
        } catch (final PropertyNotFoundException | NullPointerException | NoSuchElementException e) {
            result = lookupByLiteral(lookupKey, targetProperty);
        }
        map.put(lookupKey, result);
        return result;
    }

    /**
     * Second querying mode: finds the subject whose target property has the given literal in
     * the target language and returns its URI. Falls back to the third mode if there is no
     * such subject. If several subjects match, the last one wins.
     *
     * @param literal        the literal to search for
     * @param targetProperty the property to inspect
     * @return the subject URI, the result of the third mode, or {@link Maps#DEFAULT_MAP_KEY}
     */
    private String lookupByLiteral(final String literal, final Property targetProperty) {
        final ResIterator subjects = model.listSubjectsWithProperty(targetProperty);
        if (!subjects.hasNext()) {
            LOG.warn("Could not lookup:'{} for {}'. Going with default value.", literal, target);
            return Maps.DEFAULT_MAP_KEY;
        }

        String uri = Maps.DEFAULT_MAP_KEY;
        // Tracked separately because a matching blank node legitimately yields a null URI.
        boolean subjectFound = false;
        while (subjects.hasNext()) {
            final Resource candidate = subjects.nextResource();
            final Statement statement = candidate.getProperty(targetProperty);
            if (statement.getString().equals(literal)
                    && targetLanguage != null
                    && statement.getLanguage().equals(targetLanguage)) {
                uri = candidate.getURI();
                subjectFound = true;
            }
        }
        return subjectFound ? uri : lookupLiteralInTargetLanguage(literal, targetProperty);
    }

    /**
     * Third querying mode: for every subject whose target property has the given literal,
     * returns the literal of that same property which is tagged with the target language.
     * If several statements match, the last one wins.
     *
     * @param literal        the literal to search for
     * @param targetProperty the property to inspect
     * @return the literal in the target language, or {@link Maps#DEFAULT_MAP_KEY}
     */
    private String lookupLiteralInTargetLanguage(final String literal, final Property targetProperty) {
        String result = Maps.DEFAULT_MAP_KEY;
        final ResIterator subjects = model.listSubjectsWithProperty(targetProperty);
        while (subjects.hasNext()) {
            final Resource candidate = subjects.nextResource();
            if (candidate.getProperty(targetProperty).getString().equals(literal)) {
                final StmtIterator statements = candidate.listProperties(targetProperty);
                while (statements.hasNext()) {
                    final Statement statement = statements.nextStatement();
                    if (statement.getLanguage().equals(targetLanguage)) {
                        result = statement.getString();
                    }
                }
            }
        }
        return result;
    }

    /**
     * Returns the keys resolved so far, including the default key.
     *
     * @return an unmodifiable view of the cached keys
     */
    @Override
    public Set<String> keySet() {
        if (isUninitialized) {
            init();
        }
        return Collections.unmodifiableSet(map.keySet());
    }

    /**
     * Sets the language tag that looked up literals must carry.
     *
     * @param targetLanguage the language tag; the empty string selects untagged literals
     */
    public void setTargetLanguage(final String targetLanguage) {
        this.targetLanguage = targetLanguage;
    }

    /**
     * Sets the property to query, either as full URI or as {@code prefix:localName} using a
     * prefix declared in the RDF files.
     *
     * @param target the target property
     */
    public void setTarget(final String target) {
        this.target = target;
    }

    /**
     * Sets the value stored under {@link Maps#DEFAULT_MAP_KEY}.
     *
     * @param defaultReturn the default value
     */
    public void setDefault(final String defaultReturn) {
        this.defaultReturn = defaultReturn;
    }
}
Loading

0 comments on commit 89e7a01

Please sign in to comment.