Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend Maps support. #86

Merged
merged 2 commits into from
Dec 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 60 additions & 39 deletions metafix/src/main/java/org/metafacture/metafix/FixMethod.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package org.metafacture.metafix;

import org.metafacture.metamorph.api.Maps;
import org.metafacture.metamorph.maps.FileMap;

import java.util.Arrays;
Expand All @@ -27,18 +28,37 @@

enum FixMethod {

// SCRIPT-LEVEL METHODS:

put_map {
    /**
     * Script-level method: registers the given options as a named map.
     *
     * The first positional parameter is used as the map name and the complete
     * options map is handed to {@code metafix.putMap}. The record is not used.
     */
    public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
        final String mapName = params.get(0);
        metafix.putMap(mapName, options);
    }
},
put_filemap {
    /**
     * Script-level method: creates a {@code FileMap} for a file and registers it as a named map.
     *
     * The first positional parameter is the file name. The optional second
     * parameter is the map name; when it is missing, the file name doubles as
     * the map name. The separator is read from the option named by
     * {@code FILEMAP_SEPARATOR_OPTION} and falls back to
     * {@code FILEMAP_DEFAULT_SEPARATOR}. The record is not used.
     */
    public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
        final String fileName = params.get(0);
        final boolean hasExplicitName = params.size() > 1;

        final FileMap fileMap = new FileMap();
        final String separator = options.getOrDefault(FILEMAP_SEPARATOR_OPTION, FILEMAP_DEFAULT_SEPARATOR);
        fileMap.setSeparator(separator);
        fileMap.setFile(fileName);

        final String mapName = hasExplicitName ? params.get(1) : fileName;
        metafix.putMap(mapName, fileMap);
    }
},

// RECORD-LEVEL METHODS:

// set_field: takes the field name from the first parameter, removes that
// field from the record and then calls record.replace with the field name
// and the second parameter as the value. Options are not read.
set_field {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String field = params.get(0);

// Remove first so that the subsequent replace starts from a clean slate.
record.remove(field);
record.replace(field, params.get(1));
}
},
set_array {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String field = params.get(0);
final List<String> toAdd = params.subList(1, params.size());
if (field.endsWith(DOT_APPEND)) {
Expand All @@ -50,7 +70,7 @@ public void apply(final Record record, final List<String> params, final Map<Stri
}
},
set_hash {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String field = params.get(0);

final Value value = record.get(field.replace(DOT_APPEND, EMPTY));
Expand All @@ -65,7 +85,7 @@ public void apply(final Record record, final List<String> params, final Map<Stri
}
},
array { // array-from-hash
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String field = params.get(0);

record.getList(field, a -> a.forEach(recordEntry -> {
Expand All @@ -81,7 +101,7 @@ public void apply(final Record record, final List<String> params, final Map<Stri
}
},
hash { // hash-from-array
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String field = params.get(0);

record.getList(field, a -> record.put(field, Value.newHash(h -> {
Expand All @@ -92,28 +112,28 @@ public void apply(final Record record, final List<String> params, final Map<Stri
}
},
// add_field: calls record.append with the first parameter (field name) and
// the second parameter (value). Options are not read.
add_field {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.append(params.get(0), params.get(1));
}
},
// move_field: implemented as a copy followed by a removal. The whole
// parameter list is passed to record.copy, then the field named by the first
// parameter is removed via record.removeNested.
move_field {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.copy(params);
// The first parameter is presumably the source field of the copy; verify against Record.copy.
record.removeNested(params.get(0));
}
},
// copy_field: delegates to record.copy with the complete parameter list.
// Options are not read.
copy_field {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.copy(params);
}
},
// remove_field: calls record.removeNested once for every parameter, so
// several fields can be removed with a single invocation.
remove_field {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
params.forEach(record::removeNested);
}
},
format {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String field = params.get(0);

record.getList(field, oldValues -> {
Expand All @@ -123,7 +143,7 @@ public void apply(final Record record, final List<String> params, final Map<Stri
}
},
parse_text {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String field = params.get(0);

record.getList(field, a -> a.forEach(v -> {
Expand Down Expand Up @@ -160,7 +180,7 @@ public void apply(final Record record, final List<String> params, final Map<Stri
}
},
paste {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final String joinChar = options.get("join_char");
record.replace(params.get(0), params.subList(1, params.size()).stream()
.filter(f -> literalString(f) || record.find(f) != null)
Expand All @@ -173,71 +193,69 @@ private boolean literalString(final String s) {
}
},
// reject: sets the reject flag on the record. Parameters and options are
// ignored.
reject {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.setReject(true);
}
},
// retain: delegates to record.retainFields with the complete parameter list.
// Options are not read.
retain {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.retainFields(params);
}
},
// vacuum: delegates to record.removeEmptyValues. Parameters and options are
// ignored.
vacuum {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.removeEmptyValues();
}
},

// FIELD-LEVEL METHODS:

// TODO SPEC: switch to morph-style named params in general?

// substring: passes a String.substring transformation to
// record.transformFields. The second and third parameters are parsed as
// integers; Integer.parseInt throws NumberFormatException for non-numeric
// input.
// NOTE(review): the third parameter is decremented by one before being used
// as the end index, and Java's String.substring already treats the end index
// as exclusive -- confirm this matches the intended Fix semantics.
substring {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.transformFields(params, s -> s.substring(Integer.parseInt(params.get(1)), Integer.parseInt(params.get(2)) - 1));
}
},
// trim: passes String::trim to record.transformFields together with the
// parameter list. Options are not read.
trim {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.transformFields(params, String::trim);
}
},
// upcase: passes String::toUpperCase to record.transformFields together with
// the parameter list.
// NOTE(review): the no-argument String.toUpperCase uses the JVM default
// locale, so results can differ between environments (e.g. Turkish dotless i).
upcase {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.transformFields(params, String::toUpperCase);
}
},
// downcase: passes String::toLowerCase to record.transformFields together
// with the parameter list.
// NOTE(review): the no-argument String.toLowerCase uses the JVM default
// locale, so results can differ between environments.
downcase {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.transformFields(params, String::toLowerCase);
}
},
// capitalize: passes a transformation to record.transformFields that
// upper-cases the first character of a string and leaves the rest unchanged.
// NOTE(review): s.substring(0, 1) throws StringIndexOutOfBoundsException for
// an empty string -- confirm whether empty values can reach this lambda.
capitalize {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
record.transformFields(params, s -> s.substring(0, 1).toUpperCase() + s.substring(1));
}
},
lookup {
public void apply(final Record record, final List<String> params, final Map<String, String> options) {
record.transformFields(params, s -> {
final Map<String, String> map = buildMap(options, params.size() <= 1 ? null : params.get(1));
return map.getOrDefault(s, map.get("__default")); // TODO Catmandu uses 'default'
});
}
public void apply(final Metafix metafix, final Record record, final List<String> params, final Map<String, String> options) {
final Map<String, String> map;

private Map<String, String> buildMap(final Map<String, String> options, final String fileLocation) {
final String sep = "sep_char";
final Map<String, String> map = fileLocation != null ? fileMap(fileLocation, options.get(sep)) : options;
return map;
}
if (params.size() <= 1) {
map = options;
}
else {
final String mapName = params.get(1);

private Map<String, String> fileMap(final String location, final String separator) {
final FileMap fileMap = new FileMap();
fileMap.setSeparator(","); // CSV as default
if (separator != null) { // override with option
fileMap.setSeparator(separator);
if (!metafix.getMapNames().contains(mapName)) {
put_filemap.apply(metafix, record, Arrays.asList(mapName), options);
}

map = metafix.getMap(mapName);
}
fileMap.setFile(location);
return fileMap;

final String defaultValue = map.get(Maps.DEFAULT_MAP_KEY); // TODO: Catmandu uses 'default'
record.transformFields(params, k -> map.getOrDefault(k, defaultValue));
}
};

Expand All @@ -246,6 +264,9 @@ private Map<String, String> fileMap(final String location, final String separato
private static final String EMPTY = "";
private static final String DOT_APPEND = "." + Value.APPEND_FIELD;

abstract void apply(Record record, List<String> params, Map<String, String> options);
private static final String FILEMAP_SEPARATOR_OPTION = "sep_char";
private static final String FILEMAP_DEFAULT_SEPARATOR = ",";

abstract void apply(Metafix metafix, Record record, List<String> params, Map<String, String> options);

}
65 changes: 56 additions & 9 deletions metafix/src/main/java/org/metafacture/metafix/Metafix.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,30 @@

package org.metafacture.metafix;

import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.StandardEventNames;
import org.metafacture.framework.StreamPipe;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.helpers.DefaultStreamReceiver;
import org.metafacture.mangling.StreamFlattener;
import org.metafacture.metafix.fix.Expression;
import org.metafacture.metafix.fix.Fix;
import org.metafacture.metamorph.api.Maps;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
Expand All @@ -48,22 +54,26 @@
* @author Christoph Böhme (Metamorph)
* @author Fabian Steeg (Metafix)
*/
public class Metafix implements StreamPipe<StreamReceiver> {
public class Metafix implements StreamPipe<StreamReceiver>, Maps { // checkstyle-disable-line ClassDataAbstractionCoupling

public static final String VAR_START = "$[";
public static final String VAR_END = "]";
public static final Map<String, String> NO_VARS = Collections.emptyMap();
private static final String ENTITIES_NOT_BALANCED = "Entity starts and ends are not balanced";

private static final Logger LOG = LoggerFactory.getLogger(Metafix.class);

private static final String ENTITIES_NOT_BALANCED = "Entity starts and ends are not balanced";

private final Deque<Integer> entityCountStack = new LinkedList<>();
private final List<Closeable> resources = new ArrayList<>();
private final List<Expression> expressions = new ArrayList<>();
private final Map<String, Map<String, String>> maps = new HashMap<>();
private final StreamFlattener flattener = new StreamFlattener();

// TODO: Use SimpleRegexTrie / WildcardTrie for wildcard, alternation and character class support
private Record currentRecord = new Record();
private Fix fix;
private final List<Expression> expressions = new ArrayList<>();
private Map<String, String> vars = NO_VARS;
private final StreamFlattener flattener = new StreamFlattener();
private final Deque<Integer> entityCountStack = new LinkedList<>();
private int entityCount;
private StreamReceiver outputStreamReceiver;
private String recordIdentifier;
Expand Down Expand Up @@ -105,8 +115,7 @@ public void literal(final String name, final String value) {
}

// Parses the Fix definition read from fixDef via FixStandaloneSetup.parseFix
// and stores the result in this.fix; stores theVars in this.vars.
private void buildPipeline(final Reader fixDef, final Map<String, String> theVars) {
final Fix f = FixStandaloneSetup.parseFix(fixDef);
this.fix = f;
this.fix = FixStandaloneSetup.parseFix(fixDef);
this.vars = theVars;
}

Expand All @@ -131,8 +140,7 @@ public void endRecord() {
}
flattener.endRecord();
LOG.debug("End record, walking fix: {}", currentRecord);
final RecordTransformer transformer = new RecordTransformer(currentRecord, vars, fix);
currentRecord = transformer.transform();
currentRecord = new RecordTransformer(this, fix).transform();
if (!currentRecord.getReject()) {
outputStreamReceiver.startRecord(recordIdentifier);
LOG.debug("Sending results to {}", outputStreamReceiver);
Expand Down Expand Up @@ -207,6 +215,15 @@ public void resetStream() {

/**
 * Closes all registered resources and then propagates the close event to the
 * downstream receiver.
 *
 * Every resource is attempted even if an earlier one fails, and the
 * downstream receiver is always closed. The first {@link IOException} is
 * wrapped in a {@link MetafactureException} and rethrown once cleanup has
 * finished; later failures are attached to it as suppressed exceptions.
 *
 * @throws MetafactureException if at least one resource could not be closed
 */
@Override
public void closeStream() {
    MetafactureException failure = null;

    for (final Closeable closeable : resources) {
        try {
            closeable.close();
        }
        catch (final IOException e) {
            if (failure == null) {
                failure = new MetafactureException("Error while executing the Metafix transformation pipeline: " + e.getMessage(), e);
            }
            else {
                // Keep going, but do not lose the additional failure.
                failure.addSuppressed(e);
            }
        }
    }

    if (failure == null) {
        outputStreamReceiver.closeStream();
        return;
    }

    // A resource failed to close: still give the downstream receiver the
    // chance to release its own resources before reporting the failure.
    try {
        outputStreamReceiver.closeStream();
    }
    catch (final RuntimeException e) {
        failure.addSuppressed(e);
    }

    throw failure;
}

Expand Down Expand Up @@ -234,4 +251,34 @@ public Record getCurrentRecord() {
return currentRecord;
}

/**
 * Returns the names of all maps currently registered.
 *
 * @return an unmodifiable view of the registered map names
 */
@Override
public Collection<String> getMapNames() {
    final Collection<String> names = Collections.unmodifiableSet(maps.keySet());
    return names;
}

/**
 * Looks up a registered map by name.
 *
 * @param mapName the name the map was registered under
 * @return the registered map, or an empty map if no map with that name exists
 */
@Override
public Map<String, String> getMap(final String mapName) {
    if (maps.containsKey(mapName)) {
        return maps.get(mapName);
    }

    return Collections.emptyMap();
}

/**
 * Looks up a single value in a named map.
 *
 * If the map has no entry for {@code key}, the entry stored under
 * {@code Maps.DEFAULT_MAP_KEY} is returned instead.
 *
 * @param mapName the name of the map to query
 * @param key     the key to look up
 * @return the value for the key, the map's default value, or {@code null}
 */
@Override
public String getValue(final String mapName, final String key) {
    final Map<String, String> map = getMap(mapName);
    final String effectiveKey = map.containsKey(key) ? key : Maps.DEFAULT_MAP_KEY;
    return map.get(effectiveKey);
}

/**
 * Registers a map under the given name, replacing any map of the same name.
 *
 * Maps that implement {@link Closeable} are also added to the resource list
 * that {@code closeStream()} iterates over.
 *
 * @param mapName the name to register the map under
 * @param map     the map to register
 * @return the map previously registered under that name, or {@code null}
 */
@Override
public Map<String, String> putMap(final String mapName, final Map<String, String> map) {
    final boolean needsClosing = map instanceof Closeable;

    if (needsClosing) {
        final Closeable resource = (Closeable) map;
        resources.add(resource);
    }

    final Map<String, String> previous = maps.put(mapName, map);
    return previous;
}

/**
 * Stores a single key/value pair in a named map, creating the map first if
 * none is registered under that name.
 *
 * @param mapName the name of the map to modify
 * @param key     the key to store
 * @param value   the value to store
 * @return the value previously stored under the key, or {@code null}
 */
@Override
public String putValue(final String mapName, final String key, final String value) {
    Map<String, String> target = maps.get(mapName);

    if (target == null) {
        target = new HashMap<>();
        maps.put(mapName, target);
    }

    return target.put(key, value);
}

}
Loading