diff --git a/api/pom.xml b/api/pom.xml
index 9172630..783fc97 100644
--- a/api/pom.xml
+++ b/api/pom.xml
@@ -21,6 +21,7 @@
3.0.0
31.1-jre
3.0.0
+ 2.0.3-SNAPSHOT
@@ -59,6 +60,21 @@
+
+ gov.nsa.datawave.microservice
+ type-utils
+ ${version.microservice.type-utils}
+
+
+ log4j
+ log4j
+
+
+ avro
+ org.apache.avro
+
+
+
@@ -74,6 +90,10 @@
gov.nsa.datawave.microservice
metadata-utils
+
+ gov.nsa.datawave.microservice
+ type-utils
+
org.projectlombok
lombok
diff --git a/service/pom.xml b/service/pom.xml
index 96ebf2f..6786468 100644
--- a/service/pom.xml
+++ b/service/pom.xml
@@ -22,7 +22,7 @@
2.1.1
5.2.0
3.0.1
- 3.0.0
+ 3.0.1-SNAPSHOT
3.0.0
2.0.0
1.11.4
diff --git a/service/src/main/java/datawave/microservice/AccumuloConnectionService.java b/service/src/main/java/datawave/microservice/AccumuloConnectionService.java
index b1d6701..0be6919 100644
--- a/service/src/main/java/datawave/microservice/AccumuloConnectionService.java
+++ b/service/src/main/java/datawave/microservice/AccumuloConnectionService.java
@@ -20,7 +20,7 @@
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.security.Authorizations;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;
diff --git a/service/src/main/java/datawave/microservice/dictionary/DataDictionaryController.java b/service/src/main/java/datawave/microservice/dictionary/DataDictionaryController.java
index 66830e8..fc3c769 100644
--- a/service/src/main/java/datawave/microservice/dictionary/DataDictionaryController.java
+++ b/service/src/main/java/datawave/microservice/dictionary/DataDictionaryController.java
@@ -11,7 +11,7 @@
import java.util.Set;
import java.util.function.Consumer;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.http.MediaType;
import org.springframework.security.access.annotation.Secured;
@@ -73,7 +73,6 @@ public DataDictionaryController(DataDictionaryProperties dataDictionaryConfigura
this.dataDictionary = dataDictionary;
this.responseObjectFactory = responseObjectFactory;
this.accumuloConnectionService = accumloConnectionService;
- dataDictionary.setNormalizationMap(dataDictionaryConfiguration.getNormalizerMap());
}
/**
diff --git a/service/src/main/java/datawave/microservice/dictionary/EdgeDictionaryController.java b/service/src/main/java/datawave/microservice/dictionary/EdgeDictionaryController.java
index 4edf5d1..d15fe82 100644
--- a/service/src/main/java/datawave/microservice/dictionary/EdgeDictionaryController.java
+++ b/service/src/main/java/datawave/microservice/dictionary/EdgeDictionaryController.java
@@ -2,7 +2,7 @@
import static datawave.microservice.http.converter.protostuff.ProtostuffHttpMessageConverter.PROTOSTUFF_VALUE;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.http.MediaType;
import org.springframework.security.core.annotation.AuthenticationPrincipal;
diff --git a/service/src/main/java/datawave/microservice/dictionary/data/DataDictionary.java b/service/src/main/java/datawave/microservice/dictionary/data/DataDictionary.java
index c2b64a2..b4ce57b 100644
--- a/service/src/main/java/datawave/microservice/dictionary/data/DataDictionary.java
+++ b/service/src/main/java/datawave/microservice/dictionary/data/DataDictionary.java
@@ -14,10 +14,6 @@
public interface DataDictionary,DESC extends DescriptionBase,FIELD extends DictionaryFieldBase> {
- Map getNormalizationMap();
-
- void setNormalizationMap(Map normalizationMap);
-
Collection getFields(Connection connectionConfig, Collection dataTypeFilters, int numThreads) throws Exception;
void setDescription(Connection connectionConfig, FIELD description) throws Exception;
diff --git a/service/src/main/java/datawave/microservice/dictionary/data/DataDictionaryImpl.java b/service/src/main/java/datawave/microservice/dictionary/data/DataDictionaryImpl.java
index 85dd4b8..91c81ce 100644
--- a/service/src/main/java/datawave/microservice/dictionary/data/DataDictionaryImpl.java
+++ b/service/src/main/java/datawave/microservice/dictionary/data/DataDictionaryImpl.java
@@ -36,7 +36,6 @@ public class DataDictionaryImpl implements DataDictionary responseObjectFactory;
private final MetadataHelperFactory metadataHelperFactory;
private final MetadataDescriptionsHelperFactory metadataDescriptionsHelperFactory;
- private Map normalizationMap = Maps.newHashMap();
public DataDictionaryImpl(MarkingFunctions markingFunctions,
ResponseObjectFactory responseObjectFactory,
@@ -47,16 +46,6 @@ public DataDictionaryImpl(MarkingFunctions markingFunctions,
this.metadataDescriptionsHelperFactory = metadataDescriptionsHelperFactory;
}
- @Override
- public Map getNormalizationMap() {
- return normalizationMap;
- }
-
- @Override
- public void setNormalizationMap(Map normalizationMap) {
- this.normalizationMap = normalizationMap;
- }
-
/**
* Retrieve metadata fields from the specified metadata table, aggregated by field name and data type.
*
@@ -76,8 +65,7 @@ public void setNormalizationMap(Map normalizationMap) {
@Override
public Collection getFields(Connection connectionConfig, Collection dataTypeFilters, int numThreads) throws Exception {
Map aliases = getAliases(connectionConfig);
- DefaultMetadataFieldScanner scanner = new DefaultMetadataFieldScanner(markingFunctions, responseObjectFactory, normalizationMap, connectionConfig,
- numThreads);
+ DefaultMetadataFieldScanner scanner = new DefaultMetadataFieldScanner(markingFunctions, responseObjectFactory, connectionConfig, numThreads);
return scanner.getFields(aliases, dataTypeFilters);
}
diff --git a/service/src/main/java/datawave/microservice/metadata/DefaultMetadataFieldScanner.java b/service/src/main/java/datawave/microservice/metadata/DefaultMetadataFieldScanner.java
index bd5a845..8f56c3f 100644
--- a/service/src/main/java/datawave/microservice/metadata/DefaultMetadataFieldScanner.java
+++ b/service/src/main/java/datawave/microservice/metadata/DefaultMetadataFieldScanner.java
@@ -1,14 +1,12 @@
package datawave.microservice.metadata;
import java.io.IOException;
-import java.text.SimpleDateFormat;
+import java.lang.reflect.InvocationTargetException;
import java.time.Instant;
-import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Collection;
import java.util.Collections;
-import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
@@ -23,6 +21,7 @@
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.WholeRowIterator;
+import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -30,6 +29,7 @@
import com.google.common.collect.Maps;
import datawave.data.ColumnFamilyConstants;
+import datawave.data.type.util.TypePrettyNameSupplier;
import datawave.marking.MarkingFunctions;
import datawave.microservice.Connection;
import datawave.microservice.dictionary.config.ResponseObjectFactory;
@@ -47,16 +47,14 @@ public class DefaultMetadataFieldScanner {
private final MarkingFunctions markingFunctions;
private final ResponseObjectFactory responseObjectFactory;
- private final Map normalizationMap;
private final Connection connectionConfig;
private final int numThreads;
public DefaultMetadataFieldScanner(MarkingFunctions markingFunctions,
ResponseObjectFactory responseObjectFactory,
- Map normalizationMap, Connection connectionConfig, int numThreads) {
+ Connection connectionConfig, int numThreads) {
this.markingFunctions = markingFunctions;
this.responseObjectFactory = responseObjectFactory;
- this.normalizationMap = normalizationMap;
this.connectionConfig = connectionConfig;
this.numThreads = numThreads;
}
@@ -266,13 +264,38 @@ private void setDescriptions() throws MarkingFunctions.Exception {
currField.getDescriptions().add(description);
}
- // Set the normalized type for the current {@link DefaultMetadataField}. If no normalized version can be found for the type, the type will default to
- // "Unknown".
+ // Fallback used when 'type' cannot be resolved to a TypePrettyNameSupplier: derives a display
+ // name from the fully qualified class name by taking the simple name, removing the redundant 'Type' token, and capitalizing the first letter.
+ private String determineUnknownType(String unknown) {
+ String[] unknownType = unknown.split("\\.");
+ return StringUtils.capitalize(unknownType[unknownType.length - 1].replace("Type", ""));
+ }
+
+ // Set the normalized type for the current {@link DefaultMetadataField}.
private void setType() {
int nullPos = currColumnQualifier.indexOf('\0');
String type = currColumnQualifier.substring(nullPos + 1);
- String normalizedType = normalizationMap.get(type);
- currField.addType(normalizedType != null ? normalizedType : "Unknown");
+ /*
+ * Attempt to get a new instance of the class within 'type'. This will be used to determine what value(s) should be placed into the 'Types' field in
+ * the data dictionary.
+ *
+ * Use the value returned from getDataDictionaryTypeValue when: The class can be found AND it is an instance of TypePrettyNameSupplier AND
+ * getDataDictionaryTypeValue is not null.
+ *
+ * Use the DEFAULT_DATA_DICTIONARY_NAME provided in TypePrettyNameSupplier when: The class is found but getDataDictionaryTypeValue is null OR the
+ * class is found but is not an instance of TypePrettyNameSupplier.
+ *
+ * Use the value from determineUnknownType when: An exception occurs
+ */
+ try {
+ Object typeObject = Class.forName(type).getDeclaredConstructor().newInstance();
+ currField.addType(typeObject instanceof TypePrettyNameSupplier && ((TypePrettyNameSupplier) typeObject).getDataDictionaryTypeValue() != null
+ ? ((TypePrettyNameSupplier) typeObject).getDataDictionaryTypeValue()
+ : TypePrettyNameSupplier.DEFAULT_DATA_DICTIONARY_NAME);
+ } catch (RuntimeException | ClassNotFoundException | NoSuchMethodException | InvocationTargetException | InstantiationException
+ | IllegalAccessException e) {
+ currField.addType(determineUnknownType(type));
+ }
}
// Set the last updated date for the current {@link DefaultMetadataField} based on the timestamp of the current entry.
diff --git a/service/src/test/java/datawave/microservice/metadata/DefaultMetadataFieldScannerTest.java b/service/src/test/java/datawave/microservice/metadata/DefaultMetadataFieldScannerTest.java
index 502959c..ca85fb6 100644
--- a/service/src/test/java/datawave/microservice/metadata/DefaultMetadataFieldScannerTest.java
+++ b/service/src/test/java/datawave/microservice/metadata/DefaultMetadataFieldScannerTest.java
@@ -5,6 +5,7 @@
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
@@ -77,16 +78,12 @@ public void setUp() throws Exception {
connector.tableOperations().create(MODEL_TABLE);
populateMetadataTable();
- Map normalizerMapping = new HashMap<>();
- normalizerMapping.put("datawave.data.type.LcNoDiacriticsType", "Text");
- normalizerMapping.put("datawave.data.type.NumberType", "Number");
-
Connection connectionConfig = new Connection();
connectionConfig.setAccumuloClient(connector);
connectionConfig.setMetadataTable(METADATA_TABLE);
connectionConfig.setAuths(AUTHS);
- scanner = new DefaultMetadataFieldScanner(new MarkingFunctions.Default(), RESPONSE_OBJECT_FACTORY, normalizerMapping, connectionConfig, 1);
+ scanner = new DefaultMetadataFieldScanner(new MarkingFunctions.Default(), RESPONSE_OBJECT_FACTORY, connectionConfig, 1);
}
@Test
@@ -109,16 +106,24 @@ public void whenRetrievingFields_givenNoDataTypeFilters_shouldReturnUnfilteredRe
contributorId.setDescription(Collections.singleton(createDescription("ContributorId Description")));
contributorId.setLastUpdated(DATE);
+ DefaultMetadataField ipAddress = new DefaultMetadataField();
+ ipAddress.setFieldName("IP_ADDRESS");
+ ipAddress.setDataType("csv");
+ ipAddress.setForwardIndexed(true);
+ ipAddress.setTypes(Collections.singletonList("IP Address"));
+ ipAddress.setDescription(Collections.singleton(createDescription("IpAddress Description")));
+ ipAddress.setLastUpdated(DATE);
+
DefaultMetadataField name = new DefaultMetadataField();
name.setFieldName("NAME");
name.setDataType("tvmaze");
name.setForwardIndexed(true);
name.setReverseIndexed(true);
- name.setTypes(Collections.singletonList("Unknown"));
+ name.setTypes(Collections.singletonList("Cat"));
name.setLastUpdated(DATE);
Collection fields = scanner.getFields(Collections.emptyMap(), Collections.emptySet());
- assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, name);
+ assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, ipAddress, name);
}
@Test
@@ -141,11 +146,19 @@ public void whenRetrievingFields_givenDataTypeFilters_shouldReturnFilteredResult
contributorId.setDescription(Collections.singleton(createDescription("ContributorId Description")));
contributorId.setLastUpdated(DATE);
+ DefaultMetadataField ipAddress = new DefaultMetadataField();
+ ipAddress.setFieldName("IP_ADDRESS");
+ ipAddress.setDataType("csv");
+ ipAddress.setForwardIndexed(true);
+ ipAddress.setTypes(Collections.singletonList("IP Address"));
+ ipAddress.setDescription(Collections.singleton(createDescription("IpAddress Description")));
+ ipAddress.setLastUpdated(DATE);
+
Set dataTypeFilters = new HashSet<>();
dataTypeFilters.add("csv");
dataTypeFilters.add("enwiki");
Collection fields = scanner.getFields(Collections.emptyMap(), dataTypeFilters);
- assertThat(fields).containsExactlyInAnyOrder(barField, contributorId);
+ assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, ipAddress);
}
@Test
@@ -170,19 +183,29 @@ public void whenRetrievingFields_givenAliases_shouldReturnResultsWithAliases() t
contributorId.setDescription(Collections.singleton(createDescription("ContributorId Description")));
contributorId.setLastUpdated(DATE);
+ DefaultMetadataField ipAddress = new DefaultMetadataField();
+ ipAddress.setFieldName("ip_address");
+ ipAddress.setInternalFieldName("IP_ADDRESS");
+ ipAddress.setDataType("csv");
+ ipAddress.setForwardIndexed(true);
+ ipAddress.setTypes(Collections.singletonList("IP Address"));
+ ipAddress.setDescription(Collections.singleton(createDescription("IpAddress Description")));
+ ipAddress.setLastUpdated(DATE);
+
DefaultMetadataField name = new DefaultMetadataField();
name.setFieldName("NAME");
name.setDataType("tvmaze");
name.setForwardIndexed(true);
name.setReverseIndexed(true);
- name.setTypes(Collections.singletonList("Unknown"));
+ name.setTypes(Collections.singletonList("Cat"));
name.setLastUpdated(DATE);
Map aliases = new HashMap<>();
aliases.put("BAR_FIELD", "bar_field_alias");
aliases.put("CONTRIBUTOR_ID", "contributor_id_alias");
+ aliases.put("IP_ADDRESS", "ip_address");
Collection fields = scanner.getFields(aliases, Collections.emptySet());
- assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, name);
+ assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, ipAddress, name);
}
private void populateMetadataTable() throws TableNotFoundException, MutationsRejectedException {
@@ -201,16 +224,24 @@ private void populateMetadataTable() throws TableNotFoundException, MutationsRej
contributorId.put(new Text(ColumnFamilyConstants.COLF_DESC), new Text("enwiki"), new ColumnVisibility("PRIVATE"), TIMESTAMP,
new Value("ContributorId Description"));
+ Mutation ipAddress = new Mutation(new Text("IP_ADDRESS"));
+ ipAddress.put(new Text(ColumnFamilyConstants.COLF_E), new Text("csv"), TIMESTAMP, new Value());
+ ipAddress.put(new Text(ColumnFamilyConstants.COLF_I), new Text("csv"), TIMESTAMP, new Value());
+ ipAddress.put(new Text(ColumnFamilyConstants.COLF_T), new Text("csv\0datawave.data.type.IpAddressType"), TIMESTAMP, new Value());
+ ipAddress.put(new Text(ColumnFamilyConstants.COLF_DESC), new Text("csv"), new ColumnVisibility("PRIVATE"), TIMESTAMP,
+ new Value("IpAddress Description"));
+
Mutation name = new Mutation(new Text("NAME"));
name.put(new Text(ColumnFamilyConstants.COLF_E), new Text("tvmaze"), TIMESTAMP, new Value());
name.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP, new Value());
name.put(new Text(ColumnFamilyConstants.COLF_RI), new Text("tvmaze"), TIMESTAMP, new Value());
- name.put(new Text(ColumnFamilyConstants.COLF_T), new Text("tvmaze\0not.a.known.type"), TIMESTAMP, new Value());
+ name.put(new Text(ColumnFamilyConstants.COLF_T), new Text("tvmaze\0datawave.data.type.catType"), TIMESTAMP, new Value());
BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L).setMaxLatency(1, TimeUnit.SECONDS).setMaxWriteThreads(1);
BatchWriter writer = connector.createBatchWriter(METADATA_TABLE, bwConfig);
writer.addMutation(barField);
writer.addMutation(contributorId);
+ writer.addMutation(ipAddress);
writer.addMutation(name);
writer.flush();
writer.close();