diff --git a/api/pom.xml b/api/pom.xml index 9172630..783fc97 100644 --- a/api/pom.xml +++ b/api/pom.xml @@ -21,6 +21,7 @@ 3.0.0 31.1-jre 3.0.0 + 2.0.3-SNAPSHOT @@ -59,6 +60,21 @@ + + gov.nsa.datawave.microservice + type-utils + ${version.microservice.type-utils} + + + log4j + log4j + + + avro + org.apache.avro + + + @@ -74,6 +90,10 @@ gov.nsa.datawave.microservice metadata-utils + + gov.nsa.datawave.microservice + type-utils + org.projectlombok lombok diff --git a/service/pom.xml b/service/pom.xml index 96ebf2f..6786468 100644 --- a/service/pom.xml +++ b/service/pom.xml @@ -22,7 +22,7 @@ 2.1.1 5.2.0 3.0.1 - 3.0.0 + 3.0.1-SNAPSHOT 3.0.0 2.0.0 1.11.4 diff --git a/service/src/main/java/datawave/microservice/AccumuloConnectionService.java b/service/src/main/java/datawave/microservice/AccumuloConnectionService.java index b1d6701..0be6919 100644 --- a/service/src/main/java/datawave/microservice/AccumuloConnectionService.java +++ b/service/src/main/java/datawave/microservice/AccumuloConnectionService.java @@ -20,7 +20,7 @@ import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.iterators.user.RegExFilter; import org.apache.accumulo.core.security.Authorizations; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.stereotype.Service; diff --git a/service/src/main/java/datawave/microservice/dictionary/DataDictionaryController.java b/service/src/main/java/datawave/microservice/dictionary/DataDictionaryController.java index 66830e8..fc3c769 100644 --- a/service/src/main/java/datawave/microservice/dictionary/DataDictionaryController.java +++ b/service/src/main/java/datawave/microservice/dictionary/DataDictionaryController.java @@ -11,7 +11,7 @@ import java.util.Set; import java.util.function.Consumer; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.http.MediaType; import org.springframework.security.access.annotation.Secured; @@ -73,7 +73,6 @@ public DataDictionaryController(DataDictionaryProperties dataDictionaryConfigura this.dataDictionary = dataDictionary; this.responseObjectFactory = responseObjectFactory; this.accumuloConnectionService = accumloConnectionService; - dataDictionary.setNormalizationMap(dataDictionaryConfiguration.getNormalizerMap()); } /** diff --git a/service/src/main/java/datawave/microservice/dictionary/EdgeDictionaryController.java b/service/src/main/java/datawave/microservice/dictionary/EdgeDictionaryController.java index 4edf5d1..d15fe82 100644 --- a/service/src/main/java/datawave/microservice/dictionary/EdgeDictionaryController.java +++ b/service/src/main/java/datawave/microservice/dictionary/EdgeDictionaryController.java @@ -2,7 +2,7 @@ import static datawave.microservice.http.converter.protostuff.ProtostuffHttpMessageConverter.PROTOSTUFF_VALUE; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.http.MediaType; import org.springframework.security.core.annotation.AuthenticationPrincipal; diff --git a/service/src/main/java/datawave/microservice/dictionary/data/DataDictionary.java b/service/src/main/java/datawave/microservice/dictionary/data/DataDictionary.java index c2b64a2..b4ce57b 100644 --- a/service/src/main/java/datawave/microservice/dictionary/data/DataDictionary.java +++ b/service/src/main/java/datawave/microservice/dictionary/data/DataDictionary.java @@ -14,10 +14,6 @@ public interface DataDictionary,DESC extends DescriptionBase,FIELD extends DictionaryFieldBase> { - Map getNormalizationMap(); - - void setNormalizationMap(Map normalizationMap); - Collection getFields(Connection connectionConfig, Collection dataTypeFilters, int numThreads) throws Exception; void setDescription(Connection connectionConfig, FIELD description) throws Exception; diff --git a/service/src/main/java/datawave/microservice/dictionary/data/DataDictionaryImpl.java b/service/src/main/java/datawave/microservice/dictionary/data/DataDictionaryImpl.java index 85dd4b8..91c81ce 100644 --- a/service/src/main/java/datawave/microservice/dictionary/data/DataDictionaryImpl.java +++ b/service/src/main/java/datawave/microservice/dictionary/data/DataDictionaryImpl.java @@ -36,7 +36,6 @@ public class DataDictionaryImpl implements DataDictionary responseObjectFactory; private final MetadataHelperFactory metadataHelperFactory; private final MetadataDescriptionsHelperFactory metadataDescriptionsHelperFactory; - private Map normalizationMap = Maps.newHashMap(); public DataDictionaryImpl(MarkingFunctions markingFunctions, ResponseObjectFactory responseObjectFactory, @@ -47,16 +46,6 @@ public DataDictionaryImpl(MarkingFunctions markingFunctions, this.metadataDescriptionsHelperFactory = metadataDescriptionsHelperFactory; } - @Override - public Map getNormalizationMap() { - return normalizationMap; - } - - @Override - public void setNormalizationMap(Map normalizationMap) { - this.normalizationMap = normalizationMap; - } - /** * Retrieve metadata fields from the specified metadata table, aggregated by field name and data type. * @@ -76,8 +65,7 @@ public void setNormalizationMap(Map normalizationMap) { @Override public Collection getFields(Connection connectionConfig, Collection dataTypeFilters, int numThreads) throws Exception { Map aliases = getAliases(connectionConfig); - DefaultMetadataFieldScanner scanner = new DefaultMetadataFieldScanner(markingFunctions, responseObjectFactory, normalizationMap, connectionConfig, - numThreads); + DefaultMetadataFieldScanner scanner = new DefaultMetadataFieldScanner(markingFunctions, responseObjectFactory, connectionConfig, numThreads); return scanner.getFields(aliases, dataTypeFilters); } diff --git a/service/src/main/java/datawave/microservice/metadata/DefaultMetadataFieldScanner.java b/service/src/main/java/datawave/microservice/metadata/DefaultMetadataFieldScanner.java index bd5a845..8f56c3f 100644 --- a/service/src/main/java/datawave/microservice/metadata/DefaultMetadataFieldScanner.java +++ b/service/src/main/java/datawave/microservice/metadata/DefaultMetadataFieldScanner.java @@ -1,14 +1,12 @@ package datawave.microservice.metadata; import java.io.IOException; -import java.text.SimpleDateFormat; +import java.lang.reflect.InvocationTargetException; import java.time.Instant; -import java.time.LocalDate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.Collection; import java.util.Collections; -import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; @@ -23,6 +21,7 @@ import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.user.WholeRowIterator; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,6 +29,7 @@ import com.google.common.collect.Maps; import datawave.data.ColumnFamilyConstants; +import datawave.data.type.util.TypePrettyNameSupplier; import datawave.marking.MarkingFunctions; import datawave.microservice.Connection; import datawave.microservice.dictionary.config.ResponseObjectFactory; @@ -47,16 +47,14 @@ public class DefaultMetadataFieldScanner { private final MarkingFunctions markingFunctions; private final ResponseObjectFactory responseObjectFactory; - private final Map normalizationMap; private final Connection connectionConfig; private final int numThreads; public DefaultMetadataFieldScanner(MarkingFunctions markingFunctions, ResponseObjectFactory responseObjectFactory, - Map normalizationMap, Connection connectionConfig, int numThreads) { + Connection connectionConfig, int numThreads) { this.markingFunctions = markingFunctions; this.responseObjectFactory = responseObjectFactory; - this.normalizationMap = normalizationMap; this.connectionConfig = connectionConfig; this.numThreads = numThreads; } @@ -266,13 +264,38 @@ private void setDescriptions() throws MarkingFunctions.Exception { currField.getDescriptions().add(description); } - // Set the normalized type for the current {@link DefaultMetadataField}. If no normalized version can be found for the type, the type will default to - // "Unknown". + // Ensures first letter of the type is always capitalized. + // Ensures redundant terminology like 'Type' is removed. + private String determineUnknownType(String unknown) { + String[] unknownType = unknown.split("\\."); + return StringUtils.capitalize(unknownType[unknownType.length - 1].replace("Type", "")); + } + + // Set the normalized type for the current {@link DefaultMetadataField}. private void setType() { int nullPos = currColumnQualifier.indexOf('\0'); String type = currColumnQualifier.substring(nullPos + 1); - String normalizedType = normalizationMap.get(type); - currField.addType(normalizedType != null ? normalizedType : "Unknown"); + /* + * Attempt to get a new instance of the class within 'type'. This will be used to determine what value(s) should be placed into the 'Types' field in + * the data dictionary. + * + * Use the value returned from getDataDictionaryTypeValue when: The class can be found AND it is an instance of TypePrettyNameSupplier AND + * getDataDictionaryTypeValue is not null. + * + * Use the DEFAULT_DATA_DICTIONARY_NAME provided in TypePrettyNameSupplier when: The class is found but getDataDictionaryTypeValue is null OR the + * class is found but is not an instance of TypePrettyNameSupplier. + * + * Use the value from determineUnknownType when: An exception occurs + */ + try { + Object typeObject = Class.forName(type).getDeclaredConstructor().newInstance(); + currField.addType(typeObject instanceof TypePrettyNameSupplier && ((TypePrettyNameSupplier) typeObject).getDataDictionaryTypeValue() != null + ? ((TypePrettyNameSupplier) typeObject).getDataDictionaryTypeValue() + : TypePrettyNameSupplier.DEFAULT_DATA_DICTIONARY_NAME); + } catch (RuntimeException | ClassNotFoundException | NoSuchMethodException | InvocationTargetException | InstantiationException + | IllegalAccessException e) { + currField.addType(determineUnknownType(type)); + } } // Set the last updated date for the current {@link DefaultMetadataField} based on the timestamp of the current entry. diff --git a/service/src/test/java/datawave/microservice/metadata/DefaultMetadataFieldScannerTest.java b/service/src/test/java/datawave/microservice/metadata/DefaultMetadataFieldScannerTest.java index 502959c..ca85fb6 100644 --- a/service/src/test/java/datawave/microservice/metadata/DefaultMetadataFieldScannerTest.java +++ b/service/src/test/java/datawave/microservice/metadata/DefaultMetadataFieldScannerTest.java @@ -5,6 +5,7 @@ import java.time.LocalDateTime; import java.time.ZoneId; import java.time.ZonedDateTime; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -77,16 +78,12 @@ public void setUp() throws Exception { connector.tableOperations().create(MODEL_TABLE); populateMetadataTable(); - Map normalizerMapping = new HashMap<>(); - normalizerMapping.put("datawave.data.type.LcNoDiacriticsType", "Text"); - normalizerMapping.put("datawave.data.type.NumberType", "Number"); - Connection connectionConfig = new Connection(); connectionConfig.setAccumuloClient(connector); connectionConfig.setMetadataTable(METADATA_TABLE); connectionConfig.setAuths(AUTHS); - scanner = new DefaultMetadataFieldScanner(new MarkingFunctions.Default(), RESPONSE_OBJECT_FACTORY, normalizerMapping, connectionConfig, 1); + scanner = new DefaultMetadataFieldScanner(new MarkingFunctions.Default(), RESPONSE_OBJECT_FACTORY, connectionConfig, 1); } @Test @@ -109,16 +106,24 @@ public void whenRetrievingFields_givenNoDataTypeFilters_shouldReturnUnfilteredRe contributorId.setDescription(Collections.singleton(createDescription("ContributorId Description"))); contributorId.setLastUpdated(DATE); + DefaultMetadataField ipAddress = new DefaultMetadataField(); + ipAddress.setFieldName("IP_ADDRESS"); + ipAddress.setDataType("csv"); + ipAddress.setForwardIndexed(true); + ipAddress.setTypes(Collections.singletonList("IP Address")); + ipAddress.setDescription(Collections.singleton(createDescription("IpAddress Description"))); + ipAddress.setLastUpdated(DATE); + DefaultMetadataField name = new DefaultMetadataField(); name.setFieldName("NAME"); name.setDataType("tvmaze"); name.setForwardIndexed(true); name.setReverseIndexed(true); - name.setTypes(Collections.singletonList("Unknown")); + name.setTypes(Collections.singletonList("Cat")); name.setLastUpdated(DATE); Collection fields = scanner.getFields(Collections.emptyMap(), Collections.emptySet()); - assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, name); + assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, ipAddress, name); } @Test @@ -141,11 +146,19 @@ public void whenRetrievingFields_givenDataTypeFilters_shouldReturnFilteredResult contributorId.setDescription(Collections.singleton(createDescription("ContributorId Description"))); contributorId.setLastUpdated(DATE); + DefaultMetadataField ipAddress = new DefaultMetadataField(); + ipAddress.setFieldName("IP_ADDRESS"); + ipAddress.setDataType("csv"); + ipAddress.setForwardIndexed(true); + ipAddress.setTypes(Collections.singletonList("IP Address")); + ipAddress.setDescription(Collections.singleton(createDescription("IpAddress Description"))); + ipAddress.setLastUpdated(DATE); + Set dataTypeFilters = new HashSet<>(); dataTypeFilters.add("csv"); dataTypeFilters.add("enwiki"); Collection fields = scanner.getFields(Collections.emptyMap(), dataTypeFilters); - assertThat(fields).containsExactlyInAnyOrder(barField, contributorId); + assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, ipAddress); } @Test @@ -170,19 +183,29 @@ public void whenRetrievingFields_givenAliases_shouldReturnResultsWithAliases() t contributorId.setDescription(Collections.singleton(createDescription("ContributorId Description"))); contributorId.setLastUpdated(DATE); + DefaultMetadataField ipAddress = new DefaultMetadataField(); + ipAddress.setFieldName("ip_address"); + ipAddress.setInternalFieldName("IP_ADDRESS"); + ipAddress.setDataType("csv"); + ipAddress.setForwardIndexed(true); + ipAddress.setTypes(Collections.singletonList("IP Address")); + ipAddress.setDescription(Collections.singleton(createDescription("IpAddress Description"))); + ipAddress.setLastUpdated(DATE); + DefaultMetadataField name = new DefaultMetadataField(); name.setFieldName("NAME"); name.setDataType("tvmaze"); name.setForwardIndexed(true); name.setReverseIndexed(true); - name.setTypes(Collections.singletonList("Unknown")); + name.setTypes(Collections.singletonList("Cat")); name.setLastUpdated(DATE); Map aliases = new HashMap<>(); aliases.put("BAR_FIELD", "bar_field_alias"); aliases.put("CONTRIBUTOR_ID", "contributor_id_alias"); + aliases.put("IP_ADDRESS", "ip_address"); Collection fields = scanner.getFields(aliases, Collections.emptySet()); - assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, name); + assertThat(fields).containsExactlyInAnyOrder(barField, contributorId, ipAddress, name); } private void populateMetadataTable() throws TableNotFoundException, MutationsRejectedException { @@ -201,16 +224,24 @@ private void populateMetadataTable() throws TableNotFoundException, MutationsRej contributorId.put(new Text(ColumnFamilyConstants.COLF_DESC), new Text("enwiki"), new ColumnVisibility("PRIVATE"), TIMESTAMP, new Value("ContributorId Description")); + Mutation ipAddress = new Mutation(new Text("IP_ADDRESS")); + ipAddress.put(new Text(ColumnFamilyConstants.COLF_E), new Text("csv"), TIMESTAMP, new Value()); + ipAddress.put(new Text(ColumnFamilyConstants.COLF_I), new Text("csv"), TIMESTAMP, new Value()); + ipAddress.put(new Text(ColumnFamilyConstants.COLF_T), new Text("csv\0datawave.data.type.IpAddressType"), TIMESTAMP, new Value()); + ipAddress.put(new Text(ColumnFamilyConstants.COLF_DESC), new Text("csv"), new ColumnVisibility("PRIVATE"), TIMESTAMP, + new Value("IpAddress Description")); + Mutation name = new Mutation(new Text("NAME")); name.put(new Text(ColumnFamilyConstants.COLF_E), new Text("tvmaze"), TIMESTAMP, new Value()); name.put(new Text(ColumnFamilyConstants.COLF_I), new Text("tvmaze"), TIMESTAMP, new Value()); name.put(new Text(ColumnFamilyConstants.COLF_RI), new Text("tvmaze"), TIMESTAMP, new Value()); - name.put(new Text(ColumnFamilyConstants.COLF_T), new Text("tvmaze\0not.a.known.type"), TIMESTAMP, new Value()); + name.put(new Text(ColumnFamilyConstants.COLF_T), new Text("tvmaze\0datawave.data.type.catType"), TIMESTAMP, new Value()); BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L).setMaxLatency(1, TimeUnit.SECONDS).setMaxWriteThreads(1); BatchWriter writer = connector.createBatchWriter(METADATA_TABLE, bwConfig); writer.addMutation(barField); writer.addMutation(contributorId); + writer.addMutation(ipAddress); writer.addMutation(name); writer.flush(); writer.close();