Skip to content

Commit

Permalink
Adds support for dense_vector type
Browse files Browse the repository at this point in the history
A dense_vector field stores dense vectors of float values. The maximum number of dimensions that can be in a vector should not exceed 2048. A dense_vector field is a single-valued field.

Closes spring-projects#1700
  • Loading branch information
bkimmig authored and morganlutz committed Feb 25, 2021
1 parent 1c549b7 commit 07be2d8
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
* @author Peter-Josef Meisch
* @author Xiao Yu
* @author Aleksei Arsenev
* @author Brian Kimmig
* @author Morgan Lutz
*/
@Retention(RetentionPolicy.RUNTIME)
@Target({ ElementType.FIELD, ElementType.ANNOTATION_TYPE })
Expand Down Expand Up @@ -157,14 +159,14 @@

/**
* if true, the field will be stored in Elasticsearch even if it has a null value
*
*
* @since 4.1
*/
boolean storeNullValue() default false;

/**
* to be used in combination with {@link FieldType#Rank_Feature}
*
*
* @since 4.1
*/
boolean positiveScoreImpact() default true;
Expand All @@ -185,4 +187,11 @@
* @since 4.1
*/
NullValueType nullValueType() default NullValueType.String;

/**
* Number of dimensions of the vector; to be used in combination with {@link FieldType#Dense_Vector}. Values greater
* than 2048 are rejected when the mapping parameters are created. Values below 1 (including the default of -1) are
* not written to the mapping.
*
* @since 4.2
*/
int dims() default -1;
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
* @author Zeng Zetang
* @author Peter-Josef Meisch
* @author Aleksei Arsenev
* @author Brian Kimmig
* @author Morgan Lutz
*/
public enum FieldType {
Auto, //
Expand Down Expand Up @@ -57,5 +59,7 @@ public enum FieldType {
/** @since 4.1 */
Rank_Features, //
/** @since 4.2 */
Wildcard //
Wildcard, //
/** @since 4.2 */
Dense_Vector //
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
* @author Xiao Yu
* @author Peter-Josef Meisch
* @author Aleksei Arsenev
* @author Brian Kimmig
* @author Morgan Lutz
*/
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.ANNOTATION_TYPE)
Expand Down Expand Up @@ -140,4 +142,11 @@
* @since 4.1
*/
NullValueType nullValueType() default NullValueType.String;

/**
* Number of dimensions of the vector; to be used in combination with {@link FieldType#Dense_Vector}. Values greater
* than 2048 are rejected when the mapping parameters are created. Values below 1 (including the default of -1) are
* not written to the mapping.
*
* @since 4.2
*/
int dims() default -1;
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
*
* @author Peter-Josef Meisch
* @author Aleksei Arsenev
* @author Brian Kimmig
* @author Morgan Lutz
* @since 4.0
*/
public final class MappingParameters {
Expand All @@ -65,6 +67,7 @@ public final class MappingParameters {
static final String FIELD_PARAM_NULL_VALUE = "null_value";
static final String FIELD_PARAM_POSITION_INCREMENT_GAP = "position_increment_gap";
static final String FIELD_PARAM_POSITIVE_SCORE_IMPACT = "positive_score_impact";
static final String FIELD_PARAM_DIMS = "dims";
static final String FIELD_PARAM_SCALING_FACTOR = "scaling_factor";
static final String FIELD_PARAM_SEARCH_ANALYZER = "search_analyzer";
static final String FIELD_PARAM_STORE = "store";
Expand Down Expand Up @@ -94,6 +97,7 @@ public final class MappingParameters {
private final NullValueType nullValueType;
private final Integer positionIncrementGap;
private final boolean positiveScoreImpact;
private final Integer dims;
private final String searchAnalyzer;
private final double scalingFactor;
private final Similarity similarity;
Expand Down Expand Up @@ -153,6 +157,8 @@ private MappingParameters(Field field) {
|| (maxShingleSize >= 2 && maxShingleSize <= 4), //
"maxShingleSize must be in inclusive range from 2 to 4 for field type search_as_you_type");
positiveScoreImpact = field.positiveScoreImpact();
dims = field.dims();
Assert.isTrue(dims <= 2048, "The maximum number of dimensions that can be in a vector should not exceed 2048.");
// dims is a required mapping parameter for dense_vector fields; without this check the annotation default (-1) or
// any other value below 1 would silently be left out of the written mapping instead of being reported here
Assert.isTrue(type != FieldType.Dense_Vector || dims >= 1,
		"dims must be set to a value between 1 and 2048 for field type dense_vector");
Assert.isTrue(field.enabled() || type == FieldType.Object, "enabled false is only allowed for field type object");
enabled = field.enabled();
eagerGlobalOrdinals = field.eagerGlobalOrdinals();
Expand Down Expand Up @@ -191,6 +197,8 @@ private MappingParameters(InnerField field) {
|| (maxShingleSize >= 2 && maxShingleSize <= 4), //
"maxShingleSize must be in inclusive range from 2 to 4 for field type search_as_you_type");
positiveScoreImpact = field.positiveScoreImpact();
dims = field.dims();
Assert.isTrue(dims <= 2048, "The maximum number of dimensions that can be in a vector should not exceed 2048.");
// dims is a required mapping parameter for dense_vector fields; without this check the annotation default (-1) or
// any other value below 1 would silently be left out of the written mapping instead of being reported here
Assert.isTrue(type != FieldType.Dense_Vector || dims >= 1,
		"dims must be set to a value between 1 and 2048 for field type dense_vector");
enabled = true;
eagerGlobalOrdinals = field.eagerGlobalOrdinals();
}
Expand Down Expand Up @@ -323,6 +331,10 @@ public void writeTypeAndParametersTo(XContentBuilder builder) throws IOException
builder.field(FIELD_PARAM_POSITIVE_SCORE_IMPACT, positiveScoreImpact);
}

// dims is only written when a positive value was configured; the annotation default of -1 leaves it out
if (dims >= 1) {
builder.field(FIELD_PARAM_DIMS, dims);
}

if (!enabled) {
builder.field(FIELD_PARAM_ENABLED, enabled);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@
* @author Peter-Josef Meisch
* @author Xiao Yu
* @author Roman Puchkovskiy
* @author Brian Kimmig
* @author Morgan Lutz
*/
@SpringIntegrationTest
@ContextConfiguration(classes = { ElasticsearchRestTemplateConfiguration.class })
Expand Down Expand Up @@ -271,6 +273,16 @@ void shouldWriteWildcardFieldMapping() {
indexOps.putMapping();
}

@Test // #1700
@DisplayName("should write dense_vector field mapping")
void shouldWriteDenseVectorFieldMapping() {

	IndexOperations indexOps = operations.indexOps(DenseVectorEntity.class);
	indexOps.create();
	try {
		indexOps.putMapping();
	} finally {
		// always remove the index again so that a failing putMapping does not leave it behind for later runs
		indexOps.delete();
	}
}

@Test // #1370
@DisplayName("should write mapping for disabled entity")
void shouldWriteMappingForDisabledEntity() {
Expand Down Expand Up @@ -657,4 +669,11 @@ static class DisabledMappingProperty {
@Field(type = Text) private String text;
@Mapping(enabled = false) @Field(type = Object) private Object object;
}

/**
* Test entity stored in the "densevector-test" index with a single dense_vector property of 3 dimensions.
*/
@Data
@Document(indexName = "densevector-test")
static class DenseVectorEntity {
@Id private String id;
@Field(type = Dense_Vector, dims = 3) private float[] dense_vector;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@

import org.elasticsearch.search.suggest.completion.context.ContextMapping;
import org.json.JSONException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.springframework.data.annotation.Id;
Expand Down Expand Up @@ -69,6 +70,8 @@
* @author Peter-Josef Meisch
* @author Xiao Yu
* @author Roman Puchkovskiy
* @author Brian Kimmig
* @author Morgan Lutz
*/
public class MappingBuilderUnitTests extends MappingContextBaseTests {

Expand Down Expand Up @@ -506,6 +509,23 @@ void shouldWriteRankFeatureProperties() throws JSONException {
assertEquals(expected, mapping, false);
}

@Test // #1700
@DisplayName("should write dense_vector properties")
void shouldWriteDenseVectorProperties() throws JSONException {
	// build the property mapping for the entity first, then compare it with the expected JSON document
	String actualMapping = getMappingBuilder().buildPropertyMapping(DenseVectorEntity.class);

	String expectedMapping = String.join("\n", //
			"{", //
			" \"properties\": {", //
			" \"my_vector\": {", //
			" \"type\": \"dense_vector\",", //
			" \"dims\": 16", //
			" }", //
			" }", //
			"}") + "\n";

	assertEquals(expectedMapping, actualMapping, false);
}

@Test // #1370
@DisplayName("should not write mapping when enabled is false on entity")
void shouldNotWriteMappingWhenEnabledIsFalseOnEntity() throws JSONException {
Expand Down Expand Up @@ -963,6 +983,13 @@ static class RankFeatureEntity {
@Field(type = FieldType.Rank_Features) private Map<String, Integer> topics;
}

/**
* Test entity with a single dense_vector property named my_vector that is configured with 16 dimensions.
*/
@Data
static class DenseVectorEntity {

@Id private String id;
@Field(type = FieldType.Dense_Vector, dims = 16) private float[] my_vector;
}

@Data
@Mapping(enabled = false)
static class DisabledMappingEntity {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.springframework.data.elasticsearch.core.index;

import static org.assertj.core.api.Assertions.*;
import static org.springframework.data.elasticsearch.annotations.FieldType.Dense_Vector;
import static org.springframework.data.elasticsearch.annotations.FieldType.Object;

import java.lang.annotation.Annotation;
Expand All @@ -17,6 +18,8 @@

/**
* @author Peter-Josef Meisch
* @author Brian Kimmig
* @author Morgan Lutz
*/
public class MappingParametersTest extends MappingContextBaseTests {

Expand Down Expand Up @@ -66,6 +69,16 @@ void shouldAllowEnabledFalseOnlyOnObjectFields() {
assertThatThrownBy(() -> MappingParameters.from(annotation)).isInstanceOf(IllegalArgumentException.class);
}

@Test // #1700
@DisplayName("should not allow dims length greater than 2048")
void shouldNotAllowDimsLengthGreaterThan2048() {
	ElasticsearchPersistentEntity<?> failEntity = elasticsearchConverter.get().getMappingContext()
			.getRequiredPersistentEntity(InvalidDenseVectorClass.class);
	Annotation annotation = failEntity.getRequiredPersistentProperty("dense_vector").findAnnotation(Field.class);

	// make sure the annotation was actually found, so the failure below is caused by its dims value
	assertThat(annotation).isNotNull();
	// other parameter validations raise IllegalArgumentException as well, so pin the failure to the dims limit
	assertThatThrownBy(() -> MappingParameters.from(annotation)).isInstanceOf(IllegalArgumentException.class)
			.hasMessageContaining("2048");
}

static class AnnotatedClass {
@Nullable @Field private String field;
@Nullable @MultiField(mainField = @Field,
Expand All @@ -79,4 +92,8 @@ static class AnnotatedClass {
/**
* Fixture with an invalid field definition: enabled = false is set on a text field, although it is only allowed for
* field type object.
*/
static class InvalidEnabledFieldClass {
@Nullable @Field(type = FieldType.Text, enabled = false) private String disabledObject;
}

/**
* Fixture with an invalid dense_vector definition: dims = 2049 exceeds the allowed maximum of 2048. Used by
* shouldNotAllowDimsLengthGreaterThan2048.
*/
static class InvalidDenseVectorClass {
@Field(type = Dense_Vector, dims = 2049) private float[] dense_vector;
}
}

0 comments on commit 07be2d8

Please sign in to comment.