From a027f044b552e87958966337f2066df112da97b6 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Mon, 5 Oct 2020 13:53:07 +0200 Subject: [PATCH] Apply spotless. Fix esplugin application. --- build.gradle | 13 +- gradle/validation/spotless.gradle | 24 + gradle/validation/spotless/source-header.txt | 0 .../elasticsearch/ClusteringAction.java | 2237 ++++++++-------- .../elasticsearch/ClusteringContext.java | 23 +- .../elasticsearch/ClusteringException.java | 34 +- .../elasticsearch/ClusteringPlugin.java | 259 +- .../carrot2/elasticsearch/DocumentGroup.java | 297 ++- .../elasticsearch/FieldMappingSpec.java | 45 +- .../carrot2/elasticsearch/FieldSource.java | 71 +- .../carrot2/elasticsearch/InputDocument.java | 45 +- .../elasticsearch/ListAlgorithmsAction.java | 398 ++- .../carrot2/elasticsearch/LoggerUtils.java | 17 +- .../carrot2/elasticsearch/LogicalField.java | 49 +- .../OptionalQueryHintSetterVisitor.java | 65 +- .../elasticsearch/PathResourceLookup.java | 90 +- .../carrot2/elasticsearch/Preconditions.java | 28 +- .../org/carrot2/elasticsearch/ToString.java | 46 +- .../elasticsearch/ClusteringActionIT.java | 522 ++-- .../elasticsearch/ClusteringActionRestIT.java | 380 +-- .../org/carrot2/elasticsearch/Lingo3G.java | 4 +- .../elasticsearch/ListAlgorithmsActionIT.java | 43 +- .../MultithreadedClusteringIT.java | 70 +- .../elasticsearch/SampleDocumentData.java | 2331 +++++++++-------- .../elasticsearch/SampleIndexTestCase.java | 539 ++-- 25 files changed, 3835 insertions(+), 3795 deletions(-) create mode 100644 gradle/validation/spotless.gradle create mode 100644 gradle/validation/spotless/source-header.txt diff --git a/build.gradle b/build.gradle index 39b8bfa..c9d7b29 100644 --- a/build.gradle +++ b/build.gradle @@ -1,8 +1,4 @@ -// This plugin's version (typically must match that of ES). -version = '7.9.2' -group = 'org.carrot2' - buildscript { ext { version_es = '7.9.2' @@ -22,9 +18,16 @@ buildscript { plugins { id 'java-library' id 'idea' - id 'elasticsearch.esplugin' + id 'com.diffplug.gradle.spotless' version "4.5.1" apply false } +apply plugin: 'elasticsearch.esplugin' +apply from: file('gradle/validation/spotless.gradle') + +// This plugin's version (typically must match that of ES). +version = '7.9.2' +group = 'org.carrot2' + repositories { mavenLocal() mavenCentral() diff --git a/gradle/validation/spotless.gradle b/gradle/validation/spotless.gradle new file mode 100644 index 0000000..e753a73 --- /dev/null +++ b/gradle/validation/spotless.gradle @@ -0,0 +1,24 @@ + +allprojects { prj -> + plugins.withType(JavaPlugin) { + prj.apply plugin: 'com.diffplug.gradle.spotless' + + spotless { + java { + licenseHeaderFile rootProject.file("gradle/validation/spotless/source-header.txt") + lineEndings 'UNIX' + endWithNewline() + googleJavaFormat('1.8') + } + + check.dependsOn(spotlessCheck) + } + + task tidy() { + description "Applies formatters and cleanups to sources." + group "verification" + + dependsOn spotlessApply + } + } +} diff --git a/gradle/validation/spotless/source-header.txt b/gradle/validation/spotless/source-header.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/main/java/org/carrot2/elasticsearch/ClusteringAction.java b/src/main/java/org/carrot2/elasticsearch/ClusteringAction.java index 9b740fb..7e16170 100644 --- a/src/main/java/org/carrot2/elasticsearch/ClusteringAction.java +++ b/src/main/java/org/carrot2/elasticsearch/ClusteringAction.java @@ -1,5 +1,28 @@ package org.carrot2.elasticsearch; +import static org.carrot2.elasticsearch.LoggerUtils.emitErrorResponse; +import static org.elasticsearch.action.ValidateActions.addValidationError; +import static org.elasticsearch.rest.RestRequest.Method.GET; +import static org.elasticsearch.rest.RestRequest.Method.POST; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.CopyOnWriteArraySet; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.carrot2.attrs.Attrs; @@ -60,749 +83,717 @@ import org.elasticsearch.transport.TransportRequestHandler; import org.elasticsearch.transport.TransportService; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.EnumMap; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.concurrent.CopyOnWriteArraySet; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; - -import static org.carrot2.elasticsearch.LoggerUtils.emitErrorResponse; -import static org.elasticsearch.action.ValidateActions.addValidationError; -import static org.elasticsearch.rest.RestRequest.Method.GET; -import static org.elasticsearch.rest.RestRequest.Method.POST; - -/** - * Perform clustering of search results. - */ -public class ClusteringAction - extends ActionType { - /* Action name. */ - public static final String NAME = "indices:data/read/cluster"; - - /* Reusable singleton. */ - public static final ClusteringAction INSTANCE = new ClusteringAction(); - - private ClusteringAction() { - super(NAME, ClusteringActionResponse::new); - } - - @Override - public Writeable.Reader getResponseReader() { - return ClusteringActionResponse::new; - } - - /** - * An {@link ActionRequest} for {@link ClusteringAction}. - */ - public static class ClusteringActionRequest extends ActionRequest implements IndicesRequest.Replaceable { - public static String JSON_QUERY_HINT = "query_hint"; - public static String JSON_FIELD_MAPPING = "field_mapping"; - public static String JSON_ALGORITHM = "algorithm"; - public static String JSON_ATTRIBUTES = "attributes"; - public static String JSON_SEARCH_REQUEST = "search_request"; - public static String JSON_MAX_HITS = "max_hits"; - public static String JSON_CREATE_UNGROUPED_CLUSTER = "create_ungrouped"; - public static String JSON_LANGUAGE = "language"; - - private SearchRequest searchRequest; - private String queryHint; - private List fieldMapping = new ArrayList<>(); - private String algorithm; - private int maxHits = Integer.MAX_VALUE; - private Map attributes; - private boolean createUngroupedDocumentsCluster; - private String defaultLanguage = "English"; - - /** - * Set the {@link SearchRequest} to use for fetching documents to be clustered. - * The search request must fetch enough documents for clustering to make sense - * (set size appropriately). - * - * @param searchRequest search request to set - * @return same builder instance - */ - public ClusteringActionRequest setSearchRequest(SearchRequest searchRequest) { - this.searchRequest = searchRequest; - return this; - } - - /** - * @see #setSearchRequest(SearchRequest) - * - * @param builder The search builder - * @return Returns same object for chaining. - */ - public ClusteringActionRequest setSearchRequest(SearchRequestBuilder builder) { - return setSearchRequest(builder.request()); - } - - ClusteringActionRequest() { - } - - public ClusteringActionRequest(StreamInput in) throws IOException { - SearchRequest searchRequest = new SearchRequest(in); - - this.searchRequest = searchRequest; - this.queryHint = in.readOptionalString(); - this.algorithm = in.readOptionalString(); - this.maxHits = in.readInt(); - this.createUngroupedDocumentsCluster = in.readBoolean(); - this.defaultLanguage = in.readString(); - - int count = in.readVInt(); - while (count-- > 0) { - FieldMappingSpec spec = new FieldMappingSpec(in); - fieldMapping.add(spec); - } - - boolean hasAttributes = in.readBoolean(); - if (hasAttributes) { - attributes = in.readMap(); - } - } - - public SearchRequest getSearchRequest() { - return searchRequest; - } - - /** - * @param queryHint A set of terms which correspond to the query. This hint helps the - * clustering algorithm to avoid trivial clusters around the query terms. Typically the query - * terms hint will be identical to what the user typed in the search box. - *

- * The hint may be an empty string but must not be null. - * @return same builder instance - */ - public ClusteringActionRequest setQueryHint(String queryHint) { - this.queryHint = queryHint; - return this; - } - - /** - * @see #setQueryHint(String) - * - * @return Query hint - */ - public String getQueryHint() { - return queryHint; - } - - /** - * Sets the identifier of the clustering algorithm to use. If null, the default - * algorithm will be used (depending on what's available). - * - * @param algorithm identifier of the clustering algorithm to use. - * @return Same object for chaining - */ - public ClusteringActionRequest setAlgorithm(String algorithm) { - this.algorithm = algorithm; - return this; - } - - /** - * @see #setAlgorithm - * - * @return The current algorithm to use for clustering - */ - public String getAlgorithm() { - return algorithm; - } - - /** - * Sets the maximum number of hits to return with the response. Setting this - * value to zero will only return clusters, without any hits (can be used - * to save bandwidth if only cluster labels are needed). - *

- * Set to {@link Integer#MAX_VALUE} to include all the hits. - * - * @param maxHits Maximum hits - */ - public void setMaxHits(int maxHits) { - assert maxHits >= 0; - this.maxHits = maxHits; - } - - /** - * Sets {@link #setMaxHits(int)} from a string. An empty string or null means - * all hits should be included. - * - * @param value Maximum number of hits. - */ - public void setMaxHits(String value) { - if (value == null || value.trim().isEmpty()) { - setMaxHits(Integer.MAX_VALUE); - } else { - setMaxHits(Integer.parseInt(value)); - } - } - - /** - * @return Returns the maximum number of hits to be returned as part of the response. * If equal - * to {@link Integer#MAX_VALUE}, then all hits will be returned. - */ - public int getMaxHits() { - return maxHits; - } - - /** - * Sets a map of runtime override attributes for clustering algorithms. - * - * @param map Clustering attributes to use. - * @return Same object for chaining - */ - public ClusteringActionRequest setAttributes(Map map) { - this.attributes = map; - return this; - } - - /** - * @see #setAttributes(Map) - * - * @return Clustering algorithm attributes map - */ - public Map getAttributes() { - return attributes; - } - - /** - * Parses some {@link org.elasticsearch.common.xcontent.XContent} and fills in the request. - * - * @param source arg - * @param xContentType arg - * @param xContentRegistry arg - */ - @SuppressWarnings("unchecked") - public void source(BytesReference source, XContentType xContentType, NamedXContentRegistry xContentRegistry) { - if (source == null || source.length() == 0) { - return; - } - - try (XContentParser parser = XContentHelper.createParser(xContentRegistry, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, source, xContentType)) { - // We should avoid reparsing search_request here - // but it's terribly difficult to slice the underlying byte - // buffer to get just the search request. - Map asMap = parser.mapOrdered(); - - Boolean createUngrouped = (Boolean) asMap.get(JSON_CREATE_UNGROUPED_CLUSTER); - if (createUngrouped != null) { - setCreateUngroupedDocumentsCluster(createUngrouped); - } - - String queryHint = (String) asMap.get(JSON_QUERY_HINT); - if (queryHint != null) { - setQueryHint(queryHint); - } - - String defaultLanguage = (String) asMap.get(JSON_LANGUAGE); - if (defaultLanguage != null) { - setDefaultLanguage(defaultLanguage); - } - - Map> fieldMapping = (Map>) asMap.get(JSON_FIELD_MAPPING); - if (fieldMapping != null) { - parseFieldSpecs(fieldMapping); - } - - String algorithm = (String) asMap.get(JSON_ALGORITHM); - if (algorithm != null) { - setAlgorithm(algorithm); - } - - Map attributes = (Map) asMap.get(JSON_ATTRIBUTES); - if (attributes != null) { - setAttributes(attributes); - } - - Map searchRequestMap = (Map) asMap.get(JSON_SEARCH_REQUEST); - if (searchRequestMap != null) { - if (this.searchRequest == null) { - searchRequest = new SearchRequest(); - } - - XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON).map(searchRequestMap); - XContentParser searchXParser = XContentFactory.xContent(XContentType.JSON) - .createParser(xContentRegistry, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, Strings.toString(builder)); - SearchSourceBuilder searchSourceBuilder = - SearchSourceBuilder.fromXContent(searchXParser); - searchRequest.source(searchSourceBuilder); - } - - Object maxHits = asMap.get(JSON_MAX_HITS); - if (maxHits != null) { - setMaxHits(maxHits.toString()); - } - } catch (Exception e) { - String sSource = "_na_"; - try { - sSource = XContentHelper.convertToJson(source, false, false, xContentType); - } catch (Throwable e1) { - // ignore - } - throw new ClusteringException("Failed to parse source [" + sSource + "]", e); - } - } - - private void parseFieldSpecs(Map> fieldSpecs) { - for (Map.Entry> e : fieldSpecs.entrySet()) { - LogicalField logicalField = LogicalField.valueOfCaseInsensitive(e.getKey()); - if (logicalField != null) { - for (String fieldSpec : e.getValue()) { - addFieldMappingSpec(fieldSpec, logicalField); - } - } - } - } - - /** - * Map a hit's field to a logical section of a document to be clustered (title, content or URL). - * - * @see LogicalField - * - * @param fieldName field name - * @param logicalField logical field mapping. - * @return Same object for chaining - */ - public ClusteringActionRequest addFieldMapping(String fieldName, LogicalField logicalField) { - fieldMapping.add(new FieldMappingSpec(fieldName, logicalField, FieldSource.FIELD)); - return this; - } - - /** - * Map a hit's source field (field unpacked from the _source document) - * to a logical section of a document to be clustered (title, content or URL). - * - * @see LogicalField - * - * @param sourceFieldName field name - * @param logicalField logical field mapping. - * @return Same object for chaining - */ - public ClusteringActionRequest addSourceFieldMapping(String sourceFieldName, LogicalField logicalField) { - fieldMapping.add(new FieldMappingSpec(sourceFieldName, logicalField, FieldSource.SOURCE)); - return this; - } - - /** - * Map a hit's highligted field (fragments of the original field) to a logical section - * of a document to be clustered. This may be used to decrease the amount of information - * passed to the clustering engine but also to "focus" the clustering engine on the context - * of the query. - * - * @param fieldName field name - * @param logicalField logical field mapping. - * @return Same object for chaining - */ - public ClusteringActionRequest addHighlightedFieldMapping(String fieldName, LogicalField logicalField) { - fieldMapping.add(new FieldMappingSpec(fieldName, logicalField, FieldSource.HIGHLIGHT)); - return this; - } - - /** - * Add a (valid!) field mapping specification to a logical field. - * - * @see FieldSource - * - * @param fieldSpec field specification - * @param logicalField logical field mapping. - * @return Same object for chaining - */ - public ClusteringActionRequest addFieldMappingSpec(String fieldSpec, LogicalField logicalField) { - FieldSource.ParsedFieldSource pfs = FieldSource.parseSpec(fieldSpec); - if (pfs.source != null) { - switch (pfs.source) { - case HIGHLIGHT: - addHighlightedFieldMapping(pfs.fieldName, logicalField); - break; - - case FIELD: - addFieldMapping(pfs.fieldName, logicalField); - break; - - case SOURCE: - addSourceFieldMapping(pfs.fieldName, logicalField); - break; - - default: - throw new RuntimeException(); - } - } - - if (pfs.source == null) { - throw new ElasticsearchException("Field mapping specification must contain a " + - " valid source prefix for the field source: " + fieldSpec); - } - - return this; - } - - /** - * Access to prepared field mapping. - */ - List getFieldMapping() { - return fieldMapping; - } - - @Override - public ActionRequestValidationException validate() { - ActionRequestValidationException validationException = null; - if (searchRequest == null) { - validationException = addValidationError("No delegate search request", +/** Perform clustering of search results. */ +public class ClusteringAction extends ActionType { + /* Action name. */ + public static final String NAME = "indices:data/read/cluster"; + + /* Reusable singleton. */ + public static final ClusteringAction INSTANCE = new ClusteringAction(); + + private ClusteringAction() { + super(NAME, ClusteringActionResponse::new); + } + + @Override + public Writeable.Reader getResponseReader() { + return ClusteringActionResponse::new; + } + + /** An {@link ActionRequest} for {@link ClusteringAction}. */ + public static class ClusteringActionRequest extends ActionRequest + implements IndicesRequest.Replaceable { + public static String JSON_QUERY_HINT = "query_hint"; + public static String JSON_FIELD_MAPPING = "field_mapping"; + public static String JSON_ALGORITHM = "algorithm"; + public static String JSON_ATTRIBUTES = "attributes"; + public static String JSON_SEARCH_REQUEST = "search_request"; + public static String JSON_MAX_HITS = "max_hits"; + public static String JSON_CREATE_UNGROUPED_CLUSTER = "create_ungrouped"; + public static String JSON_LANGUAGE = "language"; + + private SearchRequest searchRequest; + private String queryHint; + private List fieldMapping = new ArrayList<>(); + private String algorithm; + private int maxHits = Integer.MAX_VALUE; + private Map attributes; + private boolean createUngroupedDocumentsCluster; + private String defaultLanguage = "English"; + + /** + * Set the {@link SearchRequest} to use for fetching documents to be clustered. The search + * request must fetch enough documents for clustering to make sense (set size + * appropriately). + * + * @param searchRequest search request to set + * @return same builder instance + */ + public ClusteringActionRequest setSearchRequest(SearchRequest searchRequest) { + this.searchRequest = searchRequest; + return this; + } + + /** + * @see #setSearchRequest(SearchRequest) + * @param builder The search builder + * @return Returns same object for chaining. + */ + public ClusteringActionRequest setSearchRequest(SearchRequestBuilder builder) { + return setSearchRequest(builder.request()); + } + + ClusteringActionRequest() {} + + public ClusteringActionRequest(StreamInput in) throws IOException { + SearchRequest searchRequest = new SearchRequest(in); + + this.searchRequest = searchRequest; + this.queryHint = in.readOptionalString(); + this.algorithm = in.readOptionalString(); + this.maxHits = in.readInt(); + this.createUngroupedDocumentsCluster = in.readBoolean(); + this.defaultLanguage = in.readString(); + + int count = in.readVInt(); + while (count-- > 0) { + FieldMappingSpec spec = new FieldMappingSpec(in); + fieldMapping.add(spec); + } + + boolean hasAttributes = in.readBoolean(); + if (hasAttributes) { + attributes = in.readMap(); + } + } + + public SearchRequest getSearchRequest() { + return searchRequest; + } + + /** + * @param queryHint A set of terms which correspond to the query. This hint helps the clustering + * algorithm to avoid trivial clusters around the query terms. Typically the query terms + * hint will be identical to what the user typed in the search box. + *

The hint may be an empty string but must not be null. + * @return same builder instance + */ + public ClusteringActionRequest setQueryHint(String queryHint) { + this.queryHint = queryHint; + return this; + } + + /** + * @see #setQueryHint(String) + * @return Query hint + */ + public String getQueryHint() { + return queryHint; + } + + /** + * Sets the identifier of the clustering algorithm to use. If null, the default + * algorithm will be used (depending on what's available). + * + * @param algorithm identifier of the clustering algorithm to use. + * @return Same object for chaining + */ + public ClusteringActionRequest setAlgorithm(String algorithm) { + this.algorithm = algorithm; + return this; + } + + /** + * @see #setAlgorithm + * @return The current algorithm to use for clustering + */ + public String getAlgorithm() { + return algorithm; + } + + /** + * Sets the maximum number of hits to return with the response. Setting this value to zero will + * only return clusters, without any hits (can be used to save bandwidth if only cluster labels + * are needed). + * + *

Set to {@link Integer#MAX_VALUE} to include all the hits. + * + * @param maxHits Maximum hits + */ + public void setMaxHits(int maxHits) { + assert maxHits >= 0; + this.maxHits = maxHits; + } + + /** + * Sets {@link #setMaxHits(int)} from a string. An empty string or null means all hits should be + * included. + * + * @param value Maximum number of hits. + */ + public void setMaxHits(String value) { + if (value == null || value.trim().isEmpty()) { + setMaxHits(Integer.MAX_VALUE); + } else { + setMaxHits(Integer.parseInt(value)); + } + } + + /** + * @return Returns the maximum number of hits to be returned as part of the response. * If equal + * to {@link Integer#MAX_VALUE}, then all hits will be returned. + */ + public int getMaxHits() { + return maxHits; + } + + /** + * Sets a map of runtime override attributes for clustering algorithms. + * + * @param map Clustering attributes to use. + * @return Same object for chaining + */ + public ClusteringActionRequest setAttributes(Map map) { + this.attributes = map; + return this; + } + + /** + * @see #setAttributes(Map) + * @return Clustering algorithm attributes map + */ + public Map getAttributes() { + return attributes; + } + + /** + * Parses some {@link org.elasticsearch.common.xcontent.XContent} and fills in the request. + * + * @param source arg + * @param xContentType arg + * @param xContentRegistry arg + */ + @SuppressWarnings("unchecked") + public void source( + BytesReference source, XContentType xContentType, NamedXContentRegistry xContentRegistry) { + if (source == null || source.length() == 0) { + return; + } + + try (XContentParser parser = + XContentHelper.createParser( + xContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + source, + xContentType)) { + // We should avoid reparsing search_request here + // but it's terribly difficult to slice the underlying byte + // buffer to get just the search request. + Map asMap = parser.mapOrdered(); + + Boolean createUngrouped = (Boolean) asMap.get(JSON_CREATE_UNGROUPED_CLUSTER); + if (createUngrouped != null) { + setCreateUngroupedDocumentsCluster(createUngrouped); + } + + String queryHint = (String) asMap.get(JSON_QUERY_HINT); + if (queryHint != null) { + setQueryHint(queryHint); + } + + String defaultLanguage = (String) asMap.get(JSON_LANGUAGE); + if (defaultLanguage != null) { + setDefaultLanguage(defaultLanguage); + } + + Map> fieldMapping = + (Map>) asMap.get(JSON_FIELD_MAPPING); + if (fieldMapping != null) { + parseFieldSpecs(fieldMapping); + } + + String algorithm = (String) asMap.get(JSON_ALGORITHM); + if (algorithm != null) { + setAlgorithm(algorithm); + } + + Map attributes = (Map) asMap.get(JSON_ATTRIBUTES); + if (attributes != null) { + setAttributes(attributes); + } + + Map searchRequestMap = (Map) asMap.get(JSON_SEARCH_REQUEST); + if (searchRequestMap != null) { + if (this.searchRequest == null) { + searchRequest = new SearchRequest(); + } + + XContentBuilder builder = + XContentFactory.contentBuilder(XContentType.JSON).map(searchRequestMap); + XContentParser searchXParser = + XContentFactory.xContent(XContentType.JSON) + .createParser( + xContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + Strings.toString(builder)); + SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(searchXParser); + searchRequest.source(searchSourceBuilder); + } + + Object maxHits = asMap.get(JSON_MAX_HITS); + if (maxHits != null) { + setMaxHits(maxHits.toString()); + } + } catch (Exception e) { + String sSource = "_na_"; + try { + sSource = XContentHelper.convertToJson(source, false, false, xContentType); + } catch (Throwable e1) { + // ignore + } + throw new ClusteringException("Failed to parse source [" + sSource + "]", e); + } + } + + private void parseFieldSpecs(Map> fieldSpecs) { + for (Map.Entry> e : fieldSpecs.entrySet()) { + LogicalField logicalField = LogicalField.valueOfCaseInsensitive(e.getKey()); + if (logicalField != null) { + for (String fieldSpec : e.getValue()) { + addFieldMappingSpec(fieldSpec, logicalField); + } + } + } + } + + /** + * Map a hit's field to a logical section of a document to be clustered (title, content or URL). + * + * @see LogicalField + * @param fieldName field name + * @param logicalField logical field mapping. + * @return Same object for chaining + */ + public ClusteringActionRequest addFieldMapping(String fieldName, LogicalField logicalField) { + fieldMapping.add(new FieldMappingSpec(fieldName, logicalField, FieldSource.FIELD)); + return this; + } + + /** + * Map a hit's source field (field unpacked from the _source document) to a logical + * section of a document to be clustered (title, content or URL). + * + * @see LogicalField + * @param sourceFieldName field name + * @param logicalField logical field mapping. + * @return Same object for chaining + */ + public ClusteringActionRequest addSourceFieldMapping( + String sourceFieldName, LogicalField logicalField) { + fieldMapping.add(new FieldMappingSpec(sourceFieldName, logicalField, FieldSource.SOURCE)); + return this; + } + + /** + * Map a hit's highligted field (fragments of the original field) to a logical section of a + * document to be clustered. This may be used to decrease the amount of information passed to + * the clustering engine but also to "focus" the clustering engine on the context of the query. + * + * @param fieldName field name + * @param logicalField logical field mapping. + * @return Same object for chaining + */ + public ClusteringActionRequest addHighlightedFieldMapping( + String fieldName, LogicalField logicalField) { + fieldMapping.add(new FieldMappingSpec(fieldName, logicalField, FieldSource.HIGHLIGHT)); + return this; + } + + /** + * Add a (valid!) field mapping specification to a logical field. + * + * @see FieldSource + * @param fieldSpec field specification + * @param logicalField logical field mapping. + * @return Same object for chaining + */ + public ClusteringActionRequest addFieldMappingSpec( + String fieldSpec, LogicalField logicalField) { + FieldSource.ParsedFieldSource pfs = FieldSource.parseSpec(fieldSpec); + if (pfs.source != null) { + switch (pfs.source) { + case HIGHLIGHT: + addHighlightedFieldMapping(pfs.fieldName, logicalField); + break; + + case FIELD: + addFieldMapping(pfs.fieldName, logicalField); + break; + + case SOURCE: + addSourceFieldMapping(pfs.fieldName, logicalField); + break; + + default: + throw new RuntimeException(); + } + } + + if (pfs.source == null) { + throw new ElasticsearchException( + "Field mapping specification must contain a " + + " valid source prefix for the field source: " + + fieldSpec); + } + + return this; + } + + /** Access to prepared field mapping. */ + List getFieldMapping() { + return fieldMapping; + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (searchRequest == null) { + validationException = addValidationError("No delegate search request", validationException); + } + + if (queryHint == null) { + validationException = + addValidationError( + "query hint may be empty but must not be null.", validationException); + } + + if (fieldMapping.isEmpty()) { + validationException = + addValidationError( + "At least one field should be mapped to a logical document field.", validationException); - } - - if (queryHint == null) { - validationException = addValidationError("query hint may be empty but must not be null.", - validationException); - } - - if (fieldMapping.isEmpty()) { - validationException = addValidationError("At least one field should be mapped to a logical document field.", - validationException); - } - - ActionRequestValidationException ex = searchRequest.validate(); - if (ex != null) { - if (validationException == null) { - validationException = new ActionRequestValidationException(); - } - validationException.addValidationErrors(ex.validationErrors()); - } - - return validationException; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - assert searchRequest != null; - this.searchRequest.writeTo(out); - out.writeOptionalString(queryHint); - out.writeOptionalString(algorithm); - out.writeInt(maxHits); - out.writeBoolean(createUngroupedDocumentsCluster); - out.writeString(defaultLanguage); - - out.writeVInt(fieldMapping.size()); - for (FieldMappingSpec spec : fieldMapping) { - spec.writeTo(out); - } - - boolean hasAttributes = (attributes != null); - out.writeBoolean(hasAttributes); - if (hasAttributes) { - out.writeMap(attributes); - } - } - - @Override - public IndicesRequest indices(String... strings) { - return searchRequest.indices(strings); - } - - @Override - public String[] indices() { - return searchRequest.indices(); } - @Override - public IndicesOptions indicesOptions() { - return searchRequest.indicesOptions(); - } - - public void setCreateUngroupedDocumentsCluster(boolean enabled) { - this.createUngroupedDocumentsCluster = enabled; - } - - public void setDefaultLanguage(String defaultLanguage) { - this.defaultLanguage = Objects.requireNonNull(defaultLanguage); - } - - public String getDefaultLanguage() { - return defaultLanguage; - } - } - - /** - * An {@link ActionRequestBuilder} for {@link ClusteringAction}. - */ - public static class ClusteringActionRequestBuilder - extends ActionRequestBuilder { - - public ClusteringActionRequestBuilder(ElasticsearchClient client) { - super(client, ClusteringAction.INSTANCE, new ClusteringActionRequest()); - } - - public ClusteringActionRequestBuilder setSearchRequest(SearchRequestBuilder builder) { - super.request.setSearchRequest(builder); - return this; - } - - public ClusteringActionRequestBuilder setSearchRequest(SearchRequest searchRequest) { - super.request.setSearchRequest(searchRequest); - return this; - } - - public ClusteringActionRequestBuilder setQueryHint(String queryHint) { - if (queryHint == null) { - throw new IllegalArgumentException("Query hint may be empty but must not be null."); - } - super.request.setQueryHint(queryHint); - return this; - } - - public ClusteringActionRequestBuilder setAlgorithm(String algorithm) { - super.request.setAlgorithm(algorithm); - return this; - } - - public ClusteringActionRequestBuilder setSource(BytesReference content, - XContentType xContentType, - NamedXContentRegistry xContentRegistry) { - super.request.source(content, xContentType, xContentRegistry); - return this; - } - - public ClusteringActionRequestBuilder setMaxHits(int maxHits) { - super.request.setMaxHits(maxHits); - return this; - } - - public ClusteringActionRequestBuilder setMaxHits(String maxHits) { - super.request.setMaxHits(maxHits); - return this; - } - - public ClusteringActionRequestBuilder addAttributes(Map attributes) { - if (super.request.getAttributes() == null) { - super.request.setAttributes(new HashMap()); - } - super.request.getAttributes().putAll(attributes); - return this; - } - - public ClusteringActionRequestBuilder addAttribute(String key, Object value) { - HashMap tmp = new HashMap(); - tmp.put(key, value); - return addAttributes(tmp); - } - - public ClusteringActionRequestBuilder setAttributes(Map attributes) { - super.request.setAttributes(attributes); - return this; - } - - public ClusteringActionRequestBuilder addFieldMapping(String fieldName, LogicalField logicalField) { - super.request.addFieldMapping(fieldName, logicalField); - return this; - } - - public ClusteringActionRequestBuilder addSourceFieldMapping(String fieldName, LogicalField logicalField) { - super.request.addSourceFieldMapping(fieldName, logicalField); - return this; - } - - public ClusteringActionRequestBuilder addHighlightedFieldMapping(String fieldName, LogicalField logicalField) { - super.request.addHighlightedFieldMapping(fieldName, logicalField); - return this; - } - - public ClusteringActionRequestBuilder addFieldMappingSpec(String fieldSpec, LogicalField logicalField) { - super.request.addFieldMappingSpec(fieldSpec, logicalField); - return this; - } - - public ClusteringActionRequestBuilder setCreateUngroupedDocumentsCluster(boolean enabled) { - super.request.setCreateUngroupedDocumentsCluster(enabled); - return this; - } - - public ClusteringActionRequestBuilder setDefaultLanguage(String language) { - super.request.setDefaultLanguage(language); - return this; - } - } - - /** - * An {@link ActionResponse} for {@link ClusteringAction}. - */ - public static class ClusteringActionResponse extends ActionResponse implements ToXContent { - /** - * Clustering-related response fields. - */ - static final class Fields { - static final String SEARCH_RESPONSE = "search_response"; - static final String CLUSTERS = "clusters"; - static final String INFO = "info"; - - // from SearchResponse - static final String _SCROLL_ID = "_scroll_id"; - static final String _SHARDS = "_shards"; - static final String TOTAL = "total"; - static final String SUCCESSFUL = "successful"; - static final String FAILED = "failed"; - static final String FAILURES = "failures"; - static final String STATUS = "status"; - static final String INDEX = "index"; - static final String SHARD = "shard"; - static final String REASON = "reason"; - static final String TOOK = "took"; - static final String TIMED_OUT = "timed_out"; - - /** - * {@link Fields#INFO} keys. - */ - static final class Info { - public static final String ALGORITHM = "algorithm"; - public static final String SEARCH_MILLIS = "search-millis"; - public static final String CLUSTERING_MILLIS = "clustering-millis"; - public static final String TOTAL_MILLIS = "total-millis"; - public static final String INCLUDE_HITS = "include-hits"; - public static final String MAX_HITS = "max-hits"; - public static final String LANGUAGES = "languages"; - } - } - - private SearchResponse searchResponse; - private DocumentGroup[] topGroups; - private Map info; - - ClusteringActionResponse(StreamInput in) throws IOException { - boolean hasSearchResponse = in.readBoolean(); - if (hasSearchResponse) { - this.searchResponse = new SearchResponse(in); - } - - int documentGroupsCount = in.readVInt(); - topGroups = new DocumentGroup[documentGroupsCount]; - for (int i = 0; i < documentGroupsCount; i++) { - DocumentGroup group = new DocumentGroup(in); - topGroups[i] = group; - } - - int entries = in.readVInt(); - info = new LinkedHashMap<>(); - for (int i = 0; i < entries; i++) { - info.put(in.readOptionalString(), in.readOptionalString()); - } - } - - public ClusteringActionResponse( - SearchResponse searchResponse, - DocumentGroup[] topGroups, - Map info) { - this.searchResponse = Preconditions.checkNotNull(searchResponse); - this.topGroups = Preconditions.checkNotNull(topGroups); - this.info = Collections.unmodifiableMap(Preconditions.checkNotNull(info)); - } - - public SearchResponse getSearchResponse() { - return searchResponse; - } - - public DocumentGroup[] getDocumentGroups() { - return topGroups; - } - - public Map getInfo() { - return info; - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) - throws IOException { - if (searchResponse != null) { - searchResponse.innerToXContent(builder, ToXContent.EMPTY_PARAMS); - } - - builder.startArray(Fields.CLUSTERS); - if (topGroups != null) { - for (DocumentGroup group : topGroups) { - group.toXContent(builder, params); - } - } - builder.endArray(); - builder.field(Fields.INFO, info); - return builder; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - boolean hasSearchResponse = searchResponse != null; - out.writeBoolean(hasSearchResponse); - if (hasSearchResponse) { - this.searchResponse.writeTo(out); - } - - out.writeVInt(topGroups == null ? 0 : topGroups.length); - if (topGroups != null) { - for (DocumentGroup group : topGroups) { - group.writeTo(out); - } - } - - out.writeVInt(info == null ? 0 : info.size()); - if (info != null) { - for (Map.Entry e : info.entrySet()) { - out.writeOptionalString(e.getKey()); - out.writeOptionalString(e.getValue()); - } - } - } - - @Override - public String toString() { - return ToString.objectToJson(this); - } - } - - /** - * A {@link TransportAction} for {@link ClusteringAction}. - */ - public static class TransportClusteringAction - extends TransportAction { - protected Logger logger = LogManager.getLogger(getClass()); - - private final Set langCodeWarnings = new CopyOnWriteArraySet<>(); - - private final TransportSearchAction searchAction; - private final ClusteringContext context; - - @Inject - public TransportClusteringAction(TransportService transportService, - TransportSearchAction searchAction, - ClusteringContext controllerSingleton, - ActionFilters actionFilters) { - super(ClusteringAction.NAME, - actionFilters, - transportService.getTaskManager()); - - this.searchAction = searchAction; - this.context = controllerSingleton; - transportService.registerRequestHandler( - ClusteringAction.NAME, - ThreadPool.Names.SAME, - ClusteringActionRequest::new, - new TransportHandler()); - } - - @Override - protected void doExecute(Task task, - final ClusteringActionRequest clusteringRequest, - final ActionListener listener) { - final long tsSearchStart = System.nanoTime(); + ActionRequestValidationException ex = searchRequest.validate(); + if (ex != null) { + if (validationException == null) { + validationException = new ActionRequestValidationException(); + } + validationException.addValidationErrors(ex.validationErrors()); + } + + return validationException; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + assert searchRequest != null; + this.searchRequest.writeTo(out); + out.writeOptionalString(queryHint); + out.writeOptionalString(algorithm); + out.writeInt(maxHits); + out.writeBoolean(createUngroupedDocumentsCluster); + out.writeString(defaultLanguage); + + out.writeVInt(fieldMapping.size()); + for (FieldMappingSpec spec : fieldMapping) { + spec.writeTo(out); + } + + boolean hasAttributes = (attributes != null); + out.writeBoolean(hasAttributes); + if (hasAttributes) { + out.writeMap(attributes); + } + } + + @Override + public IndicesRequest indices(String... strings) { + return searchRequest.indices(strings); + } + + @Override + public String[] indices() { + return searchRequest.indices(); + } + + @Override + public IndicesOptions indicesOptions() { + return searchRequest.indicesOptions(); + } + + public void setCreateUngroupedDocumentsCluster(boolean enabled) { + this.createUngroupedDocumentsCluster = enabled; + } + + public void setDefaultLanguage(String defaultLanguage) { + this.defaultLanguage = Objects.requireNonNull(defaultLanguage); + } + + public String getDefaultLanguage() { + return defaultLanguage; + } + } + + /** An {@link ActionRequestBuilder} for {@link ClusteringAction}. */ + public static class ClusteringActionRequestBuilder + extends ActionRequestBuilder { + + public ClusteringActionRequestBuilder(ElasticsearchClient client) { + super(client, ClusteringAction.INSTANCE, new ClusteringActionRequest()); + } + + public ClusteringActionRequestBuilder setSearchRequest(SearchRequestBuilder builder) { + super.request.setSearchRequest(builder); + return this; + } + + public ClusteringActionRequestBuilder setSearchRequest(SearchRequest searchRequest) { + super.request.setSearchRequest(searchRequest); + return this; + } + + public ClusteringActionRequestBuilder setQueryHint(String queryHint) { + if (queryHint == null) { + throw new IllegalArgumentException("Query hint may be empty but must not be null."); + } + super.request.setQueryHint(queryHint); + return this; + } + + public ClusteringActionRequestBuilder setAlgorithm(String algorithm) { + super.request.setAlgorithm(algorithm); + return this; + } + + public ClusteringActionRequestBuilder setSource( + BytesReference content, XContentType xContentType, NamedXContentRegistry xContentRegistry) { + super.request.source(content, xContentType, xContentRegistry); + return this; + } + + public ClusteringActionRequestBuilder setMaxHits(int maxHits) { + super.request.setMaxHits(maxHits); + return this; + } + + public ClusteringActionRequestBuilder setMaxHits(String maxHits) { + super.request.setMaxHits(maxHits); + return this; + } + + public ClusteringActionRequestBuilder addAttributes(Map attributes) { + if (super.request.getAttributes() == null) { + super.request.setAttributes(new HashMap()); + } + super.request.getAttributes().putAll(attributes); + return this; + } + + public ClusteringActionRequestBuilder addAttribute(String key, Object value) { + HashMap tmp = new HashMap(); + tmp.put(key, value); + return addAttributes(tmp); + } + + public ClusteringActionRequestBuilder setAttributes(Map attributes) { + super.request.setAttributes(attributes); + return this; + } + + public ClusteringActionRequestBuilder addFieldMapping( + String fieldName, LogicalField logicalField) { + super.request.addFieldMapping(fieldName, logicalField); + return this; + } + + public ClusteringActionRequestBuilder addSourceFieldMapping( + String fieldName, LogicalField logicalField) { + super.request.addSourceFieldMapping(fieldName, logicalField); + return this; + } + + public ClusteringActionRequestBuilder addHighlightedFieldMapping( + String fieldName, LogicalField logicalField) { + super.request.addHighlightedFieldMapping(fieldName, logicalField); + return this; + } + + public ClusteringActionRequestBuilder addFieldMappingSpec( + String fieldSpec, LogicalField logicalField) { + super.request.addFieldMappingSpec(fieldSpec, logicalField); + return this; + } + + public ClusteringActionRequestBuilder setCreateUngroupedDocumentsCluster(boolean enabled) { + super.request.setCreateUngroupedDocumentsCluster(enabled); + return this; + } + + public ClusteringActionRequestBuilder setDefaultLanguage(String language) { + super.request.setDefaultLanguage(language); + return this; + } + } + + /** An {@link ActionResponse} for {@link ClusteringAction}. */ + public static class ClusteringActionResponse extends ActionResponse implements ToXContent { + /** Clustering-related response fields. */ + static final class Fields { + static final String SEARCH_RESPONSE = "search_response"; + static final String CLUSTERS = "clusters"; + static final String INFO = "info"; + + // from SearchResponse + static final String _SCROLL_ID = "_scroll_id"; + static final String _SHARDS = "_shards"; + static final String TOTAL = "total"; + static final String SUCCESSFUL = "successful"; + static final String FAILED = "failed"; + static final String FAILURES = "failures"; + static final String STATUS = "status"; + static final String INDEX = "index"; + static final String SHARD = "shard"; + static final String REASON = "reason"; + static final String TOOK = "took"; + static final String TIMED_OUT = "timed_out"; + + /** {@link Fields#INFO} keys. */ + static final class Info { + public static final String ALGORITHM = "algorithm"; + public static final String SEARCH_MILLIS = "search-millis"; + public static final String CLUSTERING_MILLIS = "clustering-millis"; + public static final String TOTAL_MILLIS = "total-millis"; + public static final String INCLUDE_HITS = "include-hits"; + public static final String MAX_HITS = "max-hits"; + public static final String LANGUAGES = "languages"; + } + } + + private SearchResponse searchResponse; + private DocumentGroup[] topGroups; + private Map info; + + ClusteringActionResponse(StreamInput in) throws IOException { + boolean hasSearchResponse = in.readBoolean(); + if (hasSearchResponse) { + this.searchResponse = new SearchResponse(in); + } + + int documentGroupsCount = in.readVInt(); + topGroups = new DocumentGroup[documentGroupsCount]; + for (int i = 0; i < documentGroupsCount; i++) { + DocumentGroup group = new DocumentGroup(in); + topGroups[i] = group; + } + + int entries = in.readVInt(); + info = new LinkedHashMap<>(); + for (int i = 0; i < entries; i++) { + info.put(in.readOptionalString(), in.readOptionalString()); + } + } + + public ClusteringActionResponse( + SearchResponse searchResponse, DocumentGroup[] topGroups, Map info) { + this.searchResponse = Preconditions.checkNotNull(searchResponse); + this.topGroups = Preconditions.checkNotNull(topGroups); + this.info = Collections.unmodifiableMap(Preconditions.checkNotNull(info)); + } + + public SearchResponse getSearchResponse() { + return searchResponse; + } + + public DocumentGroup[] getDocumentGroups() { + return topGroups; + } + + public Map getInfo() { + return info; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (searchResponse != null) { + searchResponse.innerToXContent(builder, ToXContent.EMPTY_PARAMS); + } + + builder.startArray(Fields.CLUSTERS); + if (topGroups != null) { + for (DocumentGroup group : topGroups) { + group.toXContent(builder, params); + } + } + builder.endArray(); + builder.field(Fields.INFO, info); + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + boolean hasSearchResponse = searchResponse != null; + out.writeBoolean(hasSearchResponse); + if (hasSearchResponse) { + this.searchResponse.writeTo(out); + } + + out.writeVInt(topGroups == null ? 0 : topGroups.length); + if (topGroups != null) { + for (DocumentGroup group : topGroups) { + group.writeTo(out); + } + } + + out.writeVInt(info == null ? 0 : info.size()); + if (info != null) { + for (Map.Entry e : info.entrySet()) { + out.writeOptionalString(e.getKey()); + out.writeOptionalString(e.getValue()); + } + } + } + + @Override + public String toString() { + return ToString.objectToJson(this); + } + } + + /** A {@link TransportAction} for {@link ClusteringAction}. */ + public static class TransportClusteringAction + extends TransportAction< + ClusteringAction.ClusteringActionRequest, ClusteringAction.ClusteringActionResponse> { + protected Logger logger = LogManager.getLogger(getClass()); + + private final Set langCodeWarnings = new CopyOnWriteArraySet<>(); + + private final TransportSearchAction searchAction; + private final ClusteringContext context; + + @Inject + public TransportClusteringAction( + TransportService transportService, + TransportSearchAction searchAction, + ClusteringContext controllerSingleton, + ActionFilters actionFilters) { + super(ClusteringAction.NAME, actionFilters, transportService.getTaskManager()); + + this.searchAction = searchAction; + this.context = controllerSingleton; + transportService.registerRequestHandler( + ClusteringAction.NAME, + ThreadPool.Names.SAME, + ClusteringActionRequest::new, + new TransportHandler()); + } + + @Override + protected void doExecute( + Task task, + final ClusteringActionRequest clusteringRequest, + final ActionListener listener) { + final long tsSearchStart = System.nanoTime(); searchAction.execute( clusteringRequest.getSearchRequest(), new ActionListener() { @@ -977,412 +968,424 @@ private void removeReferenced( }); } }); - } - - public static T requireNonNullElse(T first, T def) { - return first != null ? first : def; - } + } + + public static T requireNonNullElse(T first, T def) { + return first != null ? first : def; + } + + protected SearchResponse filterMaxHits(SearchResponse response, int maxHits) { + // We will use internal APIs here for efficiency. The plugin has restricted explicit ES + // compatibility + // anyway. Alternatively, we could serialize/ filter/ deserialize JSON, but this seems + // simpler. + SearchHits allHits = response.getHits(); + SearchHit[] trimmedHits = new SearchHit[Math.min(maxHits, allHits.getHits().length)]; + System.arraycopy(allHits.getHits(), 0, trimmedHits, 0, trimmedHits.length); + + InternalAggregations _internalAggregations = null; + if (response.getAggregations() != null) { + _internalAggregations = + new InternalAggregations(toInternal(response.getAggregations().asList()), null); + } + + SearchHits _searchHits = + new SearchHits(trimmedHits, allHits.getTotalHits(), allHits.getMaxScore()); + + SearchProfileShardResults _searchProfileShardResults = + new SearchProfileShardResults(response.getProfileResults()); + + InternalSearchResponse _searchResponse = + new InternalSearchResponse( + _searchHits, + _internalAggregations, + response.getSuggest(), + _searchProfileShardResults, + response.isTimedOut(), + response.isTerminatedEarly(), + response.getNumReducePhases()); + + return new SearchResponse( + _searchResponse, + response.getScrollId(), + response.getTotalShards(), + response.getSuccessfulShards(), + response.getSkippedShards(), + response.getTook().getMillis(), + response.getShardFailures(), + response.getClusters()); + } + + private List toInternal(List list) { + List t = new ArrayList<>(list.size()); + for (Aggregation a : list) { + t.add((InternalAggregation) a); + } + return t; + } + + protected DocumentGroup[] adapt(List> clusters, AtomicInteger groupId) { + DocumentGroup[] groups = new DocumentGroup[clusters.size()]; + for (int i = 0; i < groups.length; i++) { + groups[i] = adapt(clusters.get(i), groupId); + } + return groups; + } + + private DocumentGroup adapt(Cluster cluster, AtomicInteger groupId) { + DocumentGroup group = new DocumentGroup(); + group.setId(groupId.incrementAndGet()); + group.setPhrases(cluster.getLabels().toArray(new String[0])); + group.setScore(cluster.getScore()); + + List documents = cluster.getDocuments(); + String[] documentReferences = new String[documents.size()]; + for (int i = 0; i < documentReferences.length; i++) { + documentReferences[i] = documents.get(i).getStringId(); + } + group.setDocumentReferences(documentReferences); + group.setSubgroups(adapt(cluster.getClusters(), groupId)); + + return group; + } + + /** Map {@link SearchHit} fields to logical fields of Carrot2 {@link Document}. */ + private List prepareDocumentsForClustering( + final ClusteringActionRequest request, SearchResponse response) { + SearchHit[] hits = response.getHits().getHits(); + List documents = new ArrayList<>(hits.length); + List fieldMapping = request.getFieldMapping(); + StringBuilder title = new StringBuilder(); + StringBuilder content = new StringBuilder(); + StringBuilder language = new StringBuilder(); + boolean emptySourceWarningEmitted = false; + + for (SearchHit hit : hits) { + // Prepare logical fields for each hit. + title.setLength(0); + content.setLength(0); + language.setLength(0); + + Map fields = hit.getFields(); + Map highlightFields = hit.getHighlightFields(); + + Map sourceAsMap = null; + for (FieldMappingSpec spec : fieldMapping) { + // Determine the content source. + Object appendContent = null; + outer: + switch (spec.source) { + case FIELD: + DocumentField hitField = fields.get(spec.field); + if (hitField != null) { + appendContent = hitField.getValue(); + } + break; - protected SearchResponse filterMaxHits(SearchResponse response, int maxHits) { - // We will use internal APIs here for efficiency. The plugin has restricted explicit ES compatibility - // anyway. Alternatively, we could serialize/ filter/ deserialize JSON, but this seems simpler. - SearchHits allHits = response.getHits(); - SearchHit[] trimmedHits = new SearchHit[Math.min(maxHits, allHits.getHits().length)]; - System.arraycopy(allHits.getHits(), 0, trimmedHits, 0, trimmedHits.length); - - InternalAggregations _internalAggregations = null; - if (response.getAggregations() != null) { - _internalAggregations = new InternalAggregations(toInternal(response.getAggregations().asList()), null); - } - - SearchHits _searchHits = - new SearchHits(trimmedHits, allHits.getTotalHits(), allHits.getMaxScore()); - - SearchProfileShardResults _searchProfileShardResults = new SearchProfileShardResults(response.getProfileResults()); - - InternalSearchResponse _searchResponse = - new InternalSearchResponse( - _searchHits, - _internalAggregations, - response.getSuggest(), - _searchProfileShardResults, - response.isTimedOut(), - response.isTerminatedEarly(), - response.getNumReducePhases()); - - return new SearchResponse( - _searchResponse, - response.getScrollId(), - response.getTotalShards(), - response.getSuccessfulShards(), - response.getSkippedShards(), - response.getTook().getMillis(), - response.getShardFailures(), - response.getClusters()); - } + case HIGHLIGHT: + HighlightField highlightField = highlightFields.get(spec.field); + if (highlightField != null) { + appendContent = join(Arrays.asList(highlightField.fragments())); + } + break; + + case SOURCE: + if (sourceAsMap == null) { + if (!hit.hasSource()) { + if (!emptySourceWarningEmitted) { + emptySourceWarningEmitted = true; + logger.warn( + "_source field mapping used but no source available for: {}, field {}", + hit.getId(), + spec.field); + } + } else { + sourceAsMap = hit.getSourceAsMap(); + } + } - private List toInternal(List list) { - List t = new ArrayList<>(list.size()); - for (Aggregation a : list) { - t.add((InternalAggregation) a); - } - return t; - } + if (sourceAsMap != null) { + String[] fieldNames = spec.field.split("\\."); + Object value = sourceAsMap; - protected DocumentGroup[] adapt(List> clusters, AtomicInteger groupId) { - DocumentGroup[] groups = new DocumentGroup[clusters.size()]; - for (int i = 0; i < groups.length; i++) { - groups[i] = adapt(clusters.get(i), groupId); - } - return groups; - } + // Descend into maps. + for (String fieldName : fieldNames) { + if (Map.class.isInstance(value)) { + value = ((Map) value).get(fieldName); + if (value == null) { + // No such key. + logger.warn( + "Cannot find field named '{}' from spec: '{}'", fieldName, spec.field); + break outer; + } + } else { + logger.warn("Field is not a map: {} in spec.: {}", fieldName, spec.field); + break outer; + } + } - private DocumentGroup adapt(Cluster cluster, AtomicInteger groupId) { - DocumentGroup group = new DocumentGroup(); - group.setId(groupId.incrementAndGet()); - group.setPhrases(cluster.getLabels().toArray(new String[0])); - group.setScore(cluster.getScore()); - - List documents = cluster.getDocuments(); - String[] documentReferences = new String[documents.size()]; - for (int i = 0; i < documentReferences.length; i++) { - documentReferences[i] = documents.get(i).getStringId(); - } - group.setDocumentReferences(documentReferences); - group.setSubgroups(adapt(cluster.getClusters(), groupId)); - - return group; - } + if (value instanceof List) { + appendContent = join((List) value); + } else { + appendContent = value; + } + } + break; - /** - * Map {@link SearchHit} fields to logical fields of Carrot2 {@link Document}. - */ - private List prepareDocumentsForClustering( - final ClusteringActionRequest request, - SearchResponse response) { - SearchHit[] hits = response.getHits().getHits(); - List documents = new ArrayList<>(hits.length); - List fieldMapping = request.getFieldMapping(); - StringBuilder title = new StringBuilder(); - StringBuilder content = new StringBuilder(); - StringBuilder language = new StringBuilder(); - boolean emptySourceWarningEmitted = false; - - for (SearchHit hit : hits) { - // Prepare logical fields for each hit. - title.setLength(0); - content.setLength(0); - language.setLength(0); - - Map fields = hit.getFields(); - Map highlightFields = hit.getHighlightFields(); - - Map sourceAsMap = null; - for (FieldMappingSpec spec : fieldMapping) { - // Determine the content source. - Object appendContent = null; - outer: - switch (spec.source) { - case FIELD: - DocumentField hitField = fields.get(spec.field); - if (hitField != null) { - appendContent = hitField.getValue(); - } - break; - - case HIGHLIGHT: - HighlightField highlightField = highlightFields.get(spec.field); - if (highlightField != null) { - appendContent = join(Arrays.asList(highlightField.fragments())); - } - break; - - case SOURCE: - if (sourceAsMap == null) { - if (!hit.hasSource()) { - if (!emptySourceWarningEmitted) { - emptySourceWarningEmitted = true; - logger.warn("_source field mapping used but no source available for: {}, field {}", - hit.getId(), - spec.field); - } - } else { - sourceAsMap = hit.getSourceAsMap(); - } - } - - if (sourceAsMap != null) { - String[] fieldNames = spec.field.split("\\."); - Object value = sourceAsMap; - - // Descend into maps. - for (String fieldName : fieldNames) { - if (Map.class.isInstance(value)) { - value = ((Map) value).get(fieldName); - if (value == null) { - // No such key. - logger.warn("Cannot find field named '{}' from spec: '{}'", - fieldName, - spec.field); - break outer; - } - } else { - logger.warn("Field is not a map: {} in spec.: {}", - fieldName, - spec.field); - break outer; - } - } - - if (value instanceof List) { - appendContent = join((List) value); - } else { - appendContent = value; - } - } - break; - - default: - throw org.carrot2.elasticsearch.Preconditions.unreachable(); - } - - // Determine the target field. - if (appendContent != null) { - StringBuilder target; - switch (spec.logicalField) { - case LANGUAGE: - language.setLength(0); // Clear previous (single mapping allowed); - target = language; - break; - case TITLE: - target = title; - break; - case CONTENT: - target = content; - break; - default: - throw org.carrot2.elasticsearch.Preconditions.unreachable(); - } + default: + throw org.carrot2.elasticsearch.Preconditions.unreachable(); + } + + // Determine the target field. + if (appendContent != null) { + StringBuilder target; + switch (spec.logicalField) { + case LANGUAGE: + language.setLength(0); // Clear previous (single mapping allowed); + target = language; + break; + case TITLE: + target = title; + break; + case CONTENT: + target = content; + break; + default: + throw org.carrot2.elasticsearch.Preconditions.unreachable(); + } - // Separate multiple fields with a single dot (prevent accidental phrase gluing). - if (target.length() > 0) { - target.append(" . "); - } - target.append(appendContent); - } + // Separate multiple fields with a single dot (prevent accidental phrase gluing). + if (target.length() > 0) { + target.append(" . "); } + target.append(appendContent); + } + } - String langCode = language.length() > 0 ? language.toString() : null; - InputDocument doc = new InputDocument( - title.toString(), - content.toString(), - langCode, - hit.getId()); + String langCode = language.length() > 0 ? language.toString() : null; + InputDocument doc = + new InputDocument(title.toString(), content.toString(), langCode, hit.getId()); + + documents.add(doc); + } - documents.add(doc); - } + return documents; + } - return documents; + static String join(List list) { + StringBuilder sb = new StringBuilder(); + for (Object t : list) { + if (sb.length() > 0) { + sb.append(" . "); + } + sb.append(t != null ? t.toString() : ""); } + return sb.toString(); + } - static String join(List list) { - StringBuilder sb = new StringBuilder(); - for (Object t : list) { - if (sb.length() > 0) { - sb.append(" . "); - } - sb.append(t != null ? t.toString() : ""); - } - return sb.toString(); + private final class TransportHandler + implements TransportRequestHandler { + @Override + public void messageReceived( + final ClusteringActionRequest request, final TransportChannel channel, Task task) + throws Exception { + execute( + request, + new ActionListener() { + @Override + public void onResponse(ClusteringActionResponse response) { + try { + channel.sendResponse(response); + } catch (Exception e) { + onFailure(e); + } + } + + @Override + public void onFailure(Exception e) { + try { + channel.sendResponse(e); + } catch (Exception e1) { + logger.warn( + "Failed to send error response for action [" + + ClusteringAction.NAME + + "] and request [" + + request + + "]", + e1); + } + } + }); + } + } + } + + /** An {@link BaseRestHandler} for {@link ClusteringAction}. */ + public static class RestClusteringAction extends BaseRestHandler { + protected Logger logger = LogManager.getLogger(getClass()); + + /** Action name suffix. */ + public static String NAME = "_search_with_clusters"; + + @Override + public List routes() { + return Arrays.asList( + new Route(POST, "/" + NAME), + new Route(POST, "/{index}/" + NAME), + new Route(POST, "/{index}/{type}/" + NAME), + new Route(GET, "/" + NAME), + new Route(GET, "/{index}/" + NAME), + new Route(GET, "/{index}/{type}/" + NAME)); + } + + @Override + public String getName() { + return NAME; + } + + @Override + @SuppressWarnings({"try", "deprecation"}) + public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) + throws IOException { + // A POST request must have a body. + if (request.method() == POST && !request.hasContent()) { + return channel -> + emitErrorResponse( + channel, + logger, + new IllegalArgumentException("Request body was expected for a POST request.")); } - private final class TransportHandler implements TransportRequestHandler { - @Override - public void messageReceived(final ClusteringActionRequest request, final TransportChannel channel, Task task) throws Exception { - execute(request, new ActionListener() { - @Override - public void onResponse(ClusteringActionResponse response) { - try { - channel.sendResponse(response); - } catch (Exception e) { - onFailure(e); - } - } + // Contrary to ES's default search handler we will not support + // GET requests with a body (this is against HTTP spec guidance + // in my opinion -- GET requests should not have a body). + if (request.method() == GET && request.hasContent()) { + return channel -> + emitErrorResponse( + channel, + logger, + new IllegalArgumentException("Request body was unexpected for a GET request.")); + } - @Override - public void onFailure(Exception e) { + // Build an action request with data from the request. + + // Parse incoming arguments depending on the HTTP method used to make + // the request. + final ClusteringActionRequestBuilder actionBuilder = + new ClusteringActionRequestBuilder(client); + SearchRequest searchRequest = new SearchRequest(); + switch (request.method()) { + case POST: + searchRequest.indices(Strings.splitStringByCommaToArray(request.param("index"))); + searchRequest.types(Strings.splitStringByCommaToArray(request.param("type"))); + actionBuilder.setSearchRequest(searchRequest); + actionBuilder.setSource( + request.content(), request.getXContentType(), request.getXContentRegistry()); + break; + + case GET: + RestSearchAction.parseSearchRequest( + searchRequest, + request, + null, + (size) -> { + searchRequest.source().size(size); + }); + actionBuilder.setSearchRequest(searchRequest); + fillFromGetRequest(actionBuilder, request); + break; + + default: + throw org.carrot2.elasticsearch.Preconditions.unreachable(); + } + + Set passSecurityHeaders = + new HashSet<>(Arrays.asList("es-security-runas-user", "_xpack_security_authentication")); + + Map securityHeaders = + client.threadPool().getThreadContext().getHeaders().entrySet().stream() + .filter(e -> passSecurityHeaders.contains(e.getKey())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + // Dispatch clustering request. + return channel -> { + try (ThreadContext.StoredContext ignored = + client.threadPool().getThreadContext().stashContext()) { + client.threadPool().getThreadContext().copyHeaders(securityHeaders.entrySet()); + client.execute( + ClusteringAction.INSTANCE, + actionBuilder.request(), + new ActionListener() { + @Override + public void onResponse(ClusteringActionResponse response) { try { - channel.sendResponse(e); - } catch (Exception e1) { - logger.warn("Failed to send error response for action [" - + ClusteringAction.NAME + "] and request [" + request + "]", e1); + XContentBuilder builder = channel.newBuilder(); + builder.startObject(); + response.toXContent(builder, request); + builder.endObject(); + channel.sendResponse( + new BytesRestResponse(response.getSearchResponse().status(), builder)); + } catch (Exception e) { + logger.debug("Failed to emit response.", e); + onFailure(e); } - } - }); - } - } - } + } - /** - * An {@link BaseRestHandler} for {@link ClusteringAction}. - */ - public static class RestClusteringAction extends BaseRestHandler { - protected Logger logger = LogManager.getLogger(getClass()); + @Override + public void onFailure(Exception e) { + emitErrorResponse(channel, logger, e); + } + }); + } + }; + } - /** - * Action name suffix. - */ - public static String NAME = "_search_with_clusters"; + private static final EnumMap GET_REQUEST_FIELDMAPPERS; - @Override - public List routes() { - return Arrays.asList( - new Route(POST, "/" + NAME), - new Route(POST, "/{index}/" + NAME), - new Route(POST, "/{index}/{type}/" + NAME), - new Route(GET, "/" + NAME), - new Route(GET, "/{index}/" + NAME), - new Route(GET, "/{index}/{type}/" + NAME) - ); + static { + GET_REQUEST_FIELDMAPPERS = new EnumMap<>(LogicalField.class); + for (LogicalField lf : LogicalField.values()) { + GET_REQUEST_FIELDMAPPERS.put(lf, "field_mapping_" + lf.name().toLowerCase(Locale.ROOT)); } + } - @Override - public String getName() { - return NAME; + /** Extract and parse HTTP GET parameters for the clustering request. */ + private void fillFromGetRequest( + ClusteringActionRequestBuilder actionBuilder, RestRequest request) { + // Use the search query as the query hint, if explicit query hint + // is not available. + if (request.hasParam(ClusteringActionRequest.JSON_QUERY_HINT)) { + actionBuilder.setQueryHint(request.param(ClusteringActionRequest.JSON_QUERY_HINT)); + } else { + actionBuilder.setQueryHint(request.param("q")); } - @Override - @SuppressWarnings({"try", "deprecation"}) - public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { - // A POST request must have a body. - if (request.method() == POST && !request.hasContent()) { - return channel -> emitErrorResponse(channel, logger, - new IllegalArgumentException("Request body was expected for a POST request.")); - } - - // Contrary to ES's default search handler we will not support - // GET requests with a body (this is against HTTP spec guidance - // in my opinion -- GET requests should not have a body). - if (request.method() == GET && request.hasContent()) { - return channel -> emitErrorResponse(channel, logger, - new IllegalArgumentException("Request body was unexpected for a GET request.")); - } - - // Build an action request with data from the request. - - // Parse incoming arguments depending on the HTTP method used to make - // the request. - final ClusteringActionRequestBuilder actionBuilder = new ClusteringActionRequestBuilder(client); - SearchRequest searchRequest = new SearchRequest(); - switch (request.method()) { - case POST: - searchRequest.indices(Strings.splitStringByCommaToArray(request.param("index"))); - searchRequest.types(Strings.splitStringByCommaToArray(request.param("type"))); - actionBuilder.setSearchRequest(searchRequest); - actionBuilder.setSource(request.content(), request.getXContentType(), request.getXContentRegistry()); - break; - - case GET: - RestSearchAction.parseSearchRequest(searchRequest, request, null, (size) -> { - searchRequest.source().size(size); - }); - actionBuilder.setSearchRequest(searchRequest); - fillFromGetRequest(actionBuilder, request); - break; + if (request.hasParam(ClusteringActionRequest.JSON_ALGORITHM)) { + actionBuilder.setAlgorithm(request.param(ClusteringActionRequest.JSON_ALGORITHM)); + } - default: - throw org.carrot2.elasticsearch.Preconditions.unreachable(); - } - - Set passSecurityHeaders = new HashSet<>(Arrays.asList("es-security-runas-user", "_xpack_security_authentication")); - - Map securityHeaders = - client.threadPool().getThreadContext().getHeaders() - .entrySet().stream() - .filter(e -> passSecurityHeaders.contains(e.getKey())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - - // Dispatch clustering request. - return channel -> { - try (ThreadContext.StoredContext ignored = client.threadPool().getThreadContext().stashContext()) { - client.threadPool().getThreadContext().copyHeaders(securityHeaders.entrySet()); - client.execute(ClusteringAction.INSTANCE, actionBuilder.request(), - new ActionListener() { - @Override - public void onResponse(ClusteringActionResponse response) { - try { - XContentBuilder builder = channel.newBuilder(); - builder.startObject(); - response.toXContent(builder, request); - builder.endObject(); - channel.sendResponse( - new BytesRestResponse( - response.getSearchResponse().status(), - builder)); - } catch (Exception e) { - logger.debug("Failed to emit response.", e); - onFailure(e); - } - } - - @Override - public void onFailure(Exception e) { - emitErrorResponse(channel, logger, e); - } - }); - } - }; + if (request.hasParam(ClusteringActionRequest.JSON_MAX_HITS)) { + actionBuilder.setMaxHits(request.param(ClusteringActionRequest.JSON_MAX_HITS)); } - private static final EnumMap GET_REQUEST_FIELDMAPPERS; + if (request.hasParam(ClusteringActionRequest.JSON_CREATE_UNGROUPED_CLUSTER)) { + actionBuilder.setCreateUngroupedDocumentsCluster( + Boolean.parseBoolean( + request.param(ClusteringActionRequest.JSON_CREATE_UNGROUPED_CLUSTER))); + } - static { - GET_REQUEST_FIELDMAPPERS = new EnumMap<>(LogicalField.class); - for (LogicalField lf : LogicalField.values()) { - GET_REQUEST_FIELDMAPPERS.put(lf, "field_mapping_" + lf.name().toLowerCase(Locale.ROOT)); - } + if (request.hasParam(ClusteringActionRequest.JSON_LANGUAGE)) { + actionBuilder.setDefaultLanguage(request.param(ClusteringActionRequest.JSON_LANGUAGE)); } - /** - * Extract and parse HTTP GET parameters for the clustering request. - */ - private void fillFromGetRequest( - ClusteringActionRequestBuilder actionBuilder, - RestRequest request) { - // Use the search query as the query hint, if explicit query hint - // is not available. - if (request.hasParam(ClusteringActionRequest.JSON_QUERY_HINT)) { - actionBuilder.setQueryHint(request.param(ClusteringActionRequest.JSON_QUERY_HINT)); - } else { - actionBuilder.setQueryHint(request.param("q")); - } - - if (request.hasParam(ClusteringActionRequest.JSON_ALGORITHM)) { - actionBuilder.setAlgorithm(request.param(ClusteringActionRequest.JSON_ALGORITHM)); - } - - if (request.hasParam(ClusteringActionRequest.JSON_MAX_HITS)) { - actionBuilder.setMaxHits(request.param(ClusteringActionRequest.JSON_MAX_HITS)); - } - - if (request.hasParam(ClusteringActionRequest.JSON_CREATE_UNGROUPED_CLUSTER)) { - actionBuilder.setCreateUngroupedDocumentsCluster( - Boolean.parseBoolean(request.param(ClusteringActionRequest.JSON_CREATE_UNGROUPED_CLUSTER))); - } - - if (request.hasParam(ClusteringActionRequest.JSON_LANGUAGE)) { - actionBuilder.setDefaultLanguage( - request.param(ClusteringActionRequest.JSON_LANGUAGE)); - } - - // Field mappers. - for (Map.Entry e : GET_REQUEST_FIELDMAPPERS.entrySet()) { - if (request.hasParam(e.getValue())) { - for (String spec : Strings.splitStringByCommaToArray(request.param(e.getValue()))) { - actionBuilder.addFieldMappingSpec(spec, e.getKey()); - } - } - } + // Field mappers. + for (Map.Entry e : GET_REQUEST_FIELDMAPPERS.entrySet()) { + if (request.hasParam(e.getValue())) { + for (String spec : Strings.splitStringByCommaToArray(request.param(e.getValue()))) { + actionBuilder.addFieldMappingSpec(spec, e.getKey()); + } + } } - } + } + } } diff --git a/src/main/java/org/carrot2/elasticsearch/ClusteringContext.java b/src/main/java/org/carrot2/elasticsearch/ClusteringContext.java index 3c794ba..bd3999a 100644 --- a/src/main/java/org/carrot2/elasticsearch/ClusteringContext.java +++ b/src/main/java/org/carrot2/elasticsearch/ClusteringContext.java @@ -1,5 +1,16 @@ package org.carrot2.elasticsearch; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.AccessController; +import java.security.PrivilegedExceptionAction; +import java.util.Arrays; +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Optional; +import java.util.function.Supplier; +import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.carrot2.clustering.ClusteringAlgorithm; @@ -16,18 +27,6 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.node.Node; -import java.nio.file.Files; -import java.nio.file.Path; -import java.security.AccessController; -import java.security.PrivilegedExceptionAction; -import java.util.Arrays; -import java.util.Collection; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Optional; -import java.util.function.Supplier; -import java.util.stream.Collectors; - /** Holds the language components initialized and ready throughout the {@link Node}'s lifecycle. */ public class ClusteringContext extends AbstractLifecycleComponent { public static final String PROP_RESOURCES = "resources"; diff --git a/src/main/java/org/carrot2/elasticsearch/ClusteringException.java b/src/main/java/org/carrot2/elasticsearch/ClusteringException.java index 26f4875..fd39e84 100644 --- a/src/main/java/org/carrot2/elasticsearch/ClusteringException.java +++ b/src/main/java/org/carrot2/elasticsearch/ClusteringException.java @@ -1,30 +1,28 @@ package org.carrot2.elasticsearch; +import java.io.IOException; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchWrapperException; import org.elasticsearch.common.io.stream.StreamInput; -import java.io.IOException; - -/** - * Generic exception implementing {@link org.elasticsearch.ElasticsearchWrapperException} - */ +/** Generic exception implementing {@link org.elasticsearch.ElasticsearchWrapperException} */ @SuppressWarnings("serial") -public class ClusteringException extends ElasticsearchException implements ElasticsearchWrapperException { +public class ClusteringException extends ElasticsearchException + implements ElasticsearchWrapperException { - public ClusteringException(Throwable cause) { - super(cause); - } + public ClusteringException(Throwable cause) { + super(cause); + } - public ClusteringException(String msg, Object... args) { - super(msg, args); - } + public ClusteringException(String msg, Object... args) { + super(msg, args); + } - public ClusteringException(String msg, Throwable cause, Object... args) { - super(msg, cause, args); - } + public ClusteringException(String msg, Throwable cause, Object... args) { + super(msg, cause, args); + } - public ClusteringException(StreamInput in) throws IOException { - super(in); - } + public ClusteringException(StreamInput in) throws IOException { + super(in); + } } diff --git a/src/main/java/org/carrot2/elasticsearch/ClusteringPlugin.java b/src/main/java/org/carrot2/elasticsearch/ClusteringPlugin.java index c046e42..6238075 100644 --- a/src/main/java/org/carrot2/elasticsearch/ClusteringPlugin.java +++ b/src/main/java/org/carrot2/elasticsearch/ClusteringPlugin.java @@ -1,5 +1,14 @@ package org.carrot2.elasticsearch; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.ServiceLoader; +import java.util.function.Supplier; import org.carrot2.clustering.ClusteringAlgorithmProvider; import org.carrot2.elasticsearch.ClusteringAction.TransportClusteringAction; import org.carrot2.language.LanguageComponentsProvider; @@ -28,142 +37,134 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.watcher.ResourceWatcherService; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.ServiceLoader; -import java.util.function.Supplier; - public class ClusteringPlugin extends Plugin implements ExtensiblePlugin, ActionPlugin { - /** - * Master on/off switch property for the plugin (general settings). - */ - public static final String DEFAULT_ENABLED_PROPERTY_NAME = "carrot2.enabled"; - - /** - * Plugin name. - */ - public static final String PLUGIN_NAME = "elasticsearch-carrot2"; - - /** - * All algorithm providers. - */ - private LinkedHashMap algorithmProviders = new LinkedHashMap<>(); - - /** - * All language component providers. - */ - private Map> languageComponentProviders = new LinkedHashMap<>(); - - private final boolean transportClient; - private final boolean pluginEnabled; - - public ClusteringPlugin(Settings settings) { - this.pluginEnabled = settings.getAsBoolean(DEFAULT_ENABLED_PROPERTY_NAME, true); - this.transportClient = TransportClient.CLIENT_TYPE.equals(Client.CLIENT_TYPE_SETTING_S.get(settings)); - - // load our own class loader's extensions. - loadExtensions(getClass().getClassLoader()); - } - - @Override - public List> getActions() { - if (pluginEnabled) { - return Arrays.asList( - new ActionHandler<>(ClusteringAction.INSTANCE, TransportClusteringAction.class), - new ActionHandler<>(ListAlgorithmsAction.INSTANCE, ListAlgorithmsAction.TransportListAlgorithmsAction.class)); - } - return Collections.emptyList(); - } - - @Override - public List getRestHandlers(Settings settings, RestController restController, - ClusterSettings clusterSettings, - IndexScopedSettings indexScopedSettings, SettingsFilter settingsFilter, - IndexNameExpressionResolver indexNameExpressionResolver, - Supplier nodesInCluster) { + /** Master on/off switch property for the plugin (general settings). */ + public static final String DEFAULT_ENABLED_PROPERTY_NAME = "carrot2.enabled"; + + /** Plugin name. */ + public static final String PLUGIN_NAME = "elasticsearch-carrot2"; + + /** All algorithm providers. */ + private LinkedHashMap algorithmProviders = + new LinkedHashMap<>(); + + /** All language component providers. */ + private Map> languageComponentProviders = + new LinkedHashMap<>(); + + private final boolean transportClient; + private final boolean pluginEnabled; + + public ClusteringPlugin(Settings settings) { + this.pluginEnabled = settings.getAsBoolean(DEFAULT_ENABLED_PROPERTY_NAME, true); + this.transportClient = + TransportClient.CLIENT_TYPE.equals(Client.CLIENT_TYPE_SETTING_S.get(settings)); + + // load our own class loader's extensions. + loadExtensions(getClass().getClassLoader()); + } + + @Override + public List> getActions() { + if (pluginEnabled) { return Arrays.asList( - new ClusteringAction.RestClusteringAction(), - new ListAlgorithmsAction.RestListAlgorithmsAction()); - } - - @Override - public Collection createComponents(Client client, ClusterService clusterService, - ThreadPool threadPool, - ResourceWatcherService resourceWatcherService, - ScriptService scriptService, NamedXContentRegistry xContentRegistry, - Environment environment, - NodeEnvironment nodeEnvironment, - NamedWriteableRegistry namedWriteableRegistry, - IndexNameExpressionResolver indexNameExpressionResolver, - Supplier repositoriesServiceSupplier) { - List components = new ArrayList<>(); - if (pluginEnabled && !transportClient) { - components.add(new ClusteringContext(environment, - reorderAlgorithms(algorithmProviders), - new LinkedHashMap<>(languageComponentProviders))); - } - return components; - } - - /** - * This places Lingo3G in front of the algorithm list if it is available. - */ - private LinkedHashMap reorderAlgorithms( - LinkedHashMap providers) { - String[] desiredOrder = { - "Lingo3G", - "Lingo", - "STC", - "Bisecting K-Means" - }; - LinkedHashMap copy = new LinkedHashMap<>(); - for (String name : desiredOrder) { - if (providers.containsKey(name)) { - copy.put(name, providers.get(name)); - } + new ActionHandler<>(ClusteringAction.INSTANCE, TransportClusteringAction.class), + new ActionHandler<>( + ListAlgorithmsAction.INSTANCE, + ListAlgorithmsAction.TransportListAlgorithmsAction.class)); + } + return Collections.emptyList(); + } + + @Override + public List getRestHandlers( + Settings settings, + RestController restController, + ClusterSettings clusterSettings, + IndexScopedSettings indexScopedSettings, + SettingsFilter settingsFilter, + IndexNameExpressionResolver indexNameExpressionResolver, + Supplier nodesInCluster) { + return Arrays.asList( + new ClusteringAction.RestClusteringAction(), + new ListAlgorithmsAction.RestListAlgorithmsAction()); + } + + @Override + public Collection createComponents( + Client client, + ClusterService clusterService, + ThreadPool threadPool, + ResourceWatcherService resourceWatcherService, + ScriptService scriptService, + NamedXContentRegistry xContentRegistry, + Environment environment, + NodeEnvironment nodeEnvironment, + NamedWriteableRegistry namedWriteableRegistry, + IndexNameExpressionResolver indexNameExpressionResolver, + Supplier repositoriesServiceSupplier) { + List components = new ArrayList<>(); + if (pluginEnabled && !transportClient) { + components.add( + new ClusteringContext( + environment, + reorderAlgorithms(algorithmProviders), + new LinkedHashMap<>(languageComponentProviders))); + } + return components; + } + + /** This places Lingo3G in front of the algorithm list if it is available. */ + private LinkedHashMap reorderAlgorithms( + LinkedHashMap providers) { + String[] desiredOrder = {"Lingo3G", "Lingo", "STC", "Bisecting K-Means"}; + LinkedHashMap copy = new LinkedHashMap<>(); + for (String name : desiredOrder) { + if (providers.containsKey(name)) { + copy.put(name, providers.get(name)); } - providers.forEach((name, provider) -> { - if (!copy.containsKey(name)) { + } + providers.forEach( + (name, provider) -> { + if (!copy.containsKey(name)) { copy.put(name, provider); - } - }); - return copy; - } - - @Override - public void loadExtensions(ExtensionLoader loader) { - loadExtensions( - loader.loadExtensions(ClusteringAlgorithmProvider.class), - loader.loadExtensions(LanguageComponentsProvider.class)); - } + } + }); + return copy; + } + + @Override + public void loadExtensions(ExtensionLoader loader) { + loadExtensions( + loader.loadExtensions(ClusteringAlgorithmProvider.class), + loader.loadExtensions(LanguageComponentsProvider.class)); + } private void loadExtensions(ClassLoader classLoader) { - loadExtensions( - ServiceLoader.load(ClusteringAlgorithmProvider.class, classLoader), - ServiceLoader.load(LanguageComponentsProvider.class, classLoader)); + loadExtensions( + ServiceLoader.load(ClusteringAlgorithmProvider.class, classLoader), + ServiceLoader.load(LanguageComponentsProvider.class, classLoader)); } - private void loadExtensions(Iterable clusteringAlgorithmProviders, - Iterable languageComponentsProviders) { - clusteringAlgorithmProviders.forEach((provider) -> { - String name = provider.name(); - if (algorithmProviders.containsKey(name)) { - throw new RuntimeException("More than one provider for algorithm " + name + "?"); - } - algorithmProviders.put(name, provider); - }); - - languageComponentsProviders.forEach(provider -> { - for (String lang : provider.languages()) { - languageComponentProviders - .computeIfAbsent(lang, (k) -> new ArrayList<>()) - .add(provider); - } - }); + private void loadExtensions( + Iterable clusteringAlgorithmProviders, + Iterable languageComponentsProviders) { + clusteringAlgorithmProviders.forEach( + (provider) -> { + String name = provider.name(); + if (algorithmProviders.containsKey(name)) { + throw new RuntimeException("More than one provider for algorithm " + name + "?"); + } + algorithmProviders.put(name, provider); + }); + + languageComponentsProviders.forEach( + provider -> { + for (String lang : provider.languages()) { + languageComponentProviders + .computeIfAbsent(lang, (k) -> new ArrayList<>()) + .add(provider); + } + }); } } diff --git a/src/main/java/org/carrot2/elasticsearch/DocumentGroup.java b/src/main/java/org/carrot2/elasticsearch/DocumentGroup.java index 4f420e5..48597fb 100644 --- a/src/main/java/org/carrot2/elasticsearch/DocumentGroup.java +++ b/src/main/java/org/carrot2/elasticsearch/DocumentGroup.java @@ -1,5 +1,9 @@ package org.carrot2.elasticsearch; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; import org.carrot2.clustering.Cluster; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; @@ -9,159 +13,152 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; - /** * A {@link DocumentGroup} acts as an adapter over {@link Cluster}, providing additional * serialization methods and only exposing a subset of {@link Cluster}'s data. */ public class DocumentGroup implements ToXContent, Writeable { - private static final DocumentGroup[] EMPTY_DOC_GROUP = new DocumentGroup[0]; - private static final String[] EMPTY_STRING_ARRAY = new String[0]; - - private int id; - private String[] phrases = EMPTY_STRING_ARRAY; - private double score; - private String[] documentReferences = EMPTY_STRING_ARRAY; - private DocumentGroup[] subgroups = EMPTY_DOC_GROUP; - private boolean ungroupedDocuments; - private Set uniqueDocuments; - - public DocumentGroup() { - } - - DocumentGroup(StreamInput in) throws IOException { - id = in.readVInt(); - score = in.readDouble(); - phrases = in.readStringArray(); - ungroupedDocuments = in.readBoolean(); - documentReferences = in.readStringArray(); - - int max = in.readVInt(); - subgroups = new DocumentGroup[max]; - for (int i = 0; i < max; i++) { - subgroups[i] = new DocumentGroup(in); - } - } - - public DocumentGroup[] getSubgroups() { - return subgroups; - } - - public void setSubgroups(DocumentGroup[] subclusters) { - this.subgroups = Preconditions.checkNotNull(subclusters); - } - - public void setId(int id) { - this.id = id; - } - - public int getId() { - return id; - } - - public void setPhrases(String[] phrases) { - this.phrases = Preconditions.checkNotNull(phrases); - } - - public String[] getPhrases() { - return phrases; - } - - public String getLabel() { - return String.join(", ", getPhrases()); - } - - public void setScore(Double score) { - this.score = (score == null ? 0 : score); - } - - public double getScore() { - return score; - } - - public void setDocumentReferences(String[] documentReferences) { - this.documentReferences = Preconditions.checkNotNull(documentReferences); - } - - public String[] getDocumentReferences() { - return documentReferences; - } - - public void setUngroupedDocuments(boolean ungroupedDocuments) { - this.ungroupedDocuments = ungroupedDocuments; - } - - public boolean isUngroupedDocuments() { - return ungroupedDocuments; - } - - public Set uniqueDocuments() { - // Compute lazily. - if (uniqueDocuments == null) { - uniqueDocuments = new HashSet<>(); - uniqueDocuments.addAll(Arrays.asList(getDocumentReferences())); - for (DocumentGroup group : subgroups) { - uniqueDocuments.addAll(group.uniqueDocuments); - } - } - return uniqueDocuments; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeVInt(id); - out.writeDouble(score); - out.writeStringArray(phrases); - out.writeBoolean(ungroupedDocuments); - out.writeStringArray(documentReferences); - - out.writeVInt(subgroups.length); - for (DocumentGroup group : subgroups) { - group.writeTo(out); - } - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) - throws IOException { - builder.startObject(); - builder - .field("id", id) - .field("score", score) - .field("label", getLabel()) - .array("phrases", phrases); - - if (ungroupedDocuments) { - builder.field("other_topics", ungroupedDocuments); - } - - if (documentReferences.length > 0) { - builder.array("documents", documentReferences); - } - - if (subgroups.length > 0) { - builder.startArray("clusters"); - for (DocumentGroup group : subgroups) { - group.toXContent(builder, params); - } - builder.endArray(); - } - - builder.endObject(); - return builder; - } - - public String toString() { - try { - XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); - toXContent(builder, EMPTY_PARAMS); - return Strings.toString(builder); - } catch (IOException e) { - return "{ \"error\" : \"" + e.getMessage() + "\"}"; - } - } + private static final DocumentGroup[] EMPTY_DOC_GROUP = new DocumentGroup[0]; + private static final String[] EMPTY_STRING_ARRAY = new String[0]; + + private int id; + private String[] phrases = EMPTY_STRING_ARRAY; + private double score; + private String[] documentReferences = EMPTY_STRING_ARRAY; + private DocumentGroup[] subgroups = EMPTY_DOC_GROUP; + private boolean ungroupedDocuments; + private Set uniqueDocuments; + + public DocumentGroup() {} + + DocumentGroup(StreamInput in) throws IOException { + id = in.readVInt(); + score = in.readDouble(); + phrases = in.readStringArray(); + ungroupedDocuments = in.readBoolean(); + documentReferences = in.readStringArray(); + + int max = in.readVInt(); + subgroups = new DocumentGroup[max]; + for (int i = 0; i < max; i++) { + subgroups[i] = new DocumentGroup(in); + } + } + + public DocumentGroup[] getSubgroups() { + return subgroups; + } + + public void setSubgroups(DocumentGroup[] subclusters) { + this.subgroups = Preconditions.checkNotNull(subclusters); + } + + public void setId(int id) { + this.id = id; + } + + public int getId() { + return id; + } + + public void setPhrases(String[] phrases) { + this.phrases = Preconditions.checkNotNull(phrases); + } + + public String[] getPhrases() { + return phrases; + } + + public String getLabel() { + return String.join(", ", getPhrases()); + } + + public void setScore(Double score) { + this.score = (score == null ? 0 : score); + } + + public double getScore() { + return score; + } + + public void setDocumentReferences(String[] documentReferences) { + this.documentReferences = Preconditions.checkNotNull(documentReferences); + } + + public String[] getDocumentReferences() { + return documentReferences; + } + + public void setUngroupedDocuments(boolean ungroupedDocuments) { + this.ungroupedDocuments = ungroupedDocuments; + } + + public boolean isUngroupedDocuments() { + return ungroupedDocuments; + } + + public Set uniqueDocuments() { + // Compute lazily. + if (uniqueDocuments == null) { + uniqueDocuments = new HashSet<>(); + uniqueDocuments.addAll(Arrays.asList(getDocumentReferences())); + for (DocumentGroup group : subgroups) { + uniqueDocuments.addAll(group.uniqueDocuments); + } + } + return uniqueDocuments; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(id); + out.writeDouble(score); + out.writeStringArray(phrases); + out.writeBoolean(ungroupedDocuments); + out.writeStringArray(documentReferences); + + out.writeVInt(subgroups.length); + for (DocumentGroup group : subgroups) { + group.writeTo(out); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder + .field("id", id) + .field("score", score) + .field("label", getLabel()) + .array("phrases", phrases); + + if (ungroupedDocuments) { + builder.field("other_topics", ungroupedDocuments); + } + + if (documentReferences.length > 0) { + builder.array("documents", documentReferences); + } + + if (subgroups.length > 0) { + builder.startArray("clusters"); + for (DocumentGroup group : subgroups) { + group.toXContent(builder, params); + } + builder.endArray(); + } + + builder.endObject(); + return builder; + } + + public String toString() { + try { + XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); + toXContent(builder, EMPTY_PARAMS); + return Strings.toString(builder); + } catch (IOException e) { + return "{ \"error\" : \"" + e.getMessage() + "\"}"; + } + } } diff --git a/src/main/java/org/carrot2/elasticsearch/FieldMappingSpec.java b/src/main/java/org/carrot2/elasticsearch/FieldMappingSpec.java index 52df8a9..db0990f 100644 --- a/src/main/java/org/carrot2/elasticsearch/FieldMappingSpec.java +++ b/src/main/java/org/carrot2/elasticsearch/FieldMappingSpec.java @@ -1,32 +1,31 @@ package org.carrot2.elasticsearch; +import java.io.IOException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; -import java.io.IOException; - class FieldMappingSpec implements Writeable { - String field; - LogicalField logicalField; - FieldSource source; + String field; + LogicalField logicalField; + FieldSource source; + + FieldMappingSpec(String field, LogicalField logicalField, FieldSource source) { + this.field = field; + this.logicalField = logicalField; + this.source = source; + } - FieldMappingSpec(String field, LogicalField logicalField, FieldSource source) { - this.field = field; - this.logicalField = logicalField; - this.source = source; - } - - FieldMappingSpec(StreamInput in) throws IOException { - field = in.readString(); - logicalField = LogicalField.fromOrdinal(in.readVInt()); - source = FieldSource.fromOrdinal(in.readVInt()); - } + FieldMappingSpec(StreamInput in) throws IOException { + field = in.readString(); + logicalField = LogicalField.fromOrdinal(in.readVInt()); + source = FieldSource.fromOrdinal(in.readVInt()); + } - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeString(field); - out.writeVInt(logicalField.ordinal()); - out.writeVInt(source.ordinal()); - } -} \ No newline at end of file + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(field); + out.writeVInt(logicalField.ordinal()); + out.writeVInt(source.ordinal()); + } +} diff --git a/src/main/java/org/carrot2/elasticsearch/FieldSource.java b/src/main/java/org/carrot2/elasticsearch/FieldSource.java index 61bc5a3..54d604c 100644 --- a/src/main/java/org/carrot2/elasticsearch/FieldSource.java +++ b/src/main/java/org/carrot2/elasticsearch/FieldSource.java @@ -1,45 +1,42 @@ package org.carrot2.elasticsearch; -/** - * The source of data for a logical document field. - */ +/** The source of data for a logical document field. */ enum FieldSource { - HIGHLIGHT("highlight."), - FIELD("fields."), - SOURCE("_source."); - - /** - * Field specification prefix for this source. - */ - private final String fieldSpecPrefix; - - static class ParsedFieldSource { - final FieldSource source; - final String fieldName; - - ParsedFieldSource(FieldSource source, String fieldName) { - this.source = source; - this.fieldName = fieldName; - } + HIGHLIGHT("highlight."), + FIELD("fields."), + SOURCE("_source."); + + /** Field specification prefix for this source. */ + private final String fieldSpecPrefix; + + static class ParsedFieldSource { + final FieldSource source; + final String fieldName; + + ParsedFieldSource(FieldSource source, String fieldName) { + this.source = source; + this.fieldName = fieldName; } + } - static ParsedFieldSource parseSpec(String fieldSourceSpec) { - if (fieldSourceSpec != null) { - for (FieldSource fs : cachedByOrdinal) { - if (fieldSourceSpec.startsWith(fs.fieldSpecPrefix)) { - return new ParsedFieldSource(fs, fieldSourceSpec.substring(fs.fieldSpecPrefix.length())); - } - } + static ParsedFieldSource parseSpec(String fieldSourceSpec) { + if (fieldSourceSpec != null) { + for (FieldSource fs : cachedByOrdinal) { + if (fieldSourceSpec.startsWith(fs.fieldSpecPrefix)) { + return new ParsedFieldSource(fs, fieldSourceSpec.substring(fs.fieldSpecPrefix.length())); } - return null; - } - - static FieldSource [] cachedByOrdinal = values(); - static FieldSource fromOrdinal(int ordinal) { - return cachedByOrdinal[ordinal]; + } } + return null; + } - FieldSource(String fieldSpecPrefix) { - this.fieldSpecPrefix = fieldSpecPrefix; - } -} \ No newline at end of file + static FieldSource[] cachedByOrdinal = values(); + + static FieldSource fromOrdinal(int ordinal) { + return cachedByOrdinal[ordinal]; + } + + FieldSource(String fieldSpecPrefix) { + this.fieldSpecPrefix = fieldSpecPrefix; + } +} diff --git a/src/main/java/org/carrot2/elasticsearch/InputDocument.java b/src/main/java/org/carrot2/elasticsearch/InputDocument.java index 496c60d..ac7d77f 100644 --- a/src/main/java/org/carrot2/elasticsearch/InputDocument.java +++ b/src/main/java/org/carrot2/elasticsearch/InputDocument.java @@ -1,34 +1,33 @@ package org.carrot2.elasticsearch; -import org.carrot2.clustering.Document; - import java.util.Objects; import java.util.function.BiConsumer; +import org.carrot2.clustering.Document; public class InputDocument implements Document { - private final String title; - private final String content; - private final String language; - private final String hitId; + private final String title; + private final String content; + private final String language; + private final String hitId; - public InputDocument(String title, String content, String language, String hitId) { - this.title = title; - this.content = content; - this.language = language; - this.hitId = Objects.requireNonNull(hitId); - } + public InputDocument(String title, String content, String language, String hitId) { + this.title = title; + this.content = content; + this.language = language; + this.hitId = Objects.requireNonNull(hitId); + } - @Override - public void visitFields(BiConsumer fieldConsumer) { - fieldConsumer.accept("title", title); - fieldConsumer.accept("content", content); - } + @Override + public void visitFields(BiConsumer fieldConsumer) { + fieldConsumer.accept("title", title); + fieldConsumer.accept("content", content); + } - public String getStringId() { - return hitId; - } + public String getStringId() { + return hitId; + } - public String language() { - return language; - } + public String language() { + return language; + } } diff --git a/src/main/java/org/carrot2/elasticsearch/ListAlgorithmsAction.java b/src/main/java/org/carrot2/elasticsearch/ListAlgorithmsAction.java index 40c99a7..9eb192d 100644 --- a/src/main/java/org/carrot2/elasticsearch/ListAlgorithmsAction.java +++ b/src/main/java/org/carrot2/elasticsearch/ListAlgorithmsAction.java @@ -1,5 +1,12 @@ package org.carrot2.elasticsearch; +import static org.carrot2.elasticsearch.LoggerUtils.emitErrorResponse; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.action.ActionListener; @@ -29,222 +36,207 @@ import org.elasticsearch.transport.TransportRequestHandler; import org.elasticsearch.transport.TransportService; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; +/** List all available clustering algorithms. */ +public class ListAlgorithmsAction + extends ActionType { + /* Action name. */ + public static final String NAME = "cluster:monitor/carrot2/algorithms"; -import static org.carrot2.elasticsearch.LoggerUtils.emitErrorResponse; + /* Reusable singleton. */ + public static final ListAlgorithmsAction INSTANCE = new ListAlgorithmsAction(); + + private ListAlgorithmsAction() { + super(NAME, ListAlgorithmsActionResponse::new); + } + + @Override + public Writeable.Reader getResponseReader() { + return ListAlgorithmsActionResponse::new; + } + + /** An {@link ActionRequest} for {@link ListAlgorithmsAction}. */ + public static class ListAlgorithmsActionRequest extends ActionRequest { + + ListAlgorithmsActionRequest() {} + + ListAlgorithmsActionRequest(StreamInput in) throws IOException { + super(in); + } + + @Override + public ActionRequestValidationException validate() { + return /* Nothing to validate. */ null; + } + } + + /** An {@link ActionRequestBuilder} for {@link ListAlgorithmsAction}. */ + public static class ListAlgorithmsActionRequestBuilder + extends ActionRequestBuilder { + public ListAlgorithmsActionRequestBuilder(ElasticsearchClient client) { + super(client, ListAlgorithmsAction.INSTANCE, new ListAlgorithmsActionRequest()); + } + } + + /** A {@link ActionResponse} for {@link ListAlgorithmsAction}. */ + public static class ListAlgorithmsActionResponse extends ActionResponse implements ToXContent { + private static final String[] EMPTY_LIST = {}; + private String[] algorithms; + + /** Clustering-related response fields. */ + static final class Fields { + static final String ALGORITHMS = "algorithms"; + } + + public ListAlgorithmsActionResponse(StreamInput in) throws IOException { + super(in); + algorithms = in.readStringArray(); + } + + public ListAlgorithmsActionResponse(List algorithms) { + this.algorithms = algorithms.toArray(new String[0]); + } + + public List getAlgorithms() { + return Collections.unmodifiableList(Arrays.asList(algorithms)); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return builder.array(Fields.ALGORITHMS, algorithms); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeStringArray(algorithms); + } + + @Override + public String toString() { + return ToString.objectToJson(this); + } + } + + /** + * A {@link TransportAction} for actually executing {@link ListAlgorithmsActionRequest} and + * providing {@link ListAlgorithmsActionResponse}. + */ + public static class TransportListAlgorithmsAction + extends TransportAction { + + protected Logger logger = LogManager.getLogger(getClass()); + private final ClusteringContext controllerSingleton; + + @Inject + public TransportListAlgorithmsAction( + TransportService transportService, + ClusteringContext controllerSingleton, + ActionFilters actionFilters) { + super(ListAlgorithmsAction.NAME, actionFilters, transportService.getTaskManager()); + this.controllerSingleton = controllerSingleton; + transportService.registerRequestHandler( + ListAlgorithmsAction.NAME, + ThreadPool.Names.SAME, + ListAlgorithmsActionRequest::new, + new TransportHandler()); + } + + @Override + protected void doExecute( + Task task, + ListAlgorithmsActionRequest request, + ActionListener listener) { + listener.onResponse( + new ListAlgorithmsActionResponse( + new ArrayList<>(controllerSingleton.getAlgorithms().keySet()))); + } + + private final class TransportHandler + implements TransportRequestHandler { + @Override + public void messageReceived( + final ListAlgorithmsActionRequest request, final TransportChannel channel, Task task) + throws Exception { + execute( + request, + new ActionListener() { + @Override + public void onResponse(ListAlgorithmsActionResponse response) { + try { + channel.sendResponse(response); + } catch (Exception e) { + onFailure(e); + } + } + + @Override + public void onFailure(Exception e) { + try { + channel.sendResponse(e); + } catch (Exception e1) { + logger.warn( + "Failed to send error response for action [" + + NAME + + "] and request [" + + request + + "]", + e1); + } + } + }); + } + } + } -/** - * List all available clustering algorithms. - */ -public class ListAlgorithmsAction extends ActionType { - /* Action name. */ - public static final String NAME = "cluster:monitor/carrot2/algorithms"; + /** {@link BaseRestHandler} for serving {@link ListAlgorithmsAction}. */ + public static class RestListAlgorithmsAction extends BaseRestHandler { + /* Action name suffix. */ + public static String NAME = "_algorithms"; - /* Reusable singleton. */ - public static final ListAlgorithmsAction INSTANCE = new ListAlgorithmsAction(); + protected Logger logger = LogManager.getLogger(getClass()); - private ListAlgorithmsAction() { - super(NAME, ListAlgorithmsActionResponse::new); + @Override + public List routes() { + return Arrays.asList(new Route(Method.POST, "/" + NAME), new Route(Method.GET, "/" + NAME)); } @Override - public Writeable.Reader getResponseReader() { - return ListAlgorithmsActionResponse::new; - } - - /** - * An {@link ActionRequest} for {@link ListAlgorithmsAction}. - */ - public static class ListAlgorithmsActionRequest - extends ActionRequest { - - ListAlgorithmsActionRequest() {} - - ListAlgorithmsActionRequest(StreamInput in) throws IOException { - super(in); - } - - @Override - public ActionRequestValidationException validate() { - return /* Nothing to validate. */ null; - } - } - - /** - * An {@link ActionRequestBuilder} for {@link ListAlgorithmsAction}. - */ - public static class ListAlgorithmsActionRequestBuilder - extends ActionRequestBuilder { - public ListAlgorithmsActionRequestBuilder(ElasticsearchClient client) { - super(client, ListAlgorithmsAction.INSTANCE, new ListAlgorithmsActionRequest()); - } - } - - /** - * A {@link ActionResponse} for {@link ListAlgorithmsAction}. - */ - public static class ListAlgorithmsActionResponse extends ActionResponse implements ToXContent { - private static final String[] EMPTY_LIST = {}; - private String [] algorithms; - - /** - * Clustering-related response fields. - */ - static final class Fields { - static final String ALGORITHMS = "algorithms"; - } - - public ListAlgorithmsActionResponse(StreamInput in) throws IOException { - super(in); - algorithms = in.readStringArray(); - } - - public ListAlgorithmsActionResponse(List algorithms) { - this.algorithms = algorithms.toArray(new String[0]); - } - - public List getAlgorithms() { - return Collections.unmodifiableList(Arrays.asList(algorithms)); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) - throws IOException { - return builder.array(Fields.ALGORITHMS, algorithms); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeStringArray(algorithms); - } - - @Override - public String toString() { - return ToString.objectToJson(this); - } - } - - /** - * A {@link TransportAction} for actually executing - * {@link ListAlgorithmsActionRequest} and providing - * {@link ListAlgorithmsActionResponse}. - */ - public static class TransportListAlgorithmsAction - extends TransportAction { - - protected Logger logger = LogManager.getLogger(getClass()); - private final ClusteringContext controllerSingleton; - - @Inject - public TransportListAlgorithmsAction(TransportService transportService, - ClusteringContext controllerSingleton, - ActionFilters actionFilters) { - super(ListAlgorithmsAction.NAME, - actionFilters, - transportService.getTaskManager()); - this.controllerSingleton = controllerSingleton; - transportService.registerRequestHandler( - ListAlgorithmsAction.NAME, - ThreadPool.Names.SAME, - ListAlgorithmsActionRequest::new, - new TransportHandler()); - } - - @Override - protected void doExecute(Task task, - ListAlgorithmsActionRequest request, - ActionListener listener) { - listener.onResponse(new ListAlgorithmsActionResponse( - new ArrayList<>(controllerSingleton.getAlgorithms().keySet()))); - } - - private final class TransportHandler implements TransportRequestHandler { - @Override - public void messageReceived(final ListAlgorithmsActionRequest request, - final TransportChannel channel, - Task task) throws Exception { - execute(request, new ActionListener() { - @Override - public void onResponse(ListAlgorithmsActionResponse response) { - try { - channel.sendResponse(response); - } catch (Exception e) { - onFailure(e); - } - } - - @Override - public void onFailure(Exception e) { - try { - channel.sendResponse(e); - } catch (Exception e1) { - logger.warn("Failed to send error response for action [" - + NAME + "] and request [" + request + "]", e1); - } - } - }); - } - } - } - - /** - * {@link BaseRestHandler} for serving {@link ListAlgorithmsAction}. - */ - public static class RestListAlgorithmsAction extends BaseRestHandler { - /* Action name suffix. */ - public static String NAME = "_algorithms"; - - protected Logger logger = LogManager.getLogger(getClass()); - - @Override - public List routes() { - return Arrays.asList( - new Route(Method.POST, "/" + NAME), - new Route(Method.GET, "/" + NAME) - ); - } - - @Override - public String getName() { - return NAME; - } - - @Override - public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) { - if (request.hasContent()) { - return channel -> emitErrorResponse(channel, logger, - new IllegalArgumentException("Request body was expected.")); - } - - ListAlgorithmsActionRequest actionRequest = new ListAlgorithmsActionRequest(); - return channel -> client.execute(INSTANCE, actionRequest, new ActionListener() { + public String getName() { + return NAME; + } + + @Override + public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) { + if (request.hasContent()) { + return channel -> + emitErrorResponse( + channel, logger, new IllegalArgumentException("Request body was expected.")); + } + + ListAlgorithmsActionRequest actionRequest = new ListAlgorithmsActionRequest(); + return channel -> + client.execute( + INSTANCE, + actionRequest, + new ActionListener() { @Override public void onResponse(ListAlgorithmsActionResponse response) { - try { - XContentBuilder builder = channel.newBuilder(); - builder.startObject(); - response.toXContent(builder, request); - builder.endObject(); - channel.sendResponse( - new BytesRestResponse( - RestStatus.OK, - builder)); - } catch (Exception e) { - logger.debug("Failed to emit response.", e); - onFailure(e); - } + try { + XContentBuilder builder = channel.newBuilder(); + builder.startObject(); + response.toXContent(builder, request); + builder.endObject(); + channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder)); + } catch (Exception e) { + logger.debug("Failed to emit response.", e); + onFailure(e); + } } @Override public void onFailure(Exception e) { - emitErrorResponse(channel, logger, e); + emitErrorResponse(channel, logger, e); } - }); - } + }); } + } } diff --git a/src/main/java/org/carrot2/elasticsearch/LoggerUtils.java b/src/main/java/org/carrot2/elasticsearch/LoggerUtils.java index 3ea8a8c..6b587a1 100644 --- a/src/main/java/org/carrot2/elasticsearch/LoggerUtils.java +++ b/src/main/java/org/carrot2/elasticsearch/LoggerUtils.java @@ -1,20 +1,17 @@ package org.carrot2.elasticsearch; +import java.io.IOException; import org.apache.logging.log4j.Logger; import org.elasticsearch.rest.BytesRestResponse; import org.elasticsearch.rest.RestChannel; -import java.io.IOException; - final class LoggerUtils { - static void emitErrorResponse(RestChannel channel, - Logger logger, - Exception e) { - try { - channel.sendResponse(new BytesRestResponse(channel, e)); - } catch (IOException e1) { - logger.error("Failed to send failure response.", e1); - } + static void emitErrorResponse(RestChannel channel, Logger logger, Exception e) { + try { + channel.sendResponse(new BytesRestResponse(channel, e)); + } catch (IOException e1) { + logger.error("Failed to send failure response.", e1); } + } } diff --git a/src/main/java/org/carrot2/elasticsearch/LogicalField.java b/src/main/java/org/carrot2/elasticsearch/LogicalField.java index 6a73b80..885e8df 100644 --- a/src/main/java/org/carrot2/elasticsearch/LogicalField.java +++ b/src/main/java/org/carrot2/elasticsearch/LogicalField.java @@ -1,42 +1,43 @@ package org.carrot2.elasticsearch; -import org.carrot2.elasticsearch.ClusteringAction.ClusteringActionRequest; - import java.util.HashMap; import java.util.Locale; +import org.carrot2.elasticsearch.ClusteringAction.ClusteringActionRequest; /** * Logical fields of a document to be clustered. - * + * * @see ClusteringActionRequest#addFieldMappingSpec(String, LogicalField) * @see ClusteringActionRequest#addFieldMapping(String, LogicalField) * @see ClusteringActionRequest#addHighlightedFieldMapping(String, LogicalField) * @see ClusteringActionRequest#addSourceFieldMapping(String, LogicalField) */ public enum LogicalField { - TITLE, - CONTENT, - LANGUAGE; + TITLE, + CONTENT, + LANGUAGE; - static final LogicalField [] cachedByOrdinal = values(); - static LogicalField fromOrdinal(int ordinal) { - return cachedByOrdinal[ordinal]; - } + static final LogicalField[] cachedByOrdinal = values(); - static final HashMap aliases; - static { - aliases = new HashMap<>(); - for (LogicalField v : LogicalField.values()) { - aliases.put(v.name(), v); - aliases.put(v.name().toLowerCase(Locale.ROOT), v); - } - } + static LogicalField fromOrdinal(int ordinal) { + return cachedByOrdinal[ordinal]; + } + + static final HashMap aliases; - /** - * Same as {@link LogicalField#valueOf(String)} but does not throw - * an exception on invalid values (returns null). - */ - static LogicalField valueOfCaseInsensitive(String enumValue) { - return aliases.get(enumValue); + static { + aliases = new HashMap<>(); + for (LogicalField v : LogicalField.values()) { + aliases.put(v.name(), v); + aliases.put(v.name().toLowerCase(Locale.ROOT), v); } + } + + /** + * Same as {@link LogicalField#valueOf(String)} but does not throw an exception on invalid values + * (returns null). + */ + static LogicalField valueOfCaseInsensitive(String enumValue) { + return aliases.get(enumValue); + } } diff --git a/src/main/java/org/carrot2/elasticsearch/OptionalQueryHintSetterVisitor.java b/src/main/java/org/carrot2/elasticsearch/OptionalQueryHintSetterVisitor.java index 6d7ddbc..0c56d7d 100644 --- a/src/main/java/org/carrot2/elasticsearch/OptionalQueryHintSetterVisitor.java +++ b/src/main/java/org/carrot2/elasticsearch/OptionalQueryHintSetterVisitor.java @@ -1,5 +1,6 @@ package org.carrot2.elasticsearch; +import java.util.Objects; import org.carrot2.attrs.AcceptingVisitor; import org.carrot2.attrs.AttrBoolean; import org.carrot2.attrs.AttrDouble; @@ -11,54 +12,38 @@ import org.carrot2.attrs.AttrStringArray; import org.carrot2.attrs.AttrVisitor; -import java.util.Objects; - final class OptionalQueryHintSetterVisitor implements AttrVisitor { - private final String queryHint; - - OptionalQueryHintSetterVisitor(String queryHint) { - this.queryHint = queryHint; - } - - @Override - public void visit(String key, AttrBoolean attr) { - - } - - @Override - public void visit(String key, AttrInteger attr) { - - } - - @Override - public void visit(String key, AttrDouble attr) { - - } + private final String queryHint; - @Override - public void visit(String key, AttrString attr) { - if (Objects.equals(key, "queryHint")) { - attr.set(queryHint); - } - } + OptionalQueryHintSetterVisitor(String queryHint) { + this.queryHint = queryHint; + } - @Override - public void visit(String key, AttrStringArray attr) { + @Override + public void visit(String key, AttrBoolean attr) {} - } + @Override + public void visit(String key, AttrInteger attr) {} - @Override - public > void visit(String key, AttrEnum attr) { + @Override + public void visit(String key, AttrDouble attr) {} - } + @Override + public void visit(String key, AttrString attr) { + if (Objects.equals(key, "queryHint")) { + attr.set(queryHint); + } + } - @Override - public void visit(String key, AttrObject attr) { + @Override + public void visit(String key, AttrStringArray attr) {} - } + @Override + public > void visit(String key, AttrEnum attr) {} - @Override - public void visit(String key, AttrObjectArray attr) { + @Override + public void visit(String key, AttrObject attr) {} - } + @Override + public void visit(String key, AttrObjectArray attr) {} } diff --git a/src/main/java/org/carrot2/elasticsearch/PathResourceLookup.java b/src/main/java/org/carrot2/elasticsearch/PathResourceLookup.java index 5762e29..67a4422 100644 --- a/src/main/java/org/carrot2/elasticsearch/PathResourceLookup.java +++ b/src/main/java/org/carrot2/elasticsearch/PathResourceLookup.java @@ -1,7 +1,5 @@ package org.carrot2.elasticsearch; -import org.carrot2.util.ResourceLookup; - import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; @@ -9,48 +7,54 @@ import java.nio.file.Path; import java.util.List; import java.util.stream.Collectors; +import org.carrot2.util.ResourceLookup; public class PathResourceLookup implements ResourceLookup { - private final List locations; - - public PathResourceLookup(List locations) { - if (locations == null || locations.isEmpty()) { - throw new RuntimeException("At least one resource location is required."); - } - this.locations = locations; - } - - @Override - public InputStream open(String resource) throws IOException { - Path p = locate(resource); - if (p == null) { - throw new IOException("Resource " + p + " not found relative to: " - + locations.stream().map(path -> path.toAbsolutePath().toString()) - .collect(Collectors.joining(", "))); - } - return new BufferedInputStream(Files.newInputStream(p)); - } - - @Override - public boolean exists(String resource) { - return locate(resource) != null; - } - - @Override - public String pathOf(String resource) { - return "[" + locations.stream() - .map(path -> path.resolve(resource).toAbsolutePath().toString()) - .collect(Collectors.joining(" | ")) - + "]"; - } - - private Path locate(String resource) { - for (Path base : locations) { - Path p = base.resolve(resource); - if (Files.exists(p)) { - return p; - } + private final List locations; + + public PathResourceLookup(List locations) { + if (locations == null || locations.isEmpty()) { + throw new RuntimeException("At least one resource location is required."); + } + this.locations = locations; + } + + @Override + public InputStream open(String resource) throws IOException { + Path p = locate(resource); + if (p == null) { + throw new IOException( + "Resource " + + p + + " not found relative to: " + + locations.stream() + .map(path -> path.toAbsolutePath().toString()) + .collect(Collectors.joining(", "))); + } + return new BufferedInputStream(Files.newInputStream(p)); + } + + @Override + public boolean exists(String resource) { + return locate(resource) != null; + } + + @Override + public String pathOf(String resource) { + return "[" + + locations.stream() + .map(path -> path.resolve(resource).toAbsolutePath().toString()) + .collect(Collectors.joining(" | ")) + + "]"; + } + + private Path locate(String resource) { + for (Path base : locations) { + Path p = base.resolve(resource); + if (Files.exists(p)) { + return p; } - return null; - } + } + return null; + } } diff --git a/src/main/java/org/carrot2/elasticsearch/Preconditions.java b/src/main/java/org/carrot2/elasticsearch/Preconditions.java index 1f23f27..c6daa3f 100644 --- a/src/main/java/org/carrot2/elasticsearch/Preconditions.java +++ b/src/main/java/org/carrot2/elasticsearch/Preconditions.java @@ -1,20 +1,20 @@ package org.carrot2.elasticsearch; final class Preconditions { - /** - * Mark unreachable code path. Expected use scenario: - *
-     * throw Preconditions.unreachable();
-     * 
- */ - public static RuntimeException unreachable() throws RuntimeException { - throw new RuntimeException("Unreachable code assertion hit."); - } + /** + * Mark unreachable code path. Expected use scenario: + * + *
+   * throw Preconditions.unreachable();
+   * 
+ */ + public static RuntimeException unreachable() throws RuntimeException { + throw new RuntimeException("Unreachable code assertion hit."); + } - public static T checkNotNull(T object) throws RuntimeException { - if (object != null) - return object; + public static T checkNotNull(T object) throws RuntimeException { + if (object != null) return object; - throw new IllegalArgumentException("Cannot be null"); - } + throw new IllegalArgumentException("Cannot be null"); + } } diff --git a/src/main/java/org/carrot2/elasticsearch/ToString.java b/src/main/java/org/carrot2/elasticsearch/ToString.java index c2c2a4b..d80d46a 100644 --- a/src/main/java/org/carrot2/elasticsearch/ToString.java +++ b/src/main/java/org/carrot2/elasticsearch/ToString.java @@ -1,33 +1,31 @@ package org.carrot2.elasticsearch; +import java.io.IOException; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.common.Strings; -import java.io.IOException; -/** - * Reusable stuff related to {@link Object#toString()} implementations. - */ +/** Reusable stuff related to {@link Object#toString()} implementations. */ final class ToString { - public static String objectToJson(ToXContent xcontentObject) { - try { - XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); - builder.startObject(); - xcontentObject.toXContent(builder, ToXContent.EMPTY_PARAMS); - builder.endObject(); - return Strings.toString(builder); - } catch (IOException e) { - try { - XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); - builder.startObject(); - builder.field("error", e.getMessage()); - builder.field("class", e.getClass().getName()); - builder.endObject(); - return Strings.toString(builder); - } catch (IOException e2) { - return "{ \"error\": \"Could not serialize the underlying error.\"}"; - } - } + public static String objectToJson(ToXContent xcontentObject) { + try { + XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + xcontentObject.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + return Strings.toString(builder); + } catch (IOException e) { + try { + XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + builder.field("error", e.getMessage()); + builder.field("class", e.getClass().getName()); + builder.endObject(); + return Strings.toString(builder); + } catch (IOException e2) { + return "{ \"error\": \"Could not serialize the underlying error.\"}"; + } } + } } diff --git a/src/test/java/org/carrot2/elasticsearch/ClusteringActionIT.java b/src/test/java/org/carrot2/elasticsearch/ClusteringActionIT.java index e26d27c..7691ff5 100644 --- a/src/test/java/org/carrot2/elasticsearch/ClusteringActionIT.java +++ b/src/test/java/org/carrot2/elasticsearch/ClusteringActionIT.java @@ -3,6 +3,14 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import com.fasterxml.jackson.databind.node.ObjectNode; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.assertj.core.api.Assertions; import org.carrot2.attrs.Attrs; import org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm; @@ -22,325 +30,339 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * API tests for {@link ClusteringAction}. - */ +/** API tests for {@link ClusteringAction}. */ public class ClusteringActionIT extends SampleIndexTestCase { - public void testComplexQuery() throws IOException { - ClusteringActionResponse result = new ClusteringActionRequestBuilder(client) + public void testComplexQuery() throws IOException { + ClusteringActionResponse result = + new ClusteringActionRequestBuilder(client) .setQueryHint("data mining") .addSourceFieldMapping("title", LogicalField.TITLE) .addHighlightedFieldMapping("content", LogicalField.CONTENT) .setDefaultLanguage("English") .setSearchRequest( - client.prepareSearch() + client + .prepareSearch() .setIndices(INDEX_TEST) .setSize(100) .setQuery(QueryBuilders.termQuery("content", "data")) .highlighter(new HighlightBuilder().preTags("").postTags("")) .setFetchSource(new String[] {"title"}, null) .highlighter(new HighlightBuilder().field("content"))) - .execute().actionGet(); - - checkValid(result); - checkJsonSerialization(result); + .execute() + .actionGet(); + + checkValid(result); + checkJsonSerialization(result); + } + + public void testDefaultLanguage() throws IOException { + LinkedHashMap> labelsByLanguage = new LinkedHashMap<>(); + String[] languages = new LanguageComponentsLoader().load().languages().toArray(String[]::new); + for (String lang : languages) { + ClusteringActionResponse english = + new ClusteringActionRequestBuilder(client) + .setQueryHint("data mining") + .addSourceFieldMapping("title", LogicalField.TITLE) + .addHighlightedFieldMapping("content", LogicalField.CONTENT) + .setDefaultLanguage(lang) + .setSearchRequest( + client + .prepareSearch() + .setIndices(INDEX_TEST) + .setSize(100) + .setQuery(QueryBuilders.termQuery("content", "data")) + .setFetchSource(new String[] {"title"}, null)) + .execute() + .actionGet(); + + checkValid(english); + checkJsonSerialization(english); + + labelsByLanguage.put( + lang, + Arrays.stream(english.getDocumentGroups()) + .map(DocumentGroup::getLabel) + .collect(Collectors.toList())); } - public void testDefaultLanguage() throws IOException { - LinkedHashMap> labelsByLanguage = new LinkedHashMap<>(); - String[] languages = new LanguageComponentsLoader().load().languages().toArray(String[]::new); - for (String lang : languages) { - ClusteringActionResponse english = new ClusteringActionRequestBuilder(client) - .setQueryHint("data mining") - .addSourceFieldMapping("title", LogicalField.TITLE) - .addHighlightedFieldMapping("content", LogicalField.CONTENT) - .setDefaultLanguage(lang) - .setSearchRequest( - client.prepareSearch() - .setIndices(INDEX_TEST) - .setSize(100) - .setQuery(QueryBuilders.termQuery("content", "data")) - .setFetchSource(new String[]{"title"}, null)) - .execute().actionGet(); - - checkValid(english); - checkJsonSerialization(english); - - labelsByLanguage.put(lang, - Arrays.stream(english.getDocumentGroups()).map(DocumentGroup::getLabel) - .collect(Collectors.toList())); - } - - List english = labelsByLanguage.get("English"); - List italian = labelsByLanguage.get("Italian"); - List shared = new ArrayList<>(english); - shared.retainAll(italian); - Assertions.assertThat(shared) - .hasSizeLessThanOrEqualTo((int) (english.size() * 0.75)); - } + List english = labelsByLanguage.get("English"); + List italian = labelsByLanguage.get("Italian"); + List shared = new ArrayList<>(english); + shared.retainAll(italian); + Assertions.assertThat(shared).hasSizeLessThanOrEqualTo((int) (english.size() * 0.75)); + } - public void testAttributes() throws IOException { - LingoClusteringAlgorithm algorithm = new LingoClusteringAlgorithm(); - algorithm.desiredClusterCount.set(5); + public void testAttributes() throws IOException { + LingoClusteringAlgorithm algorithm = new LingoClusteringAlgorithm(); + algorithm.desiredClusterCount.set(5); - Map extract = Attrs.extract(algorithm); - Attrs.populate(algorithm, extract); + Map extract = Attrs.extract(algorithm); + Attrs.populate(algorithm, extract); - ClusteringActionResponse result = new ClusteringActionRequestBuilder(client) + ClusteringActionResponse result = + new ClusteringActionRequestBuilder(client) .setQueryHint("data mining") .addSourceFieldMapping("title", LogicalField.TITLE) .addSourceFieldMapping("content", LogicalField.CONTENT) .addAttributes(Attrs.extract(algorithm)) .setSearchRequest( - client.prepareSearch() + client + .prepareSearch() .setIndices(INDEX_TEST) .setSize(100) .setQuery(QueryBuilders.matchAllQuery()) .setFetchSource(new String[] {"title", "content"}, null)) - .execute().actionGet(); + .execute() + .actionGet(); - checkValid(result); - checkJsonSerialization(result); + checkValid(result); + checkJsonSerialization(result); - Assertions.assertThat(result.getDocumentGroups().length) - .isBetween(0, 5 + 1); - } + Assertions.assertThat(result.getDocumentGroups().length).isBetween(0, 5 + 1); + } - public void testLanguageField() throws IOException { - Map attrs = new HashMap<>(); + public void testLanguageField() throws IOException { + Map attrs = new HashMap<>(); - ClusteringActionResponse result = new ClusteringActionRequestBuilder(client) + ClusteringActionResponse result = + new ClusteringActionRequestBuilder(client) .setQueryHint("data mining") .addSourceFieldMapping("title", LogicalField.TITLE) .addSourceFieldMapping("content", LogicalField.CONTENT) .addSourceFieldMapping("rndlang", LogicalField.LANGUAGE) .addAttributes(attrs) .setSearchRequest( - client.prepareSearch() + client + .prepareSearch() .setIndices(INDEX_TEST) .setSize(100) .setQuery(QueryBuilders.termQuery("content", "data")) .setFetchSource(new String[] {"title", "content", "rndlang"}, null)) .get(); - checkValid(result); - checkJsonSerialization(result); - - // We should receive groups for multiple languages - String [] languages = - result.getInfo().get(ClusteringActionResponse.Fields.Info.LANGUAGES).split(","); - - Assertions.assertThat(languages) - .describedAs("Expected a lot of languages to appear in top groups: " + Arrays.toString(languages)) - .hasSizeGreaterThan(5); - - DocumentGroup [] groups = result.getDocumentGroups(); - List groupLabels = Arrays.stream(groups) - .map(grp -> grp.getLabel() + " (" + grp.getDocumentReferences().length + ")").collect(Collectors.toList()); - Assertions.assertThat(groupLabels) - .hasSizeGreaterThan(5); - } - - public void testListAlgorithms() { - ListAlgorithmsActionResponse response = - new ListAlgorithmsActionRequestBuilder(client).get(); - - List algorithms = response.getAlgorithms(); - Assertions.assertThat(algorithms) - .isNotEmpty() - .contains( - LingoClusteringAlgorithm.NAME, - STCClusteringAlgorithm.NAME, - BisectingKMeansClusteringAlgorithm.NAME); - } - - public void testNonexistentFields() throws IOException { - ClusteringActionResponse result = new ClusteringActionRequestBuilder(client) + checkValid(result); + checkJsonSerialization(result); + + // We should receive groups for multiple languages + String[] languages = + result.getInfo().get(ClusteringActionResponse.Fields.Info.LANGUAGES).split(","); + + Assertions.assertThat(languages) + .describedAs( + "Expected a lot of languages to appear in top groups: " + Arrays.toString(languages)) + .hasSizeGreaterThan(5); + + DocumentGroup[] groups = result.getDocumentGroups(); + List groupLabels = + Arrays.stream(groups) + .map(grp -> grp.getLabel() + " (" + grp.getDocumentReferences().length + ")") + .collect(Collectors.toList()); + Assertions.assertThat(groupLabels).hasSizeGreaterThan(5); + } + + public void testListAlgorithms() { + ListAlgorithmsActionResponse response = new ListAlgorithmsActionRequestBuilder(client).get(); + + List algorithms = response.getAlgorithms(); + Assertions.assertThat(algorithms) + .isNotEmpty() + .contains( + LingoClusteringAlgorithm.NAME, + STCClusteringAlgorithm.NAME, + BisectingKMeansClusteringAlgorithm.NAME); + } + + public void testNonexistentFields() throws IOException { + ClusteringActionResponse result = + new ClusteringActionRequestBuilder(client) .setQueryHint("data mining") .addSourceFieldMapping("_nonexistent_", LogicalField.TITLE) .addSourceFieldMapping("_nonexistent_", LogicalField.CONTENT) .setCreateUngroupedDocumentsCluster(true) .setSearchRequest( - client.prepareSearch() + client + .prepareSearch() .setIndices(INDEX_TEST) .setSize(100) .setQuery(QueryBuilders.termQuery("content", "data")) .setFetchSource(new String[] {"title", "content"}, null)) - .execute().actionGet(); - - // There should be no clusters, but no errors. - checkValid(result); - checkJsonSerialization(result); - - // Top level groups should be input documents' languages (aggregation strategy above). - DocumentGroup[] documentGroups = result.getDocumentGroups(); - for (DocumentGroup group : documentGroups) { - if (!group.isUngroupedDocuments()) { - fail("Expected no clusters for non-existent fields."); - } - } + .execute() + .actionGet(); + + // There should be no clusters, but no errors. + checkValid(result); + checkJsonSerialization(result); + + // Top level groups should be input documents' languages (aggregation strategy above). + DocumentGroup[] documentGroups = result.getDocumentGroups(); + for (DocumentGroup group : documentGroups) { + if (!group.isUngroupedDocuments()) { + fail("Expected no clusters for non-existent fields."); + } } - - public void testNonexistentAlgorithmId() { - // The query should result in an error. - try { - new ClusteringActionRequestBuilder(client) - .setQueryHint("") - .addSourceFieldMapping("_nonexistent_", LogicalField.TITLE) - .setAlgorithm("_nonexistent_") - .setSearchRequest( - client.prepareSearch() - .setIndices(INDEX_TEST) - .setSize(100) - .setQuery(QueryBuilders.termQuery("content", "data")) - .setFetchSource(new String[] {"title", "content"}, null)) - .execute().actionGet(); - throw Preconditions.unreachable(); - } catch (IllegalArgumentException e) { - Assertions.assertThat(e) - .hasMessageContaining("No such algorithm:"); - } + } + + public void testNonexistentAlgorithmId() { + // The query should result in an error. + try { + new ClusteringActionRequestBuilder(client) + .setQueryHint("") + .addSourceFieldMapping("_nonexistent_", LogicalField.TITLE) + .setAlgorithm("_nonexistent_") + .setSearchRequest( + client + .prepareSearch() + .setIndices(INDEX_TEST) + .setSize(100) + .setQuery(QueryBuilders.termQuery("content", "data")) + .setFetchSource(new String[] {"title", "content"}, null)) + .execute() + .actionGet(); + throw Preconditions.unreachable(); + } catch (IllegalArgumentException e) { + Assertions.assertThat(e).hasMessageContaining("No such algorithm:"); } - - public void testPropagatingAlgorithmException() { - // The query should result in an error. - try { - // Out of allowed range (should cause an exception). - Map attrs = new HashMap<>(); - attrs.put("ignoreWordIfInHigherDocsPercent", Double.MAX_VALUE); - - new ClusteringActionRequestBuilder(client) - .setQueryHint("") - .addSourceFieldMapping("title", LogicalField.TITLE) - .addSourceFieldMapping("content", LogicalField.CONTENT) - .setAlgorithm(STCClusteringAlgorithm.NAME) - .addAttributes(attrs) - .setSearchRequest( - client.prepareSearch() - .setIndices(INDEX_TEST) - .setSize(100) - .setQuery(QueryBuilders.termQuery("content", "data")) - .setFetchSource(new String[] {"title", "content"}, null)) - .execute().actionGet(); - throw Preconditions.unreachable(); - } catch (ElasticsearchException e) { - Assertions.assertThat(e) - .hasMessageContaining("Clustering error:"); - } + } + + public void testPropagatingAlgorithmException() { + // The query should result in an error. + try { + // Out of allowed range (should cause an exception). + Map attrs = new HashMap<>(); + attrs.put("ignoreWordIfInHigherDocsPercent", Double.MAX_VALUE); + + new ClusteringActionRequestBuilder(client) + .setQueryHint("") + .addSourceFieldMapping("title", LogicalField.TITLE) + .addSourceFieldMapping("content", LogicalField.CONTENT) + .setAlgorithm(STCClusteringAlgorithm.NAME) + .addAttributes(attrs) + .setSearchRequest( + client + .prepareSearch() + .setIndices(INDEX_TEST) + .setSize(100) + .setQuery(QueryBuilders.termQuery("content", "data")) + .setFetchSource(new String[] {"title", "content"}, null)) + .execute() + .actionGet(); + throw Preconditions.unreachable(); + } catch (ElasticsearchException e) { + Assertions.assertThat(e).hasMessageContaining("Clustering error:"); } - - public void testIncludeHits() throws IOException { - // same search with and without hits - SearchRequestBuilder req = client.prepareSearch() - .setIndices(INDEX_TEST) - .setSize(2) - .setQuery(QueryBuilders.termQuery("content", "data")) - .setFetchSource(new String[] {"content"}, null); - - // with hits (default) - ClusteringActionResponse resultWithHits = new ClusteringActionRequestBuilder(client) + } + + public void testIncludeHits() throws IOException { + // same search with and without hits + SearchRequestBuilder req = + client + .prepareSearch() + .setIndices(INDEX_TEST) + .setSize(2) + .setQuery(QueryBuilders.termQuery("content", "data")) + .setFetchSource(new String[] {"content"}, null); + + // with hits (default) + ClusteringActionResponse resultWithHits = + new ClusteringActionRequestBuilder(client) .setQueryHint("data mining") .setAlgorithm(STCClusteringAlgorithm.NAME) .addSourceFieldMapping("title", LogicalField.TITLE) .setCreateUngroupedDocumentsCluster(true) .setSearchRequest(req) - .execute().actionGet(); - checkValid(resultWithHits); - checkJsonSerialization(resultWithHits); - // get JSON output - XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); - builder.startObject(); - resultWithHits.toXContent(builder, ToXContent.EMPTY_PARAMS); - builder.endObject(); - - ObjectNode jsonWithHits = (ObjectNode) new ObjectMapper().readTree(Strings.toString(builder)); - Assertions.assertThat(jsonWithHits.has("hits")).isTrue(); - - // without hits - ClusteringActionResponse resultWithoutHits = new ClusteringActionRequestBuilder(client) + .execute() + .actionGet(); + checkValid(resultWithHits); + checkJsonSerialization(resultWithHits); + // get JSON output + XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + resultWithHits.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + + ObjectNode jsonWithHits = (ObjectNode) new ObjectMapper().readTree(Strings.toString(builder)); + Assertions.assertThat(jsonWithHits.has("hits")).isTrue(); + + // without hits + ClusteringActionResponse resultWithoutHits = + new ClusteringActionRequestBuilder(client) .setQueryHint("data mining") .setMaxHits(0) .setAlgorithm(STCClusteringAlgorithm.NAME) .addSourceFieldMapping("title", LogicalField.TITLE) .setCreateUngroupedDocumentsCluster(true) .setSearchRequest(req) - .execute().actionGet(); - checkValid(resultWithoutHits); - checkJsonSerialization(resultWithoutHits); - - // get JSON output - builder = XContentFactory.jsonBuilder().prettyPrint(); - builder.startObject(); - resultWithoutHits.toXContent(builder, ToXContent.EMPTY_PARAMS); - builder.endObject(); - ObjectNode jsonWithoutHits = (ObjectNode) new ObjectMapper().readTree(Strings.toString(builder)); - - ObjectWriter ow = new ObjectMapper().writerWithDefaultPrettyPrinter(); - Assertions.assertThat(jsonWithoutHits.get("hits").get("hits").size()).isEqualTo(0); - - // insert hits into jsonWithoutHits - jsonWithoutHits.set("hits", jsonWithHits.get("hits")); - - // 'took' can vary, so ignore it - jsonWithoutHits.remove("took"); - jsonWithHits.remove("took"); - - // info can vary (clustering-millis, output_hits), so ignore it - jsonWithoutHits.remove("info"); - jsonWithHits.remove("info"); - - // profile can vary - jsonWithoutHits.remove("profile"); - jsonWithHits.remove("profile"); - - // now they should match - String json1 = ow.writeValueAsString(jsonWithHits); - logger.debug("--> with:\n" + json1); - String json2 = ow.writeValueAsString(jsonWithoutHits); - logger.debug("--> without:\n" + json2); - Assertions.assertThat(json1).isEqualTo(json2); - } - - public void testMaxHits() throws IOException { - // same search with and without hits - SearchRequestBuilder req = client.prepareSearch() - .setIndices(INDEX_TEST) - .setSize(2) - .setQuery(QueryBuilders.termQuery("content", "data")) - .setFetchSource(new String[] {"content"}, null); - - // Limit the set of hits to just top 2. - ClusteringActionResponse limitedHits = new ClusteringActionRequestBuilder(client) + .execute() + .actionGet(); + checkValid(resultWithoutHits); + checkJsonSerialization(resultWithoutHits); + + // get JSON output + builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + resultWithoutHits.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + ObjectNode jsonWithoutHits = + (ObjectNode) new ObjectMapper().readTree(Strings.toString(builder)); + + ObjectWriter ow = new ObjectMapper().writerWithDefaultPrettyPrinter(); + Assertions.assertThat(jsonWithoutHits.get("hits").get("hits").size()).isEqualTo(0); + + // insert hits into jsonWithoutHits + jsonWithoutHits.set("hits", jsonWithHits.get("hits")); + + // 'took' can vary, so ignore it + jsonWithoutHits.remove("took"); + jsonWithHits.remove("took"); + + // info can vary (clustering-millis, output_hits), so ignore it + jsonWithoutHits.remove("info"); + jsonWithHits.remove("info"); + + // profile can vary + jsonWithoutHits.remove("profile"); + jsonWithHits.remove("profile"); + + // now they should match + String json1 = ow.writeValueAsString(jsonWithHits); + logger.debug("--> with:\n" + json1); + String json2 = ow.writeValueAsString(jsonWithoutHits); + logger.debug("--> without:\n" + json2); + Assertions.assertThat(json1).isEqualTo(json2); + } + + public void testMaxHits() throws IOException { + // same search with and without hits + SearchRequestBuilder req = + client + .prepareSearch() + .setIndices(INDEX_TEST) + .setSize(2) + .setQuery(QueryBuilders.termQuery("content", "data")) + .setFetchSource(new String[] {"content"}, null); + + // Limit the set of hits to just top 2. + ClusteringActionResponse limitedHits = + new ClusteringActionRequestBuilder(client) .setQueryHint("data mining") .setMaxHits(2) .setAlgorithm(STCClusteringAlgorithm.NAME) .addSourceFieldMapping("title", LogicalField.TITLE) .setCreateUngroupedDocumentsCluster(true) .setSearchRequest(req) - .execute().actionGet(); - checkValid(limitedHits); - checkJsonSerialization(limitedHits); - - Assertions.assertThat(limitedHits.getSearchResponse().getHits().getHits()) - .hasSize(2); - - // get JSON output - XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); - builder.startObject(); - limitedHits.toXContent(builder, ToXContent.EMPTY_PARAMS); - builder.endObject(); - ObjectNode json = (ObjectNode) new ObjectMapper().readTree(Strings.toString(builder)); - Assertions.assertThat(json - .get("hits") - .get("hits").size()).isEqualTo(2); - } + .execute() + .actionGet(); + checkValid(limitedHits); + checkJsonSerialization(limitedHits); + + Assertions.assertThat(limitedHits.getSearchResponse().getHits().getHits()).hasSize(2); + + // get JSON output + XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + limitedHits.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + ObjectNode json = (ObjectNode) new ObjectMapper().readTree(Strings.toString(builder)); + Assertions.assertThat(json.get("hits").get("hits").size()).isEqualTo(2); + } } diff --git a/src/test/java/org/carrot2/elasticsearch/ClusteringActionRestIT.java b/src/test/java/org/carrot2/elasticsearch/ClusteringActionRestIT.java index ae598da..428e23f 100644 --- a/src/test/java/org/carrot2/elasticsearch/ClusteringActionRestIT.java +++ b/src/test/java/org/carrot2/elasticsearch/ClusteringActionRestIT.java @@ -1,5 +1,7 @@ package org.carrot2.elasticsearch; +import java.util.List; +import java.util.Map; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.client.methods.HttpGet; @@ -13,209 +15,213 @@ import org.carrot2.elasticsearch.ClusteringAction.RestClusteringAction; import org.elasticsearch.common.xcontent.XContentType; -import java.util.List; -import java.util.Map; - -/** - * REST API tests for {@link ClusteringAction}. - */ +/** REST API tests for {@link ClusteringAction}. */ public class ClusteringActionRestIT extends SampleIndexTestCase { - private XContentType xtype = randomFrom(XContentType.values()); - private ContentType contentType = ContentType.parse(xtype.mediaType()); - - public void testPostMultipleFieldMapping() throws Exception { - post("post_multiple_field_mapping.json"); - } - - public void testPostWithHighlightedFields() throws Exception { - post("post_with_highlighted_fields.json"); - } - - public void testPostWithFields() throws Exception { - post("post_with_fields.json"); - } - - public void testPostWithSourceFields() throws Exception { - post("post_with_source_fields.json"); - } - - @SuppressWarnings("unchecked") - @Lingo3G - public void testPostWithClusters() throws Exception { - Map response = post("post_with_clusters.json"); - - List> clusterList = (List>) response.get("clusters"); - int indent = 0; - dumpClusters(clusterList, indent); - } + private XContentType xtype = randomFrom(XContentType.values()); + private ContentType contentType = ContentType.parse(xtype.mediaType()); + + public void testPostMultipleFieldMapping() throws Exception { + post("post_multiple_field_mapping.json"); + } + + public void testPostWithHighlightedFields() throws Exception { + post("post_with_highlighted_fields.json"); + } + + public void testPostWithFields() throws Exception { + post("post_with_fields.json"); + } + + public void testPostWithSourceFields() throws Exception { + post("post_with_source_fields.json"); + } + + @SuppressWarnings("unchecked") + @Lingo3G + public void testPostWithClusters() throws Exception { + Map response = post("post_with_clusters.json"); + + List> clusterList = (List>) response.get("clusters"); + int indent = 0; + dumpClusters(clusterList, indent); + } + + @SuppressWarnings("unchecked") + private void dumpClusters(List> clusterList, int indent) { + for (Map cluster : clusterList) { + float score = ((Number) cluster.get("score")).floatValue(); + String label = (String) cluster.get("label"); + List documents = (List) cluster.get("documents"); + + StringBuilder stringBuilder = new StringBuilder(); + for (int i = 0; i < indent; i++) { + stringBuilder.append(" "); + } - @SuppressWarnings("unchecked") - private void dumpClusters(List> clusterList, int indent) { - for (Map cluster : clusterList) { - float score = ((Number) cluster.get("score")).floatValue(); - String label = (String) cluster.get("label"); - List documents = (List) cluster.get("documents"); - - StringBuilder stringBuilder = new StringBuilder(); - for (int i = 0; i < indent; i++) { - stringBuilder.append(" "); - } - - List> subclusters = (List>) cluster.get("clusters"); - - logger.debug(stringBuilder + "> " + label + " (score=" + score - + ", documents=" + (documents == null ? 0 : documents.size()) - + ", subclusters=" + (subclusters == null ? 0 : subclusters.size())); - - if (subclusters != null) { - dumpClusters(subclusters, indent + 1); - } + List> subclusters = (List>) cluster.get("clusters"); + + logger.debug( + stringBuilder + + "> " + + label + + " (score=" + + score + + ", documents=" + + (documents == null ? 0 : documents.size()) + + ", subclusters=" + + (subclusters == null ? 0 : subclusters.size())); + + if (subclusters != null) { + dumpClusters(subclusters, indent + 1); } } + } - private Map post(String queryJsonResource) throws Exception { - try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { - HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); + private Map post(String queryJsonResource) throws Exception { + try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); - post.setEntity(new ByteArrayEntity(jsonResourceAs(queryJsonResource, xtype), contentType)); - HttpResponse response = httpClient.execute(post); + post.setEntity(new ByteArrayEntity(jsonResourceAs(queryJsonResource, xtype), contentType)); + HttpResponse response = httpClient.execute(post); - Map map = checkHttpResponseContainsClusters(response); + Map map = checkHttpResponseContainsClusters(response); - List clusterList = (List) map.get("clusters"); - Assertions.assertThat(clusterList) - .isNotNull() - .isNotEmpty(); + List clusterList = (List) map.get("clusters"); + Assertions.assertThat(clusterList).isNotNull().isNotEmpty(); - Assertions.assertThat(clusterList.size()) - .isGreaterThan(5); + Assertions.assertThat(clusterList.size()).isGreaterThan(5); - return map; - } + return map; } - - public void testGetClusteringRequest() throws Exception { - try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { - HttpGet get = new HttpGet(restBaseUrl + "/" + RestClusteringAction.NAME - + "?pretty=true" - // search-specific attrs - + "&q=data+mining" - + "&_source=url,title,content" - + "&size=100" - // clustering-specific attrs - + "&query_hint=data+mining" - + "&" + ClusteringAction.ClusteringActionRequest.JSON_CREATE_UNGROUPED_CLUSTER + "=true" - + "&field_mapping_content=_source.title,_source.content" - + "&algorithm=" + STCClusteringAlgorithm.NAME); - HttpResponse response = httpClient.execute(get); - - Map map = checkHttpResponseContainsClusters(response); - - List clusterList = (List) map.get("clusters"); - Assertions.assertThat(clusterList) - .isNotNull() - .isNotEmpty(); - - Assertions.assertThat(clusterList.size()) - .isGreaterThan(5); - } + } + + public void testGetClusteringRequest() throws Exception { + try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + HttpGet get = + new HttpGet( + restBaseUrl + + "/" + + RestClusteringAction.NAME + + "?pretty=true" + // search-specific attrs + + "&q=data+mining" + + "&_source=url,title,content" + + "&size=100" + // clustering-specific attrs + + "&query_hint=data+mining" + + "&" + + ClusteringAction.ClusteringActionRequest.JSON_CREATE_UNGROUPED_CLUSTER + + "=true" + + "&field_mapping_content=_source.title,_source.content" + + "&algorithm=" + + STCClusteringAlgorithm.NAME); + HttpResponse response = httpClient.execute(get); + + Map map = checkHttpResponseContainsClusters(response); + + List clusterList = (List) map.get("clusters"); + Assertions.assertThat(clusterList).isNotNull().isNotEmpty(); + + Assertions.assertThat(clusterList.size()).isGreaterThan(5); } - - public void testRestApiPathParams() throws Exception { - try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { - HttpPost post = new HttpPost(restBaseUrl - + "/" + INDEX_EMPTY - + "/empty/" - + RestClusteringAction.NAME + "?pretty=true"); - post.setEntity(new ByteArrayEntity(jsonResourceAs("post_with_fields.json", xtype), contentType)); - HttpResponse response = httpClient.execute(post); - Map map = checkHttpResponseContainsClusters(response); - - List clusterList = (List) map.get("clusters"); - Assertions.assertThat(clusterList) - .isNotNull() - .isEmpty(); - } - } - - public void testRestApiRuntimeAttributes() throws Exception { - try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { - HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); - post.setEntity(new ByteArrayEntity(jsonResourceAs("post_runtime_attributes.json", xtype), contentType)); - HttpResponse response = httpClient.execute(post); - Map map = checkHttpResponseContainsClusters(response); - - List clusterList = (List) map.get("clusters"); - Assertions.assertThat(clusterList) - .isNotNull(); - Assertions.assertThat(clusterList.size()) - .isBetween(1, /* max. cluster size cap */ 5 + /* other topics */ 1); - } + } + + public void testRestApiPathParams() throws Exception { + try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + HttpPost post = + new HttpPost( + restBaseUrl + + "/" + + INDEX_EMPTY + + "/empty/" + + RestClusteringAction.NAME + + "?pretty=true"); + post.setEntity( + new ByteArrayEntity(jsonResourceAs("post_with_fields.json", xtype), contentType)); + HttpResponse response = httpClient.execute(post); + Map map = checkHttpResponseContainsClusters(response); + + List clusterList = (List) map.get("clusters"); + Assertions.assertThat(clusterList).isNotNull().isEmpty(); } - - public void testLanguageField() throws Exception { - try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { - HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); - post.setEntity(new ByteArrayEntity(jsonResourceAs("post_language_field.json", xtype), contentType)); - HttpResponse response = httpClient.execute(post); - Map map = checkHttpResponseContainsClusters(response); - - List clusterList = (List) map.get("clusters"); - Assertions.assertThat(clusterList.size()) - .isGreaterThan(1); - - Map info = (Map) map.get("info"); - Assertions.assertThat(((String) info.get("languages")).split(",")) - .hasSizeGreaterThan(3); - } + } + + public void testRestApiRuntimeAttributes() throws Exception { + try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); + post.setEntity( + new ByteArrayEntity(jsonResourceAs("post_runtime_attributes.json", xtype), contentType)); + HttpResponse response = httpClient.execute(post); + Map map = checkHttpResponseContainsClusters(response); + + List clusterList = (List) map.get("clusters"); + Assertions.assertThat(clusterList).isNotNull(); + Assertions.assertThat(clusterList.size()) + .isBetween(1, /* max. cluster size cap */ 5 + /* other topics */ 1); } + } - public void testNonexistentFields() throws Exception { - try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { - HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); - post.setEntity(new ByteArrayEntity(jsonResourceAs("post_nonexistent_fields.json", xtype), contentType)); - HttpResponse response = httpClient.execute(post); - Map map = checkHttpResponseContainsClusters(response); + public void testLanguageField() throws Exception { + try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); + post.setEntity( + new ByteArrayEntity(jsonResourceAs("post_language_field.json", xtype), contentType)); + HttpResponse response = httpClient.execute(post); + Map map = checkHttpResponseContainsClusters(response); - List clusterList = (List) map.get("clusters"); - Assertions.assertThat(clusterList).isNotNull(); - } - } + List clusterList = (List) map.get("clusters"); + Assertions.assertThat(clusterList.size()).isGreaterThan(1); - public void testNonexistentAlgorithmId() throws Exception { - try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { - HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); - post.setEntity(new ByteArrayEntity(jsonResourceAs("post_nonexistent_algorithmId.json", xtype), contentType)); - HttpResponse response = httpClient.execute(post); - expectErrorResponseWithMessage( - response, - HttpStatus.SC_BAD_REQUEST, - "No such algorithm: _nonexistent_"); - } - } - - public void testInvalidSearchQuery() throws Exception { - try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { - HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); - post.setEntity(new ByteArrayEntity(jsonResourceAs("post_invalid_query.json", xtype), contentType)); - HttpResponse response = httpClient.execute(post); - expectErrorResponseWithMessage( - response, - HttpStatus.SC_BAD_REQUEST, - "parsing_exception"); - } - } - - public void testPropagatingAlgorithmException() throws Exception { - try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { - HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); - post.setEntity(new ByteArrayEntity(jsonResourceAs("post_invalid_attribute_value.json", xtype), contentType)); - HttpResponse response = httpClient.execute(post); - expectErrorResponseWithMessage( - response, - HttpStatus.SC_INTERNAL_SERVER_ERROR, - "Clustering error: Value must be <= 1.0"); - } - } + Map info = (Map) map.get("info"); + Assertions.assertThat(((String) info.get("languages")).split(",")).hasSizeGreaterThan(3); + } + } + + public void testNonexistentFields() throws Exception { + try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); + post.setEntity( + new ByteArrayEntity(jsonResourceAs("post_nonexistent_fields.json", xtype), contentType)); + HttpResponse response = httpClient.execute(post); + Map map = checkHttpResponseContainsClusters(response); + + List clusterList = (List) map.get("clusters"); + Assertions.assertThat(clusterList).isNotNull(); + } + } + + public void testNonexistentAlgorithmId() throws Exception { + try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); + post.setEntity( + new ByteArrayEntity( + jsonResourceAs("post_nonexistent_algorithmId.json", xtype), contentType)); + HttpResponse response = httpClient.execute(post); + expectErrorResponseWithMessage( + response, HttpStatus.SC_BAD_REQUEST, "No such algorithm: _nonexistent_"); + } + } + + public void testInvalidSearchQuery() throws Exception { + try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); + post.setEntity( + new ByteArrayEntity(jsonResourceAs("post_invalid_query.json", xtype), contentType)); + HttpResponse response = httpClient.execute(post); + expectErrorResponseWithMessage(response, HttpStatus.SC_BAD_REQUEST, "parsing_exception"); + } + } + + public void testPropagatingAlgorithmException() throws Exception { + try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) { + HttpPost post = new HttpPost(restBaseUrl + "/" + RestClusteringAction.NAME + "?pretty=true"); + post.setEntity( + new ByteArrayEntity( + jsonResourceAs("post_invalid_attribute_value.json", xtype), contentType)); + HttpResponse response = httpClient.execute(post); + expectErrorResponseWithMessage( + response, HttpStatus.SC_INTERNAL_SERVER_ERROR, "Clustering error: Value must be <= 1.0"); + } + } } diff --git a/src/test/java/org/carrot2/elasticsearch/Lingo3G.java b/src/test/java/org/carrot2/elasticsearch/Lingo3G.java index ab011d3..ecc8267 100644 --- a/src/test/java/org/carrot2/elasticsearch/Lingo3G.java +++ b/src/test/java/org/carrot2/elasticsearch/Lingo3G.java @@ -1,7 +1,6 @@ package org.carrot2.elasticsearch; import com.carrotsearch.randomizedtesting.annotations.TestGroup; - import java.lang.annotation.Documented; import java.lang.annotation.ElementType; import java.lang.annotation.Inherited; @@ -14,5 +13,4 @@ @Target({ElementType.METHOD, ElementType.TYPE}) @Inherited @TestGroup(enabled = false, sysProperty = "tests.lingo3g") -public @interface Lingo3G { -} +public @interface Lingo3G {} diff --git a/src/test/java/org/carrot2/elasticsearch/ListAlgorithmsActionIT.java b/src/test/java/org/carrot2/elasticsearch/ListAlgorithmsActionIT.java index 4c9c55b..502d7cc 100644 --- a/src/test/java/org/carrot2/elasticsearch/ListAlgorithmsActionIT.java +++ b/src/test/java/org/carrot2/elasticsearch/ListAlgorithmsActionIT.java @@ -1,5 +1,9 @@ package org.carrot2.elasticsearch; +import static org.elasticsearch.test.ESIntegTestCase.Scope.SUITE; + +import java.util.Arrays; +import java.util.Collection; import org.assertj.core.api.Assertions; import org.carrot2.elasticsearch.ListAlgorithmsAction.ListAlgorithmsActionRequestBuilder; import org.carrot2.elasticsearch.ListAlgorithmsAction.ListAlgorithmsActionResponse; @@ -8,29 +12,24 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase.ClusterScope; -import java.util.Arrays; -import java.util.Collection; - -import static org.elasticsearch.test.ESIntegTestCase.Scope.SUITE; - -@ClusterScope(scope = SUITE, transportClientRatio = 0) +@ClusterScope(scope = SUITE, transportClientRatio = 0) public class ListAlgorithmsActionIT extends ESIntegTestCase { - @Override - protected Collection> nodePlugins() { - return Arrays.asList(ClusteringPlugin.class); - } + @Override + protected Collection> nodePlugins() { + return Arrays.asList(ClusteringPlugin.class); + } + + @Override + protected Collection> transportClientPlugins() { + return nodePlugins(); + } - @Override - protected Collection> transportClientPlugins() { - return nodePlugins(); - } + public void testAlgorithmsAreListed() throws Exception { + Client client = client(); - public void testAlgorithmsAreListed() throws Exception { - Client client = client(); - - ListAlgorithmsActionResponse response = new ListAlgorithmsActionRequestBuilder(client).get(); - Assertions.assertThat(response.getAlgorithms()) - .describedAs("A list of algorithms") - .containsOnly("Lingo", "STC", "Bisecting K-Means"); - } + ListAlgorithmsActionResponse response = new ListAlgorithmsActionRequestBuilder(client).get(); + Assertions.assertThat(response.getAlgorithms()) + .describedAs("A list of algorithms") + .containsOnly("Lingo", "STC", "Bisecting K-Means"); + } } diff --git a/src/test/java/org/carrot2/elasticsearch/MultithreadedClusteringIT.java b/src/test/java/org/carrot2/elasticsearch/MultithreadedClusteringIT.java index 84a34b9..4c1fb11 100644 --- a/src/test/java/org/carrot2/elasticsearch/MultithreadedClusteringIT.java +++ b/src/test/java/org/carrot2/elasticsearch/MultithreadedClusteringIT.java @@ -6,7 +6,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; - import org.assertj.core.api.Assertions; import org.carrot2.elasticsearch.ClusteringAction.ClusteringActionRequestBuilder; import org.carrot2.elasticsearch.ClusteringAction.ClusteringActionResponse; @@ -14,55 +13,58 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; -/** - * Java API tests. - */ +/** Java API tests. */ public class MultithreadedClusteringIT extends SampleIndexTestCase { - public void testRequestFlood() throws Exception { - final Client client = client(); + public void testRequestFlood() throws Exception { + final Client client = client(); - List> tasks = new ArrayList<>(); + List> tasks = new ArrayList<>(); - final int requests = 100; - final int threads = 10; + final int requests = 100; + final int threads = 10; - logger.debug("Stress testing: " + client.getClass().getSimpleName() + "| "); - for (int i = 0; i < requests; i++) { - tasks.add(() -> { - logger.debug(">"); + logger.debug("Stress testing: " + client.getClass().getSimpleName() + "| "); + for (int i = 0; i < requests; i++) { + tasks.add( + () -> { + logger.debug(">"); - ClusteringActionResponse result = new ClusteringActionRequestBuilder(client) + ClusteringActionResponse result = + new ClusteringActionRequestBuilder(client) .setQueryHint("data mining") .addFieldMapping("title", LogicalField.TITLE) .addHighlightedFieldMapping("content", LogicalField.CONTENT) .setSearchRequest( - client.prepareSearch() + client + .prepareSearch() .setIndices(INDEX_TEST) .setTypes("test") .setSize(100) .setQuery(QueryBuilders.termQuery("content", "data")) - .highlighter(new HighlightBuilder().preTags("").postTags("").field("content")) + .highlighter( + new HighlightBuilder().preTags("").postTags("").field("content")) .storedFields("title")) - .execute().actionGet(); + .execute() + .actionGet(); - logger.debug("<"); - checkValid(result); - checkJsonSerialization(result); - return result; - }); - } + logger.debug("<"); + checkValid(result); + checkJsonSerialization(result); + return result; + }); + } - ExecutorService executor = Executors.newFixedThreadPool(threads); - try { - for (Future future : executor.invokeAll(tasks)) { - ClusteringActionResponse response = future.get(); - Assertions.assertThat(response).isNotNull(); - Assertions.assertThat(response.getSearchResponse()).isNotNull(); - } - } finally { - executor.shutdown(); - logger.debug("Done."); - } + ExecutorService executor = Executors.newFixedThreadPool(threads); + try { + for (Future future : executor.invokeAll(tasks)) { + ClusteringActionResponse response = future.get(); + Assertions.assertThat(response).isNotNull(); + Assertions.assertThat(response.getSearchResponse()).isNotNull(); + } + } finally { + executor.shutdown(); + logger.debug("Done."); } + } } diff --git a/src/test/java/org/carrot2/elasticsearch/SampleDocumentData.java b/src/test/java/org/carrot2/elasticsearch/SampleDocumentData.java index 8db1419..a3a7c3a 100644 --- a/src/test/java/org/carrot2/elasticsearch/SampleDocumentData.java +++ b/src/test/java/org/carrot2/elasticsearch/SampleDocumentData.java @@ -1,1167 +1,1172 @@ package org.carrot2.elasticsearch; -/** - * Sample "documents" for tests. - */ +/** Sample "documents" for tests. */ final class SampleDocumentData { - static final String[][] SAMPLE_DATA = new String[][]{ - { - "http://en.wikipedia.org/wiki/Data_mining", - "Data mining - Wikipedia, the free encyclopedia", - "Article about knowledge-discovery in databases (KDD), the practice of automatically searching large stores of data " + - "for patterns."}, - - { - "http://www.ccsu.edu/datamining/resources.html", - "CCSU - Data Mining", - "A collection of Data Mining links edited by the Central Connecticut State University ... Graduate Certificate " + - "Program. Data Mining Resources. Resources. Groups ..."}, - - { - "http://www.kdnuggets.com/", - "KDnuggets: Data Mining, Web Mining, and Knowledge Discovery", - "Newsletter on the data mining and knowledge industries, offering information on data mining, knowledge discovery, " + - "text mining, and web mining software, courses, jobs, publications, and meetings."}, - - { - "http://en.wikipedia.org/wiki/Data-mining", - "Data mining - Wikipedia, the free encyclopedia", - "Data mining is considered a subfield within the Computer Science field of knowledge discovery. ... claim to perform " + - "\"data mining\" by automating the creation ..."}, - - { - "http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm", - "Data Mining: What is Data Mining?", - "Outlines what knowledge discovery, the process of analyzing data from different perspectives and summarizing it into" + - " useful information, can do and how it works."}, - - { - "http://www.the-data-mine.com/", - "Data Mining - Home Page (Misc)", - "Provides information about data mining also known as knowledge discovery in databases (KDD) or simply knowledge " + - "discovery. List software, events, organizations, and people working in data mining."}, - - { - "http://www.spss.com/data_mining/", - "Data Mining Software, Data Mining Applications and Data Mining Solutions", - "... complete data mining customer ... Data mining applications, on the other hand, embed ... it, our daily lives are" + - " influenced by data mining applications. ..."}, - - { - "http://datamining.typepad.com/data_mining/", - "Data Mining: Text Mining, Visualization and Social Media", - "Commentary on text mining, data mining, social media and data visualization. ... Opinion Mining Startups ... in " + - "sentiment mining, deriving tuples of ..."}, - - { - "http://www.statsoft.com/textbook/stdatmin.html", - "Data Mining Techniques", - "Outlines the crucial concepts in data mining, defines the data warehousing process, and offers examples of " + - "computational and graphical exploratory data analysis techniques."}, - - { - "http://answers.yahoo.com/question/index?qid=1006040419333", - "answers.yahoo.com/question/index?qid=1006040419333", - "Generally, data mining (sometimes called data or knowledge discovery) is the ... Midwest grocery chain used the data" + - " mining capacity of Oracle software to ..."}, - - { - "http://www.ccsu.edu/datamining/master.html", - "CCSU - Data Mining", - "Details on how to apply to the Master of Science in data mining may be found here. ... All data mining majors are " + - "classified for business purposes as part-time ..."}, - - { - "http://databases.about.com/od/datamining/a/datamining.htm", - "Data Mining: An Introduction", - "About.com article on how businesses are discovering new trends and patterns of behavior that previously went " + - "unnoticed through data mining, automated statistical analysis techniques."}, - - { - "http://www.thearling.com/", - "Data Mining and Analytic Technologies (Kurt Thearling)", - "Kurt Thearling's site dedicated to sharing information about data mining, the automated extraction of hidden " + - "predictive information from databases, and other analytic technologies."}, - - { - "http://www.sas.com/technologies/analytics/datamining/index.html", - "Data Mining Software and Text Mining | SAS", - "Data mining is the process of selecting, exploring and modeling large amounts of ... The knowledge gleaned from data" + - " and text mining can be used to fuel ..."}, - - { - "http://databases.about.com/od/datamining/Data_Mining_and_Data_Warehousing.htm", - "Data Mining and Data Warehousing", - "From data mining tutorials to data warehousing techniques, you'll find it all! ... Administration Design Development" + - " Data Mining Database Training Careers Reviews ..."}, - - { - "http://www.oracle.com/technology/products/bi/odm/index.html", - "Oracle Data Mining", - "Oracle Data Mining Product Center ... Using data mining functionality embedded in Oracle Database 10g, you can find " + - "... Mining High-Dimensional Data for ..."}, - - { - "http://www.ncdm.uic.edu/", - "National Center for Data Mining - Welcome", - "Conducts research in: scaling algorithms, applications and systems to massive data sets, developing algorithms, " + - "applications, and systems for mining distributed data, and establishing standard languages, protocols, and " + - "services for data mining and predictive modeling."}, - - { - "http://research.microsoft.com/dmx/DataMining/default.aspx", - "Data Mining Project", - "A long term Knowledge Discovery and Data Mining project which has the current ... Read more about how data mining is" + - " integrated into SQL server. Contact Us ..."}, - - { - "http://www.dmg.org/", - "Data Mining Group - DMG", - "... high performance networking, internet computing, data mining and related areas. ... Peter Stengard, Oracle Data " + - "Mining Technologies. prudsys AG, Chemnitz, ..."}, - - { - "http://datamining.typepad.com/data_mining/2006/05/the_truth_about.html", - "Data Mining: Text Mining, Visualization and Social Media: The Truth About Blogs", - "Commentary on text mining, data mining, social media and data visualization. ... Data Mining points to the latest " + - "papers from the 3rd International Workshop on ..."}, - - { - "http://searchsqlserver.techtarget.com/sDefinition/0,,sid87_gci211901,00.html", - "What is data mining? - a definition from Whatis.com - see also: data miner, data analysis", - "Data mining is the analysis of data for relationships that have not previously been discovered. ... Data mining " + - "techniques are used in a many research areas, ..."}, - - { - "http://www.thearling.com/text/dmwhite/dmwhite.htm", - "An Introduction to Data Mining", - "Data mining, the extraction of hidden predictive information from large ... prospective analyses offered by data " + - "mining move beyond the analyses of ..."}, - - { - "http://www.oracle.com/solutions/business_intelligence/data-mining.html", - "Oracle Data Mining", - "Using data mining functionality embedded in ... Oracle Data Mining JDeveloper and SQL Developer ... Oracle Magazine:" + - " Using the Oracle Data Mining API ..."}, - - { - "http://www.amazon.com/tag/data%20mining", - "Amazon.com: data mining", - "A community about data mining. Tag and discover new products. ... Data Mining (Paperback) Data Mining: Practical " + - "Machine Learning Tools and Techniques, Second Edition ..."}, - - { - "http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm", - "MIT OpenCourseWare | Sloan School of Management | 15.062 Data Mining, Spring 2003 | Home", - "... class of methods known as data mining that assists managers in recognizing ... Data mining is a rapidly growing " + - "field that is concerned with developing ..."}, - - { - "http://www.sas.com/offices/europe/sweden/2746.html", - "Om Data Mining och Text Mining. Ta fram s\u00E4kra beslutsunderlag med Data Miningverktyg fr\u00E5n SAS Institute.", - "SAS Insitutes business intelligence ger v\u00E4rdefull kunskap till hela din ... Till\u00E4mpningen av data mining " + - "str\u00E4cker sig \u00F6ver m\u00E5nga branscher och omr\u00E5den. ..."}, - - { - "http://www.dmoz.org/Computers/Software/Databases/Data_Mining/", - "Open Directory - Computers: Software: Databases: Data Mining", - "Data Mining and Knowledge Discovery - A peer-reviewed journal publishing ... In assessing the potential of data " + - "mining based marketing campaigns one needs to ..."}, - - { - "http://www.investorhome.com/mining.htm", - "Investor Home - Data Mining", - "Data Mining or Data Snooping is the practice of searching for relationships and ... up by making a case study in " + - "data mining out of the Motley Fool's Foolish Four. ..."}, - - { - "http://www.amazon.com/Data-Mining-Concepts-Techniques-Management/dp/1558604898", - "Amazon.com: Data Mining: Concepts and Techniques (The Morgan Kaufmann Series in Data Management Systems): Jiawei " + - "Han...", - "Amazon.com: Data Mining: Concepts and Techniques (The Morgan Kaufmann Series in Data Management Systems): Jiawei " + - "Han,Micheline Kamber: Books"}, - - { - "http://www.monografias.com/trabajos/datamining/datamining.shtml", - "Data Mining - Monografias.com", - "Data Mining, la extracci\u00F3n de informaci\u00F3n oculta y predecible de grandes bases ... Las herramientas de " + - "Data Mining predicen futuras tendencias y comportamientos, ..."}, - - { - "http://www.megaputer.com/data_mining.php", - "Data Mining Technology - Megaputer", - "Data Mining Technology from Megaputer ... Typical tasks addressed by data mining include: ... Yet, data mining " + - "requires far more than just machine learning. ..."}, - - {"http://datamining.itsc.uah.edu/", - "itsc data mining solutions center", ""}, - - { - "http://www.dmreview.com/specialreports/20050503/1026882-1.html", - "Hard Hats for Data Miners: Myths and Pitfalls of Data Mining", - "This article debunks several myths about data mining and presents a plan of action to avoid some of the pitfalls. " + - "... a typical data mining conference or ..."}, - - { - "http://research.microsoft.com/dmx/", - "Data Management, Exploration and Mining- Home", - "The Data Management Exploration and Mining Group (DMX) ... Our research effort in data mining focuses on ensuring " + - "that traditional ..."}, - - { - "http://www.biomedcentral.com/info/about/datamining", - "BioMed Central | about us | Data mining research", - "... a collection of links to publications on the subject of biomedical text mining. Data mining Open Access research" + - " - an article in the 8 September 2003 edition of ..."}, - - { - "http://www.datapult.com/Data_Mining.htm", - "Data Mining", - "Data Mining Services provide customers with tools to quickly sift through the ... into Datapult Central for use with" + - " Data Mining tools and other Datapult products. ..."}, - - { - "http://www.siam.org/meetings/sdm02/", - "SIAM International Conference on Data Mining", - "SIAM International Conference on Data Mining, co-Sponsored by AHPCRC and ... Clustering High Dimensional Data and " + - "its Applications. Mining Scientific Datasets ..."}, - - { - "http://dir.yahoo.com/Computers_and_Internet/Software/Databases/Data_Mining/", - "Data Mining in the Yahoo! Directory", - "Learn about data mining and knowledge discovery, the process of finding patterns ... Cross Industry Standard Process" + - " for Data Mining (CRISP-DM) ..."}, - - { - "http://www.llnl.gov/str/Kamath.html", - "Data Mining", - "... Sapphire-a semiautomated, flexible data-mining software infrastructure. ... Data mining is not a new field. ... " + - "scale, scientific data-mining efforts such ..."}, - - { - "http://www.sqlserverdatamining.com/", - "SQL Server Data Mining > Home", - "SQL Server Data Mining Portal ... information about our exciting data mining features. ... CTP of Microsoft SQL " + - "Server 2008 Data Mining Add-Ins for Office 2007 ..."}, - - { - "http://www.dbmsmag.com/9807m01.html", - "DBMS - DBMS Data Mining Solutions Supplement", - "As recently as two years ago, data mining was a new concept for many people. Data mining products were new and " + - "marred by unpolished interfaces. ..."}, - - {"http://www.oclc.org/research/projects/mining", - "Data mining [OCLC - Projects]", - "Describes the goals, methodology, and timing of the Data mining project."}, - - { - "http://www.the-data-mine.com/bin/view/Misc/IntroductionToDataMining", - "Data Mining - Introduction To Data Mining (Misc)", - "Some example application areas are listed under Applications Of Data Mining ... Crows Introduction - \"Introduction " + - "to Data Mining and Knowledge Discovery\"- http: ..."}, - - { - "http://www.pentaho.com/products/data_mining/", - "Pentaho Commercial Open Source Business Intelligence: Data Mining", - "... (BI) to the next level by adding data mining and workflow to the mix. ... Pentaho Data Mining is differentiated " + - "by its open, standards-compliant nature, ..."}, - - { - "http://www.unf.edu/~selfayou/html/data_mining.html", - "Data Mining", - "This course approaches data mining topics from an Artificial Intelligence ... The course will also cover " + - "Applications and Trends in Data Mining. Textbook: ..."}, - - { - "http://www.statsoft.com/products/dataminer.htm", - "Data Mining Software & Predictive Modeling Solutions", - "data mining software & predictive modeling sold online by statsoft.com. ... of automated and ready-to-deploy data " + - "mining solutions for a wide variety of ..."}, - - { - "http://gosset.wharton.upenn.edu/wiki/index.php/Main_Page", - "Main Page - Knowledge Discovery", - "The Penn Data Mining Group develops principled means of modeling and ... knowledge of specific application areas to " + - "develop new approaches to data mining. ..."}, - - { - "http://www.twocrows.com/glossary.htm", - "Two Crows: Data mining glossary", - "Data mining terms concisely defined. ... Accuracy is an important factor in assessing the success of data mining. " + - "... data mining ..."}, - - { - "http://www.cdc.gov/niosh/mining/data/", - "NIOSH Mining: MSHA Data File Downloads | CDC/NIOSH", - "MSHA accident, injury, employment, and production data files in SPSS and dBase formats ... Data files on mining " + - "accidents, injuries, fatalities, employment, ..."}, - - {"http://www.cartdatamining.com/", "Salford Data mining 2006", - "Objective | Previous Conferences | Call for Abstracts | LATEST INFO ..."}, - - { - "http://www.inductis.com/", - "Data Mining | Focused Data Mining For Discovery To Assist Management", - "Inductis offers high-level data mining services to assist management decisions ... The Data Mining Shootout " + - "...more>> ISOTech 2006 - The Insurance Technology ..."}, - - { - "http://www.datamininglab.com/", - "Elder Research: Predictive Analytics & Data Mining Consulting", - "Provides consulting and short courses in data mining and pattern discovery patterns in data."}, - - { - "http://www.microsoft.com/sql/technologies/dm/default.mspx", - "Microsoft SQL Server: Data Mining", - "Microsoft SQL Server Data Mining helps you explore your business data and discover patterns to reveal the hidden " + - "trends about your products, customer, market, and ..."}, - - { - "http://www.dataminingcasestudies.com/", - "Data Mining Case Studies", - "Recognizing outstanding practical contributions in the field of data mining. ... case studies are one of the most " + - "discussed topics at data mining conferences. ..."}, - - { - "http://www.webopedia.com/TERM/D/data_mining.html", - "What is data mining? - A Word Definition From the Webopedia Computer Dictionary", - "This page describes the term data mining and lists other pages on the Web where you can find additional information." + - " ... Data Mining and Analytic Technologies ..."}, - - { - "http://www.cs.waikato.ac.nz/~ml/weka/book.html", - "Data Mining: Practical Machine Learning Tools and Techniques", - "Book. Data Mining: Practical Machine Learning Tools and Techniques (Second Edition) ... Explains how data mining " + - "algorithms work. ..."}, - - { - "http://www.datamining.com/", - "Predictive Modeling and Predictive Analytics Solutions | Enterprise Miner Software from Insightful Software", - "Insightful Enterprise Miner - Enterprise data mining for predictive modeling and predictive analytics."}, - - { - "http://www.sra.com/services/index.asp?id=153", - "SRA International - Data Mining Solutions", - "... and business who ask these questions are finding solutions through data mining. ... Data mining is the process " + - "of discovering previously unknown relationships in ..."}, - - { - "http://en.wiktionary.org/wiki/data_mining", - "data mining - Wiktionary", - "Data mining. Wikipedia. data mining. a technique for searching large-scale databases for patterns; used mainly to " + - "... Czech: data mining n., dolov\u00E1n\u00ED dat n. ..."}, - - {"http://www.datamining.org/", "data mining institute", ""}, - - { - "http://videolectures.net/Top/Computer_Science/Data_Mining/", - "Videolectures category: Data Mining", - "Next Generation Data Mining Tools: Power laws and self-similarity for graphs, ... Parallel session 4 - Hands-on " + - "section Data mining with R. Luis Torgo. 1 comment ..."}, - - { - "http://www2008.org/CFP/RP-data_mining.html", - "WWW2008 CFP - WWW 2008 Call For Papers: Refereed Papers - Data Mining", - "WWW2008 - The 17th International World Wide Web Conference - Beijing, China (21 - 25 April 2008) Hosted by Beihang " + - "Universit ... data mining, machine ..."}, - - { - "http://answers.yahoo.com/question/index?qid=20070227091350AAVDlI1", - "what is data mining?", - "... the purchases of customers, a data mining system could identify those customers ... A simple example of data " + - "mining, often called Market Basket Analysis, ..."}, - - {"http://clubs.yahoo.com/clubs/datamining", - "datamining2 : Data Mining Club - 1600+ members!!", - "datamining2: Data Mining Club - 1600+ members!"}, - - { - "http://www.siam.org/meetings/sdm01/", - "First SIAM International Conference on Data Mining", - "The field of data mining draws upon extensive work in areas such as statistics, ... recent results in data mining, " + - "including applications, algorithms, software, ..."}, - - { - "http://www.statserv.com/datamining.html", - "St@tServ - About Data Mining", - "St@tServ Data Mining page ... Data mining in molecular biology, by Alvis Brazma. Graham Williams page. Knowledge " + - "Discovery and Data Mining Resources, ..."}, - - { - "http://www.springer.com/computer/database+management+&+information+retrieval/journal/10618", - "Data Mining and Knowledge Discovery - Data Mining and Knowledge Discovery Journals, Books & Online Media | Springer", - "Technical journal focused on the theory, techniques, and practice for extracting information from large databases."}, - - { - "http://msdn2.microsoft.com/en-us/library/ms174949.aspx", - "Data Mining Concepts", - "Data mining is frequently described as "the process of extracting ... Creating a data mining model is a dynamic" + - " and iterative process. ..."}, - - { - "http://www.cs.wisc.edu/dmi/", - "DMI:Data Mining Institute", - "Data Mining Institute at UW-Madison ... The Data Mining Institute (DMI) was ... Corporation with the support of the " + - "Data Mining Group of Microsoft Research. ..."}, - - { - "http://www.dataminingconsultant.com/", - "DataMiningConsultant.com", - "... Website for Data Mining Methods and ... data mining at Central Connecticut State University, he ... also " + - "provides data mining consulting and statistical ..."}, - - { - "http://www.dmreview.com/channels/data_mining.html", - "Data Mining", - "... business intelligence, data warehousing, data mining, CRM, analytics, ... M2007 Data Mining Conference Hitting " + - "10th Year and Going Strong ..."}, - - { - "http://www.unc.edu/~xluan/258/datamining.html", - "Data Mining", - "What is the current state of data mining? The immediate future ... Data Mining is the process of extracting " + - "knowledge hidden from large volumes of ..."}, - - { - "http://www.data-miners.com/", - "Data Miners Inc. We wrote the book on data mining!", - "Data mining consultancy; services include predictive modeling, consulting, and seminars."}, - - { - "http://www.versiontracker.com/dyn/moreinfo/macosx/27607", - "Data Mining 2.2.2 software download - Mac OS X - VersionTracker", - "Find Data Mining downloads, reviews, and updates for Mac OS X including commercial software, shareware and freeware " + - "on VersionTracker.com."}, - - { - "http://www.webtechniques.com/archives/2000/01/greening/", - "New Architect: Features", - "Article by Dan Greening on data mining techniques applied to analyzing and making decisions from web data. ... and " + - "business analysts use data-mining techniques. ..."}, - - { - "http://www.networkdictionary.com/software/DataMining.php", - "Data Mining | NetworkDictionary", - "Data Mining is the automated extraction of hidden predictive information from databases. ... The data mining tools " + - "can make this leap. ..."}, - - { - "http://www.youtube.com/watch?v=wqpMyQMi0to", - "YouTube - What is Data Mining? - February 19, 2008", - "Association Labratory President and CEO Dean West discusses Data Mining and how it can be applied to associations. " + - "... Data Mining Association Forum Dean West ..."}, - - { - "http://www.cs.sfu.ca/~han/DM_Book.html", - "Book page", - "Chapter 4. Data Mining Primitives, Languages, and System Architectures ... Chapter 9. Mining Complex Types of Data " + - "... to Microsoft's OLE DB for Data Mining ..."}, - - { - "http://www.twocrows.com/", - "Two Crows data mining home page", - "Dedicated to the development, marketing, sales and support of tools for knowledge discovery to make data mining " + - "accessible and easy to use."}, - - { - "http://www.autonlab.org/tutorials", - "Statistical Data Mining Tutorials", - "Includes a set of tutorials on many aspects of statistical data mining, including the foundations of probability, " + - "the foundations of statistical data analysis, and most of the classic machine learning and data mining " + - "algorithms."}, - - { - "http://ecommerce.ncsu.edu/technology/topic_Datamining.html", - "E-commerce Technology: Data Mining", - "\"Web usage mining: discovery and applications of web usage patterns from web data\" ... Patterns and Trends by " + - "Applying OLAP and Data Mining Technology on Web Logs. ..."}, - - { - "http://www.teradata.com/t/page/106002/index.html", - "Teradata Data Mining Warehouse Solution", - "... a high-powered analytic warehouse that streamlines the data mining process. ... while building the analytic " + - "model using your favorite data mining tool. ..."}, - - { - "http://datamining.japati.net/", - "Indo Datamining", - "Apa yang bisa dan tidak bisa dilakukan data mining ? ... Iko Pramudiono \"» ... Apa itu data mining ? Iko " + - "Pramudiono \"». artikel lainnya \" tutorial ..."}, - - { - "http://www.affymetrix.com/products/software/specific/dmt.affx", - "Affymetrix - Data Mining Tool (DMT) (Unsupported - Archived Product)", - "Affymetrix is dedicated to developing state-of-the-art technology for acquiring, analyzing, and managing complex " + - "genetic ... The Data Mining Tool (DMT) ..."}, - - { - "http://www.pcc.qub.ac.uk/tec/courses/datamining/stu_notes/dm_book_1.html", - "Data Mining Student Notes, QUB", - "2 - Data Mining Functions. 2.1 - Classification. 2.2 - Associations ... 5 - Data Mining Examples. 5.1 - Bass Brewers" + - ". 5.2 - Northern Bank. 5.3 - TSB Group PLC ..."}, - - { - "http://www.spss.com/text_mining_for_clementine/", - "Text Mining for Clementine | Improve the accuracy of data mining", - "Text Mining for Clementine from SPSS enables you to use text data to improve the accuracy of predictive models. ... " + - "and about data mining in general. ..."}, - - { - "http://www.open-mag.com/features/Vol_16/datamining/datamining.htm", - "Data Mining", - "Without data mining, a merchant isn't even close to leveraging what customers want and will buy. ... Data mining is " + - "to be found in applications like bio ..."}, - - { - "http://wordpress.com/tag/data-mining/", - "Data Mining \u2014 Blogs, Pictures, and more on WordPress", - "Going Beyond the Numbers: Context-Sensitive Data Mining ... Data mining examples ... many websites employing data " + - "mining technology to provide recommendation ..."}, - - { - "http://www.dmbenchmarking.com/", - "Benchmarking- Data Mining Benchmarking Association", - "Association of companies and organizations working to identify \"best in class\" data mining processes through " + - "benchmarking studies."}, - - { - "http://www.dataentryindia.com/data_processing/data_mining.php", - "Data Mining, Data Mining Process, Data Mining Techniques, Outsourcing Mining Data Services", - "... Walmart, Fundraising Data Mining, Data Mining Activities, Web-based Data Mining, ... in many industries makes us" + - " the best choice for your data mining needs. ..."}, - - { - "http://www.target.com/Data-Mining-Applications-International-Information/dp/1853127299", - "Data Mining V: Data Mining, Text Mining... [Hardcover] | Target.com", - "Shop for Data Mining V: Data Mining, Text Mining and Their Business Applications : Fifth International Conference on" + - " Data Mining (Management Information System) at"}, - - { - "http://www.cs.ubc.ca/~rng/research/datamining/data_mining.htm", - "Data Mining", - "... varying degrees of success, the data mining tools developed thus far, by and ... (a) we should recognize that " + - "data mining is a multi-step process, and that (b) ..."}, - - { - "http://jcp.org/en/jsr/detail?id=73", - "The Java Community Process(SM) Program - JSRs: Java Specification Requests - detail JSR# 73", - "Currently, there is no widely agreed upon, standard API for data mining. By using JDMAPI, implementers of data " + - "mining applications can expose a single, ..."}, - - { - "http://www.microsoft.com/spain/sql/technologies/dm/default.mspx", - "Microsoft SQL Server2005: Data Mining", - "Data Mining es la tecnolog\u00EDa BI que le ayudar\u00E1 a construir modelos anal\u00EDticos complejos e integrar " + - "esos modelos con sus operaciones comerciales."}, - - { - "http://www.bos.frb.org/economic/nerr/rr2000/q3/mining.htm", - "Regional Review: Mining Data", - "Although data mining by itself is not going to get the Celtics to the playoffs, ... then, firms that specialize in " + - "data-mining software have been developing a ..."}, - - { - "http://www.scianta.com/technology/datamining.htm", - "Data Mining", - "... are excellent candidates for data mining, fault prediction, problem diagnosis, ... Data Mining uses this theory " + - "to support Link and Affinity Group analysis \u2013 an ..."}, - - { - "http://www.gusconstan.com/DataMining/index.htm", - "Discovery and Mining", - "Verification-Driven Data Mining. Advantages of Symbolic Classifiers. Manual vs. Automatic ... Currently, data mining" + - " solutions have been developed by large software ..."}, - - { - "http://www.dataminingconsultant.com/DKD.htm", - "DataMiningConsultant.com", - "Companion Website for Data Mining Methods and Models ... \"This is an excellent introductory book on data mining. " + - "... An Introduction to Data Mining at Amazon.com ..."}, - - { - "http://www.pfaw.org/pfaw/general/default.aspx?oid=9717", - "People For the American Way - Data Mining", - "data mining, civil liberties, civil rights, terrorism, september 11th, anti-terrorism, ashcroft, government " + - "intrusion, privacy, email, patriot, american"}, - - { - "http://dm1.cs.uiuc.edu/", - "Data Mining Research Group", - "... conducting research in various areas in data mining and other related fields. ... on Data Mining (SDM'08), (full" + - " paper), Atlanta, GA, April 2007. ..."}, - - { - "http://www.dawid.tv/", - "dawid.tv", - "Watch free videos on dawid.tv. Now Playing: DAWID DRIF ... About. Dawid. Bielawa - Poland. Friends: 1. Last Login: " + - "... View All Members of dawid.tv. Tag Cloud ..."}, - - {"http://www.dawid.co.za/", "DAWID", - "Welkom by: Dawid Bredenkamp se webtuiste. Foto's. Skakels. Kontak ..."}, - - { - "http://www.dawid-nowak.org/", - "Dawid Nowak", - "Dawid Nowak Home Page ... Resume. Gallery. Thailand. Still in Thailand. Into Laos. Through Laos To Cambodia. RSS " + - "feeds for lazy technically oriented people ..."}, - - { - "http://dawid.digitalart.org/", - "dawid.digitalart.org - Profile of Dawid Michalczyk", - "A gallery of masterfully created works of digital art. ... Dawid Michalczyk \" Send Private Message \" Send an " + - "E-mail. Art Gallery (13) Guestbook ..."}, - - { - "http://www.dawid.nu/index.php?ID=4", - "dawid :: images / commercial work :: advertising & illustrations", - "The official site of photographer Dawid, Bj\u00F6rn Dawidsson. Fotograf Dawid - Bj\u00F6rn Dawidsson ... references:" + - " AB Vin & Sprit, Apple, Berliner, Bond, Ericsson, ..."}, - - { - "http://www.dawidphotography.com/", - "Photographer London UK, Dawid de Greeff \u00A9 2007 , Digital photographer - Portfolio", - "South African born Dawid & Annemarie de Greeff are International digital ... NAME. EMAIL. MESSAGE ..."}, - - { - "http://www.anniedawid.com/", - ": : Annie Dawid : : Author and Photographer", - "Annie Dawid is the author of Resurrection City: A Novel of Jonestown (to be ... Annie Dawid lives and writes in the " + - "Sangre de Cristo range of South-Central Colorado. ..."}, - - { - "http://en.wikipedia.org/wiki/Dawid_Janowski", - "Dawid Janowski - Wikipedia, the free encyclopedia", - "Dawid Markelowicz Janowski (in English usually called David Janowski) (born 25 ... Dawid Janowski died on January " + - "15, 1927 of tuberculosis. ..."}, - - { - "http://www.dawid.nu/index.php?ID=2", - "dawid :: images / art :: COMP", - "The official site of photographer Dawid, Bj\u00F6rn Dawidsson. Fotograf Dawid - Bj\u00F6rn Dawidsson ... dawid : " + - "images / art : COMP: Series photographed during the mid 80's. ..."}, - - { - "http://en.wikipedia.org/wiki/Dawid", - "Dawid - Wikipedia, the free encyclopedia", - "Dawid. From Wikipedia, the free encyclopedia. Jump to: navigation, search. Dawid may refer to the following people: " + - "David, the biblical King David ..."}, - - { - "http://www.myspace.com/dawidszczesny", - "MySpace.com - dawid szczesny - Wroclaw - www.myspace.com/dawidszczesny", - "MySpace music profile for dawid szczesny with tour dates, songs, videos, pictures, blogs, band information, " + - "downloads and more"}, - - { - "http://www.art.eonworks.com/", - "Computer wallpaper, stock illustration, Sci-Fi art, Fantasy art, Surreal art, Space art, Abstract art - posters, ...", - "Digital Art of Dawid Michalczyk. Unique posters, prints, wallpapers and wall calendars. ... the official website of " + - "Dawid Michalczyk - a freelance illustrator ..."}, - - { - "http://www.surfski.info/content/view/384/147/", - "Surf Ski . Info - Dawid Mocke King of the Harbour 2007", - "Surf Ski information and news. Training tips from the experts, equipment, getting started guides, surfski reviews, " + - "photos ...links and stories."}, - - { - "http://www.agentsbase.com/", - "Agent's Base", - "Dawid Kasperowicz. Get Firefox. Get Google Ads. Affiliates ... By Dawid | February 28, 2008 - 12:05 pm - Posted in " + - "Technology ..."}, - - { - "http://www.target.com/Dawid-Dawidsson-Bjorn/dp/3882437243", - "Dawid [Hardcover] | Target Official Site", - "Shop for Dawid at Target. Choose from a wide range of Books. Expect More, Pay Less at Target.com"}, - - { - "http://www.dawid.tobiasz.org/", - "Dawid", - "Dawid. Fotografia stanowi w\u0142asno\u015B\u0107 autora. Kopiowanie i rozpowszechnianie ... Copyright by Dawid " + - "Tobiasz [Fotografia stanowi w\u0142asno\u015B\u0107 autora. ..."}, - - { - "http://juliedawid.co.uk/", - "Julie Dawid :", - "birthing support scotland. Poetical Fusion Folk. Words. Band. Listen. Contact. Copyright \u00A9 2004 Julie Dawid. " + - "All Rights reserved. Powered by Accidental Media ..."}, - - { - "http://conference.dawid.uni.wroc.pl/index.php?lang=iso-8859-2", - "konferencja - Welcome", - "Joomla - the dynamic portal engine and content management system ... The 1st Symposium of Pedagogy and Psychology " + - "PhD Students. Monday, 13 February 2006 ..."}, - - { - "http://www.ibe.unesco.org/publications/ThinkersPdf/dawide.pdf", - "Jan Wladyslaw Dawid", - "All his life, Jan Wladyslaw Dawid was closely associated with the teaching ... Dawid who believed that these " + - "experiments were fundamental to the blossoming and ..."}, - - {"http://www.dawid-posciel.pl/", "www.dawid-posciel.pl", ""}, - - { - "http://www.dawidrurkowski.com/", - "Dawid Rurkowski - portfolio", - "Dawid Rurkowski online webdesign portfolio ... My name is Dawid, I am a web designer with a real passion to my work." + - " ... \u00A9 Copyright 2007 Dawid Rurkowski All ..."}, - - { - "http://conference.dawid.uni.wroc.pl/index.php?option=com_content&task=blogsection&id=20&Itemid=49%E2%8C%A9=iso-8859-2", - "konferencja - Warsztaty", - "Joomla - the dynamic portal engine and content management system ... Karolina Pietras is a psychologist, business " + - "trainer and PhD student at Faculty ..."}, - - { - "http://chess.about.com/library/persons/blp-jano.htm", - "Famous Chess Players - Dawid Janowsky", - "Beginners Improve Your Game Play Chess Online Chess Downloads Computers and ... Dawid Janowsky. Unsuccessful " + - "challenger for World Championship ..."}, - - { - "http://www.pbase.com/dawidwnuk", - "Dawid Wnuk's Photo Galleries at pbase.com", - "All images on this site copyrighted by DAWID WNUK. Please contact me if you would like to purchase or licence a " + - "photograph. Portraiture ..."}, - - { - "http://dawid-witos.nazwa.pl/chylu/en/index.php?link=news", - "...Official Website of Michael Chylinski...", - "Welcome to chylinski.info- the official web site of Polish National Team and ... We invite you to visite our service" + - " and write your opinions on forum. A few ..."}, - - {"http://photoexposed.com/", "photoeXposed.com", - "Dawid Slaski-Sawicki Photography"}, - - { - "http://vids.myspace.com/index.cfm?fuseaction=vids.individual&VideoID=7370487", - "MySpaceTV Videos: Edyp trailer by dawid", - "Edyp trailer by dawid Watch it on MySpace Videos. ... Posted by: dawid. Runtime: 0:52. Plays: 43. Comments: 0. " + - "Reinkarnacje - \"Czy to mi..."}, - - { - "http://www.linkedin.com/in/dawidmadon", - "LinkedIn: Dawid Mado\u0144", - "Dawid Mado\u0144's professional profile on LinkedIn. ... Dawid Mado\u0144. ORACLE DBA at Apriso and Information " + - "Technology and Services Consultant ..."}, - - { - "http://www.linkedin.com/pub/1/878/410", - "LinkedIn: Dawid Tracz", - "Dawid Tracz's professional profile on LinkedIn. ... Dawid Tracz's Experience. Graphician, WebDesigner, " + - "InterfaceDesigner. DreamLab Onet.pl Sp. ..."}, - - { - "http://profiles.friendster.com/13547484", - "Friendster - Dawid Martin", - "Friendster: ; location: Poland, PL; Kiedrowice, Warsaw (Poland),Jogja (Indonesia); Warsaw Gamelan Group, Bosso, " + - "Tepellere, Mandala, Suita Etnik, Konco-Konco Blues ..."}, - - { - "http://www.genevievedawid.com/", - "Genevieve Dawid mentor, lecturer and author", - "Author of the Achiever's Journey a real self help book for dyslexics, Genevieve Dawid offers a unique approach to " + - "mentoring and personal development."}, - - { - "http://www.last.fm/music/dawid+szczesny", - "dawid szczesny \u2013 Music at Last.fm", - "People who like dawid szczesny also like Masayasu Tzboguchi Trio, Ametsub, ... Dawid Szcz\u0119sny performed in " + - "Poland, Germany (in 2005 invited by Kata Adamek and ..."}, - - { - "http://vids.myspace.com/index.cfm?fuseaction=vids.individual&videoid=2028359840", - "MySpaceTV Videos: paka 2007-1 by dawid", - "paka 2007-1 by dawid Watch it on MySpace Videos. ... Posted by: dawid. Runtime: 0:52. Plays: 43. Comments: 0. " + - "Reinkarnacje - \"Czy to mi..."}, - - { - "http://dawid.secondbrain.com/", - "Dawid's profile page - Second Brain_ - All Your Content", - "Dawid. People first, strategy second ... Dawid's recent updates. February 07 2008. Wimbledon ... Posted by Dawid on " + - "Second Brain February 05 2008. Post comment ..."}, - - { - "http://www.ushmm.org/wlc/article.php?lang=en&ModuleId=10007294", - "Dawid Sierakowiak", - "Dawid was an avid reader and an excellent observer. Throughout Dawid's imprisonment in the Lodz ghetto he made sure " + - "to write about ..."}, - - { - "http://www.ctbodyartist.com/", - "CT Body Artist | Chrys Dawid (203) 255-1875", - "CT Body Artist, Chrys Dawid (203) 255-1875 Professional Body painting service. From Advertising Champaigns to " + - "Private parties, make your statement & Marketing goals ..."}, - - { - "http://www.amazon.com/phrase/Dawid-Sierakowiak", - "Amazon.com: \"Dawid Sierakowiak\": Key Phrase page", - "Key Phrase page for Dawid Sierakowiak: Books containing the phrase Dawid Sierakowiak ... Key Phrases: Dawid " + - "Sierakowiak, United States, New York, Niutek ..."}, - - { - "http://www.planetizen.com/user/403/track", - "Irvin Dawid | Planetizen", - "Irvin Dawid. 0. 2 weeks 20 hours ago. news ... Irvin Dawid. 1. 3 weeks 5 days ago. news. Traffic Crashes Cost Twice " + - "as Much as Congestion ..."}, - - { - "http://www.ushmm.org/wlc/idcard.php?lang=en&ModuleId=10006389", - "Dawid Szpiro", - "Dawid was the older of two sons born to Jewish parents in Warsaw. ... of Warsaw's Jewish district, where Dawid and " + - "his brother, Shlomo, attended Jewish schools. ..."}, - - { - "http://groups.yahoo.com/group/dawid", - "dawid : Katechetyczne Forum Dyskusyjne", - "dawid \u00B7 Katechetyczne Forum Dyskusyjne. Home. Messages ... Lista dyskusyjna strony internetowej DAWID. Most " + - "Recent Messages (View All) (Group by Topic) ..."}, - - { - "http://www.blogger.com/profile/01359115939699161533", - "Blogger: User Profile: Dawid", - "Push-Button Publishing. Dawid. Blogs. Blog Name. Team Members. Midwest Petanque Alliance BLOG ... MGal hdarpini " + - "chilipepper diveborabora DanDan Mike A testerin ..."}, - - { - "http://www.blogger.com/profile/15768169977536938605", - "Blogger: User Profile: David", - "kilconriola Credo Perp\u00E9tua Amanda Liturgeist Chris + AMDG + +Miguel Vinuesa+ Royal Girl ... roydosan " + - "chrysogonus Brownthing Aristotle Boeciana Amanda Lactantius Juan ..."}, - - { - "http://www.babynamer.com/Dawid", - "Dawid on BabyNamer", - "For parents-to-be who want to confidently choose potential names for their baby, ... Dawid. Meaning: Its source is a" + - " ... baby name page for boy name Dawid. ..."}, - - { - "http://profile.myspace.com/index.cfm?fuseaction=user.viewprofile&friendid=38408574", - "MySpace.com - Dawid - 26 - Male - FR - www.myspace.com/trastaroots", - "MySpace profile for Dawid with pictures, videos, personal blog, interests, information about me and more ... yo " + - "dawid, ya un gars de ta r\u00E9gion (koubiak) qui ..."}, - - { - "http://www.imdb.com/name/nm1058743/", - "Dawid Kruiper", - "Actor: Liebe. Macht. Blind.. Visit IMDb for Photos, Filmography, Discussions, Bio, News, Awards, Agent, Fan Sites. " + - "... on IMDb message board for Dawid Kruiper ..."}, - - { - "http://citeseer.ist.psu.edu/context/55656/0", - "Citations: Conditional independence in statistical theory - Dawid (ResearchIndex)", - "A. P. Dawid. Conditional independence in statistical theory (with discussion). J. Roy. ... To capture Dawid s " + - "property for overlapping sets, Pearl introduces ..."}, - - { - "http://www.dawid.pl/gb/main.php", - "Systemy ogrodzeniowe, ta\u015Bmy, sita, siatki - DAWID Cz\u0119stochowa", - "Firma DAWID - Producent siatki ogrodzeniowej, bram, furtek, paneli D-1, D-2 itp. Cz\u0119stochowa. ... DAWID Company" + - " has a long-standing tradition which has been ..."}, - - { - "http://www.imdb.com/name/nm2014139/", - "Dawid Jakubowski", - "Miscellaneous Crew: Once Upon a Knight. Visit IMDb for Photos, Filmography, Discussions, Bio, News, Awards, Agent, " + - "Fan Sites."}, - - { - "http://www.lclark.edu/cgi-bin/shownews.cgi?1011726000.1", - "Dawid publishes Lily in the Desert", - "Lewis & Clark College: Dawid publishes Lily in the Desert ... Annie Dawid is one of those all-too-rare " + - "writers who fully inhabits each ..."}, - - { - "http://dir.nichd.nih.gov/lmg/lmgdevb.htm", - "Igor Dawid Lab Home Page", - "Dawid Lab. Welcome to Igor Dawid's lab in the Laboratory of Molecular Genetics, ... National Institute of Child " + - "Health and Human Development, National ..."}, - - { - "http://www.ucl.ac.uk/~ucak06d/", - "Philip Dawid", - "DEPARTMENT OF STATISTICAL SCIENCE. UNIVERSITY COLLEGE LONDON. A. Philip Dawid ... Professor A. P. Dawid, Department " + - "of Statistical Science, University College London, ..."}, - - { - "http://www.pbase.com/dawidwnuk/profile", - "pbase Artist Dawid Wnuk", - "View Galleries : Dawid Wnuk has 5 galleries and 487 images online. ... My name is Dawid and I'm a photographer from " + - "Warsaw, Poland. ..."}, - - { - "http://dawidfrederik.deviantart.com/", - "DawidFrederik on deviantART", - "Art - community of artists and those devoted to art. ... Dawid Frederik Strauss. Profile Gallery Faves Journal. " + - "Status: deviantART Subscriber ..."}, - - { - "http://citeseer.ist.psu.edu/context/332153/0", - "Citations: Statistical theory - Dawid (ResearchIndex)", - "Dawid, P. (1984). Statistical theory. The prequential approach (with discussion) . Journal of the Royal Statistical " + - "Society A, 147:178--292."}, - - { - "http://www.infinitee-designs.com/Dawid-Michalczyk.htm", - "Dawid Michalczyk Artist of the Month Space Art", - "Artist of the Month, Dawid Michalczyk Abstract 3D Space Art, Visions, computer graphics, 2D illustration, sci-fi, " + - "fantasy, digital art"}, - - { - "http://www.myspace.com/dawidgatti", - "MySpace.com - dawid - 26 - Male - www.myspace.com/dawidgatti", - "MySpace profile for dawid with pictures, videos, personal blog, interests, information about me and more ... to " + - "meet: dawid's Friend Space (Top 1) dawid has 1 ..."}, - - { - "http://ezinearticles.com/?expert=Genevieve_Dawid", - "Genevieve Dawid - EzineArticles.com Expert Author", - "Genevieve Dawid is a published author and highly successful ... Genevieve Dawid's Extended ... [Business:Management]" + - " Genevieve Dawid explores the history of ..."}, - - { - "http://www.artnet.com/artist/698445/dawid-bjorn-dawidsson.html", - "Dawid (Bjorn Dawidsson) on artnet", - "Dawid (Bjorn Dawidsson) (Swedish, 1949) - Find works of art, auction results & sale prices of artist Dawid (Bjorn " + - "Dawidsson) at galleries and auctions worldwide."}, - - { - "http://www.glennshafer.com/assets/downloads/other12.pdf", - "Comments on \"Causal Inference without Counterfactuals\" by A.P. Dawid", - "Phil Dawid's elegant ... ted from discussions of causality with Phil Dawid over many years. ... ground with those " + - "who tout counterfactual variables, Dawid ..."}, - - { - "http://www.primerica.com/dawidkmiotek", - "Primerica Financial Services : Dawid Ireneusz Kmiotek", - "Primerica is in the business of ... Buy Term & Invest the Difference. The Theory of Decreasing ... About Dawid " + - "Ireneusz Kmiotek. Office Directions ..."}, - - { - "http://www.youtube.com/watch?v=tEKmrUhCMFo", - "YouTube - Dawid Janczyk POLAND u-19 - BELGIUM u-19 (4-1)", - "Dawid Janczyk (Legia Warsaw) ... Dawid Janczy gral w sandecji nowy sacz i raz gralem z nim(ja gralem w sokol ... " + - "Dawid Janczyk (Legia Warsaw) (less) Added: ..."}, - - { - "http://www.miniclip.com/games/david/en/", - "David - Miniclip Games - Play Free Games", - "Help David find the Lost Sheep and avoid the rampaging wild animals ... Hotmail, AOL, Yahoo Mail & other online " + - "email services. ..."}, - - { - "http://product.half.ebay.com/_W0QQprZ62221", - "The Diary of Dawid Sierakowiak | Books at Half.com", - "Buy The Diary of Dawid Sierakowiak by Dawid Sierakowiak, Kamil Turowski (1998) at Half.com. Find new and used books " + - "and save more than half off at Half.com."}, - - { - "http://www.primerica.com/PrimericaRep?rep=dawidkmiotek&pageName=about", - "About Dawid Ireneusz Kmiotek", - "Primerica is in the business of ... About Dawid Ireneusz Kmiotek. Office Directions ... Dawid Ireneusz Kmiotek. " + - "DISTRICT LEADER. Mutual Funds ..."}, - - { - "http://www.dawid.tobiasz.org/Monachium%20-%20Dachau/index.html", - "Dawid/Monachium - Dachau", - "Dawid \" Monachium - Dachau. Fotografia stanowi w\u0142asno\u015B\u0107 autora. Kopiowanie i ... Copyright by Dawid " + - "Tobiasz [Fotografia stanowi w\u0142asno\u015B\u0107 autora. ..."}, - - {"http://www.davidwilkerson.org/", - "David Wilkerson | World Challenge", ""}, - - { - "http://www.statslab.cam.ac.uk/~apd/index.html", - "Philip Dawid", - "PHILIP DAWID. Professor of Statistics. Contact Details. Professor A. P. Dawid, ... Valencia International Meetings " + - "on Bayesian Statistics. Bayesians Worldwide ..."}, - - { - "http://ideas.repec.org/e/poc8.html", - "Dawid Zochowski at IDEAS", - "Dawid Zochowski: current contact information and listing of economic research of this author provided by RePEc/IDEAS" + - " ... Pruski, Jerzy & \u017Bochowski, Dawid, 2005. ..."}, - - { - "http://www.scrumalliance.org/profiles/15472-dawid-mielnik", - "Scrum Alliance - Profile: Dawid Mielnik", - "Dawid has five years of professional experience in telecommunications business. ... Dawid is a Warsaw University of " + - "Technology graduate with a BSc in ..."}, - - { - "http://www.flickr.com/photos/dawidwalega/", - "Flickr: Photos from 11September", - "Flickr is almost certainly the best online photo management and sharing ... Explore Page Last 7 Days Interesting " + - "Calendar A Year Ago Today World Map Places ..."}, - - { - "http://www.youtube.com/watch?v=UOMk0M0hBNQ", - "YouTube - Grembach Vigo Zgierz - Dawid Korona Rzesz\u00F3w 8-1", - "Grembach Vigo Zgierz - Dawid Korona Rzesz\u00F3w 8-1 w Pucharze Polski ... Grembach Vigo Zgierz Dawid Korona " + - "Rzesz\u00F3w futsal \u0142\u00F3d\u017A kolejarz clearex hurtap puchar polski ..."}, - - { - "http://www.amazon.com/Diary-Dawid-Sierakowiak-Notebooks-Ghetto/dp/0195122852", - "Amazon.com: The Diary of Dawid Sierakowiak: Five Notebooks from the Lodz Ghetto: Dawid Sierakowiak,Lawrence L. ...", - "Amazon.com: The Diary of Dawid Sierakowiak: Five Notebooks from the Lodz Ghetto: Dawid Sierakowiak,Lawrence L. " + - "Langer,Alan Adelson,Kamil Turowski: Books"}, - - { - "http://shopping.yahoo.com/p:Kimberley%20Jim:1808599509", - "Kimberley Jim - DVD at Yahoo! Shopping", - "Yahoo! Shopping is the best place to comparison shop for Kimberley Jim - DVD. Compare products, compare prices, read" + - " reviews and merchant ratings."}, - - { - "http://www.ctfaceart.com/", - "CT Face Art (203) 255-1875 - Chrys Dawid CTFaceArt@aol.com", - "Award winning Face Painting for children through adults. ... CT FACE ART is owned and operated by Chrys Dawid. CT " + - "FACE ART is CT's finest face painting service. ..."}, - - { - "http://www.discogs.com/artist/Dawid+Szczesny", - "Dawid Szczesny", - "Submissions Drafts Collection Wantlist Favorites Watchlist Friends ... Dawid Szczesny / artists (D) Real Name: Dawid" + - " Szcz\u0119sny. URLs: ..."}, - - { - "http://www.shop.com/+-p94105045-st.shtml", - "York Ferry Annie Dawid - SHOP.COM", - "Shop for York Ferry Annie Dawid at Shop.com. $1.99 - york ferry annie dawid language:english, format:paperback, " + - "fiction/non-fiction:fiction, publisher:cane hill pr,"}, - - { - "http://www.the-artists.org/artistsblog/posts/st_content_001.cfm?id=2600", - "Dawid Michalczyk ...the-artists.org", - "Dawid Michalczyk; portfolio & art news...the-artists.org, modern and contemporary art ... Dawid Michalczyk. " + - "Conflicting emotions. Suburbs 2100. After the ..."}, - - { - "http://www.dcorfield.pwp.blueyonder.co.uk/2006/06/dawid-on-probabilities.html", - "Philosophy of Real Mathematics: Dawid on probabilities", - "... reading group ran through Phil Dawid's Probability, Causality and the Empirical ... Dawid (pronounced 'David') " + - "holds a Bayesian position, made evident in his ..."}, - - { - "http://www.cs.put.poznan.pl/dweiss/xml/index.xml?lang=en", - "Dawid Weiss - Main page", - "Dawid Weiss, PhD. Institute of Computing Science. Poznan University of Technology. ul. ... (Available as RSS) (c) " + - "Dawid Weiss. All rights reserved unless stated ..."}, - - { - "http://www.dawid.eu/", - "dawid.eu", - "Hier entsteht dawid.eu ... dawid.eu. Hier entsteht in K\u00FCrze das Projekt. dawid.eu. info@dawid.eu ..."}, - - { - "http://www.local.com/results.aspx?keyword=Dawid+Frank+B+Inc&location=06890", - "Dawid Frank B Inc in Southport, CT (Connecticut) @ Local.com", - "Dawid Frank B Inc located in Southport, CT (Connecticut). Find contact info, maps and directions for local " + - "contractors and home improvement services at Local.com."}, - - { - "http://www.anniedawid.com/shortfiction.htm", - ": : Annie Dawid : : Short Fiction", - "Annie Dawid is the author of Resurrection City: A Novel of Jonestown (to be ... Copyright \u00A9 2007 Annie Dawid. " + - "Web Site Design by Chameleon Web Design ..."}, - - { - "http://dawid.ca/", - "www.dawid.ca", - "I was in such a huge mistake. (Dawid Bober) ... 2006-02-26 Skating - Agnieszka, Joanna, Michal, Dawid (Nathan " + - "Phillips Square \u2013 Toronto) ..."}, - - { - "http://www.planetizen.com/?q=about/correspondent/dawid", - "Irvin Dawid | Planetizen", - "Irvin Dawid is a long-time Sierra Club activist, having worked in transportation, ... Irvin Dawid. Leo Vazquez. Mary" + - " Reynolds. Michael Dudley. Mike Lydon ..."}, - - { - "http://www.sourcekibitzer.org/Bio.ext?sp=l6", - "SourceKibitzer - Bio - Dawid Weiss", - "Dawid Weiss - Bio. Dawid Weiss. The founder of the Carrot2 project. Adjunct professor at the Laboratory of " + - "Intelligent Decision Support Systems ..."}, - - { - "http://www.lulu.com/content/815029", - "MD by Marcin and Dawid Witukiewicz (Music & Audio) in Electronic & Dance", - "MD by Marcin and Dawid Witukiewicz (Music & Audio) in Electronic & Dance : Music ... Music inspierd by the " + - "photography of Marcin and Dawid. ..."}, - - { - "http://www.juliedawid.co.uk/index.php?page=Band", - "Julie Dawid : Halfwise", - "the songs of prize winning folk singer and poet Julie Dawid. ... Also a lover and keeper of fish, professional " + - "storyteller Julie Dawid ..."}, - - { - "http://www.jewishencyclopedia.com/view.jsp?artid=38&letter=M", - "JewishEncyclopedia.com - MAGEN DAWID", - "The hexagram formed by the combination of two equilateral triangles; used as the ... The \"Magen Dawid,\" therefore," + - " probably did not originate withinRabbinism, the ..."}, - - { - "http://www.lulu.com/content/815298", - "MD Photography by Marcin and Dawid Witukiewicz (Book) in Arts & Photography", - "... This is a book feturing some of Marcin and Dawid Witukiewicz photographic work. ... by Marcin and Dawid " + - "Witukiewicz. Share This. Report this item. Preview ..."}, - - { - "http://finance.yahoo.com/q?s=dawid.x", - "DAWID.X: Summary for DIA Sep 2008 134.0000 call - Yahoo! Finance", - "Get detailed information on DIA Sep 2008 134.0000 call (DAWID.X) including quote performance, Real-Time ECN, " + - "technical chart analysis, key stats, insider ..."}, - - { - "http://www.bikepics.com/members/dawid/", - "BikePics - Dawid's Member Page on BikePics.Com", - "Dawid's Member Page. Member: dawid. Name: Dawid. From: ... You must be a BikePics Member and be logged in to message" + - " members. Current: 1998 Suzuki GS 500 ..."}, - - {"http://www.david-banner.com/main.html", "David Banner", - "Universal Records \\ SRC \\ Artists \\ David Banner ..."}, - - { - "http://www.dawid.com.pl/", - "Kinga Dawid", - "PORTRAITS by Kinga Dawid. Copying, dissemination, forwarding, printing and/or ... All rights reserved. Copyright C " + - "2006 Kinga Dawid ..."}, - - { - "http://www.bikepics.com/members/devdawid/", - "BikePics - dawid's Member Page on BikePics.Com", - "dawid's Member Page. Member: devdawid. Name: dawid. From: Poland. Message: You must be a BikePics Member and be " + - "logged in to message members. Current: 2002 ..."}, - - {"http://dawid.bracka.pl/", "Portfolio", - "google | portfolio | klan mortal. google | portfolio | klan mortal ..."}, - - { - "http://amiestreet.com/dawid", - "Amie Street - DaWid's Music Store", - "Amie Street empowers musicians to release, and music fans to discover, new and ... music from DaWid. recommendations" + - " (3) more info. SELECT: All, None, Free ..."}, - - {"http://markoff.pl/", "Dawid Markoff Photography", - "Nude, Fashion and Portrait photography"}, - - { - "http://www.archinect.com/schoolblog/blog.php?id=C0_372_39", - "Archinect : Schoolblog : UC DAAP (Dawid)", - "UC DAAP (Dawid) (002) a couple of quotes and a mini thesis rant. Oct 02 2006, 6 comments ... UC DAAP (Dawid) (001) " + - "it's the year of the thesis. Sep 06 2006, 4 ..."}, - - { - "http://groups.yahoo.com/group/dawid/rss", - "dawid : RSS / XML", - "dawid: Katechetyczne Forum Dyskusyjne ... Sign In. dawid \u00B7 Katechetyczne Forum Dyskusyjne. Home. Messages. " + - "Members Only. Post. Files ..."}, - - { - "http://cssoff.com/2007/06/14/and-the-winner-is-dawid-lizak/", - "CSS OFF", - "And the Winner is Dawid Lizak. View the winning entry. Dawid Lizak is from \u0141\u0119czna \u2013 a ... Dawid is " + - "currently expanding his knowledge of JavaScript, usability, ..."},}; + static final String[][] SAMPLE_DATA = + new String[][] { + { + "http://en.wikipedia.org/wiki/Data_mining", + "Data mining - Wikipedia, the free encyclopedia", + "Article about knowledge-discovery in databases (KDD), the practice of automatically searching large stores of data " + + "for patterns." + }, + { + "http://www.ccsu.edu/datamining/resources.html", + "CCSU - Data Mining", + "A collection of Data Mining links edited by the Central Connecticut State University ... Graduate Certificate " + + "Program. Data Mining Resources. Resources. Groups ..." + }, + { + "http://www.kdnuggets.com/", + "KDnuggets: Data Mining, Web Mining, and Knowledge Discovery", + "Newsletter on the data mining and knowledge industries, offering information on data mining, knowledge discovery, " + + "text mining, and web mining software, courses, jobs, publications, and meetings." + }, + { + "http://en.wikipedia.org/wiki/Data-mining", + "Data mining - Wikipedia, the free encyclopedia", + "Data mining is considered a subfield within the Computer Science field of knowledge discovery. ... claim to perform " + + "\"data mining\" by automating the creation ..." + }, + { + "http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm", + "Data Mining: What is Data Mining?", + "Outlines what knowledge discovery, the process of analyzing data from different perspectives and summarizing it into" + + " useful information, can do and how it works." + }, + { + "http://www.the-data-mine.com/", + "Data Mining - Home Page (Misc)", + "Provides information about data mining also known as knowledge discovery in databases (KDD) or simply knowledge " + + "discovery. List software, events, organizations, and people working in data mining." + }, + { + "http://www.spss.com/data_mining/", + "Data Mining Software, Data Mining Applications and Data Mining Solutions", + "... complete data mining customer ... Data mining applications, on the other hand, embed ... it, our daily lives are" + + " influenced by data mining applications. ..." + }, + { + "http://datamining.typepad.com/data_mining/", + "Data Mining: Text Mining, Visualization and Social Media", + "Commentary on text mining, data mining, social media and data visualization. ... Opinion Mining Startups ... in " + + "sentiment mining, deriving tuples of ..." + }, + { + "http://www.statsoft.com/textbook/stdatmin.html", + "Data Mining Techniques", + "Outlines the crucial concepts in data mining, defines the data warehousing process, and offers examples of " + + "computational and graphical exploratory data analysis techniques." + }, + { + "http://answers.yahoo.com/question/index?qid=1006040419333", + "answers.yahoo.com/question/index?qid=1006040419333", + "Generally, data mining (sometimes called data or knowledge discovery) is the ... Midwest grocery chain used the data" + + " mining capacity of Oracle software to ..." + }, + { + "http://www.ccsu.edu/datamining/master.html", + "CCSU - Data Mining", + "Details on how to apply to the Master of Science in data mining may be found here. ... All data mining majors are " + + "classified for business purposes as part-time ..." + }, + { + "http://databases.about.com/od/datamining/a/datamining.htm", + "Data Mining: An Introduction", + "About.com article on how businesses are discovering new trends and patterns of behavior that previously went " + + "unnoticed through data mining, automated statistical analysis techniques." + }, + { + "http://www.thearling.com/", + "Data Mining and Analytic Technologies (Kurt Thearling)", + "Kurt Thearling's site dedicated to sharing information about data mining, the automated extraction of hidden " + + "predictive information from databases, and other analytic technologies." + }, + { + "http://www.sas.com/technologies/analytics/datamining/index.html", + "Data Mining Software and Text Mining | SAS", + "Data mining is the process of selecting, exploring and modeling large amounts of ... The knowledge gleaned from data" + + " and text mining can be used to fuel ..." + }, + { + "http://databases.about.com/od/datamining/Data_Mining_and_Data_Warehousing.htm", + "Data Mining and Data Warehousing", + "From data mining tutorials to data warehousing techniques, you'll find it all! ... Administration Design Development" + + " Data Mining Database Training Careers Reviews ..." + }, + { + "http://www.oracle.com/technology/products/bi/odm/index.html", + "Oracle Data Mining", + "Oracle Data Mining Product Center ... Using data mining functionality embedded in Oracle Database 10g, you can find " + + "... Mining High-Dimensional Data for ..." + }, + { + "http://www.ncdm.uic.edu/", + "National Center for Data Mining - Welcome", + "Conducts research in: scaling algorithms, applications and systems to massive data sets, developing algorithms, " + + "applications, and systems for mining distributed data, and establishing standard languages, protocols, and " + + "services for data mining and predictive modeling." + }, + { + "http://research.microsoft.com/dmx/DataMining/default.aspx", + "Data Mining Project", + "A long term Knowledge Discovery and Data Mining project which has the current ... Read more about how data mining is" + + " integrated into SQL server. Contact Us ..." + }, + { + "http://www.dmg.org/", + "Data Mining Group - DMG", + "... high performance networking, internet computing, data mining and related areas. ... Peter Stengard, Oracle Data " + + "Mining Technologies. prudsys AG, Chemnitz, ..." + }, + { + "http://datamining.typepad.com/data_mining/2006/05/the_truth_about.html", + "Data Mining: Text Mining, Visualization and Social Media: The Truth About Blogs", + "Commentary on text mining, data mining, social media and data visualization. ... Data Mining points to the latest " + + "papers from the 3rd International Workshop on ..." + }, + { + "http://searchsqlserver.techtarget.com/sDefinition/0,,sid87_gci211901,00.html", + "What is data mining? - a definition from Whatis.com - see also: data miner, data analysis", + "Data mining is the analysis of data for relationships that have not previously been discovered. ... Data mining " + + "techniques are used in a many research areas, ..." + }, + { + "http://www.thearling.com/text/dmwhite/dmwhite.htm", + "An Introduction to Data Mining", + "Data mining, the extraction of hidden predictive information from large ... prospective analyses offered by data " + + "mining move beyond the analyses of ..." + }, + { + "http://www.oracle.com/solutions/business_intelligence/data-mining.html", + "Oracle Data Mining", + "Using data mining functionality embedded in ... Oracle Data Mining JDeveloper and SQL Developer ... Oracle Magazine:" + + " Using the Oracle Data Mining API ..." + }, + { + "http://www.amazon.com/tag/data%20mining", + "Amazon.com: data mining", + "A community about data mining. Tag and discover new products. ... Data Mining (Paperback) Data Mining: Practical " + + "Machine Learning Tools and Techniques, Second Edition ..." + }, + { + "http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm", + "MIT OpenCourseWare | Sloan School of Management | 15.062 Data Mining, Spring 2003 | Home", + "... class of methods known as data mining that assists managers in recognizing ... Data mining is a rapidly growing " + + "field that is concerned with developing ..." + }, + { + "http://www.sas.com/offices/europe/sweden/2746.html", + "Om Data Mining och Text Mining. Ta fram s\u00E4kra beslutsunderlag med Data Miningverktyg fr\u00E5n SAS Institute.", + "SAS Insitutes business intelligence ger v\u00E4rdefull kunskap till hela din ... Till\u00E4mpningen av data mining " + + "str\u00E4cker sig \u00F6ver m\u00E5nga branscher och omr\u00E5den. ..." + }, + { + "http://www.dmoz.org/Computers/Software/Databases/Data_Mining/", + "Open Directory - Computers: Software: Databases: Data Mining", + "Data Mining and Knowledge Discovery - A peer-reviewed journal publishing ... In assessing the potential of data " + + "mining based marketing campaigns one needs to ..." + }, + { + "http://www.investorhome.com/mining.htm", + "Investor Home - Data Mining", + "Data Mining or Data Snooping is the practice of searching for relationships and ... up by making a case study in " + + "data mining out of the Motley Fool's Foolish Four. ..." + }, + { + "http://www.amazon.com/Data-Mining-Concepts-Techniques-Management/dp/1558604898", + "Amazon.com: Data Mining: Concepts and Techniques (The Morgan Kaufmann Series in Data Management Systems): Jiawei " + + "Han...", + "Amazon.com: Data Mining: Concepts and Techniques (The Morgan Kaufmann Series in Data Management Systems): Jiawei " + + "Han,Micheline Kamber: Books" + }, + { + "http://www.monografias.com/trabajos/datamining/datamining.shtml", + "Data Mining - Monografias.com", + "Data Mining, la extracci\u00F3n de informaci\u00F3n oculta y predecible de grandes bases ... Las herramientas de " + + "Data Mining predicen futuras tendencias y comportamientos, ..." + }, + { + "http://www.megaputer.com/data_mining.php", + "Data Mining Technology - Megaputer", + "Data Mining Technology from Megaputer ... Typical tasks addressed by data mining include: ... Yet, data mining " + + "requires far more than just machine learning. ..." + }, + {"http://datamining.itsc.uah.edu/", "itsc data mining solutions center", ""}, + { + "http://www.dmreview.com/specialreports/20050503/1026882-1.html", + "Hard Hats for Data Miners: Myths and Pitfalls of Data Mining", + "This article debunks several myths about data mining and presents a plan of action to avoid some of the pitfalls. " + + "... a typical data mining conference or ..." + }, + { + "http://research.microsoft.com/dmx/", + "Data Management, Exploration and Mining- Home", + "The Data Management Exploration and Mining Group (DMX) ... Our research effort in data mining focuses on ensuring " + + "that traditional ..." + }, + { + "http://www.biomedcentral.com/info/about/datamining", + "BioMed Central | about us | Data mining research", + "... a collection of links to publications on the subject of biomedical text mining. Data mining Open Access research" + + " - an article in the 8 September 2003 edition of ..." + }, + { + "http://www.datapult.com/Data_Mining.htm", + "Data Mining", + "Data Mining Services provide customers with tools to quickly sift through the ... into Datapult Central for use with" + + " Data Mining tools and other Datapult products. ..." + }, + { + "http://www.siam.org/meetings/sdm02/", + "SIAM International Conference on Data Mining", + "SIAM International Conference on Data Mining, co-Sponsored by AHPCRC and ... Clustering High Dimensional Data and " + + "its Applications. Mining Scientific Datasets ..." + }, + { + "http://dir.yahoo.com/Computers_and_Internet/Software/Databases/Data_Mining/", + "Data Mining in the Yahoo! Directory", + "Learn about data mining and knowledge discovery, the process of finding patterns ... Cross Industry Standard Process" + + " for Data Mining (CRISP-DM) ..." + }, + { + "http://www.llnl.gov/str/Kamath.html", + "Data Mining", + "... Sapphire-a semiautomated, flexible data-mining software infrastructure. ... Data mining is not a new field. ... " + + "scale, scientific data-mining efforts such ..." + }, + { + "http://www.sqlserverdatamining.com/", + "SQL Server Data Mining > Home", + "SQL Server Data Mining Portal ... information about our exciting data mining features. ... CTP of Microsoft SQL " + + "Server 2008 Data Mining Add-Ins for Office 2007 ..." + }, + { + "http://www.dbmsmag.com/9807m01.html", + "DBMS - DBMS Data Mining Solutions Supplement", + "As recently as two years ago, data mining was a new concept for many people. Data mining products were new and " + + "marred by unpolished interfaces. ..." + }, + { + "http://www.oclc.org/research/projects/mining", + "Data mining [OCLC - Projects]", + "Describes the goals, methodology, and timing of the Data mining project." + }, + { + "http://www.the-data-mine.com/bin/view/Misc/IntroductionToDataMining", + "Data Mining - Introduction To Data Mining (Misc)", + "Some example application areas are listed under Applications Of Data Mining ... Crows Introduction - \"Introduction " + + "to Data Mining and Knowledge Discovery\"- http: ..." + }, + { + "http://www.pentaho.com/products/data_mining/", + "Pentaho Commercial Open Source Business Intelligence: Data Mining", + "... (BI) to the next level by adding data mining and workflow to the mix. ... Pentaho Data Mining is differentiated " + + "by its open, standards-compliant nature, ..." + }, + { + "http://www.unf.edu/~selfayou/html/data_mining.html", + "Data Mining", + "This course approaches data mining topics from an Artificial Intelligence ... The course will also cover " + + "Applications and Trends in Data Mining. Textbook: ..." + }, + { + "http://www.statsoft.com/products/dataminer.htm", + "Data Mining Software & Predictive Modeling Solutions", + "data mining software & predictive modeling sold online by statsoft.com. ... of automated and ready-to-deploy data " + + "mining solutions for a wide variety of ..." + }, + { + "http://gosset.wharton.upenn.edu/wiki/index.php/Main_Page", + "Main Page - Knowledge Discovery", + "The Penn Data Mining Group develops principled means of modeling and ... knowledge of specific application areas to " + + "develop new approaches to data mining. ..." + }, + { + "http://www.twocrows.com/glossary.htm", + "Two Crows: Data mining glossary", + "Data mining terms concisely defined. ... Accuracy is an important factor in assessing the success of data mining. " + + "... data mining ..." + }, + { + "http://www.cdc.gov/niosh/mining/data/", + "NIOSH Mining: MSHA Data File Downloads | CDC/NIOSH", + "MSHA accident, injury, employment, and production data files in SPSS and dBase formats ... Data files on mining " + + "accidents, injuries, fatalities, employment, ..." + }, + { + "http://www.cartdatamining.com/", + "Salford Data mining 2006", + "Objective | Previous Conferences | Call for Abstracts | LATEST INFO ..." + }, + { + "http://www.inductis.com/", + "Data Mining | Focused Data Mining For Discovery To Assist Management", + "Inductis offers high-level data mining services to assist management decisions ... The Data Mining Shootout " + + "...more>> ISOTech 2006 - The Insurance Technology ..." + }, + { + "http://www.datamininglab.com/", + "Elder Research: Predictive Analytics & Data Mining Consulting", + "Provides consulting and short courses in data mining and pattern discovery patterns in data." + }, + { + "http://www.microsoft.com/sql/technologies/dm/default.mspx", + "Microsoft SQL Server: Data Mining", + "Microsoft SQL Server Data Mining helps you explore your business data and discover patterns to reveal the hidden " + + "trends about your products, customer, market, and ..." + }, + { + "http://www.dataminingcasestudies.com/", + "Data Mining Case Studies", + "Recognizing outstanding practical contributions in the field of data mining. ... case studies are one of the most " + + "discussed topics at data mining conferences. ..." + }, + { + "http://www.webopedia.com/TERM/D/data_mining.html", + "What is data mining? - A Word Definition From the Webopedia Computer Dictionary", + "This page describes the term data mining and lists other pages on the Web where you can find additional information." + + " ... Data Mining and Analytic Technologies ..." + }, + { + "http://www.cs.waikato.ac.nz/~ml/weka/book.html", + "Data Mining: Practical Machine Learning Tools and Techniques", + "Book. Data Mining: Practical Machine Learning Tools and Techniques (Second Edition) ... Explains how data mining " + + "algorithms work. ..." + }, + { + "http://www.datamining.com/", + "Predictive Modeling and Predictive Analytics Solutions | Enterprise Miner Software from Insightful Software", + "Insightful Enterprise Miner - Enterprise data mining for predictive modeling and predictive analytics." + }, + { + "http://www.sra.com/services/index.asp?id=153", + "SRA International - Data Mining Solutions", + "... and business who ask these questions are finding solutions through data mining. ... Data mining is the process " + + "of discovering previously unknown relationships in ..." + }, + { + "http://en.wiktionary.org/wiki/data_mining", + "data mining - Wiktionary", + "Data mining. Wikipedia. data mining. a technique for searching large-scale databases for patterns; used mainly to " + + "... Czech: data mining n., dolov\u00E1n\u00ED dat n. ..." + }, + {"http://www.datamining.org/", "data mining institute", ""}, + { + "http://videolectures.net/Top/Computer_Science/Data_Mining/", + "Videolectures category: Data Mining", + "Next Generation Data Mining Tools: Power laws and self-similarity for graphs, ... Parallel session 4 - Hands-on " + + "section Data mining with R. Luis Torgo. 1 comment ..." + }, + { + "http://www2008.org/CFP/RP-data_mining.html", + "WWW2008 CFP - WWW 2008 Call For Papers: Refereed Papers - Data Mining", + "WWW2008 - The 17th International World Wide Web Conference - Beijing, China (21 - 25 April 2008) Hosted by Beihang " + + "Universit ... data mining, machine ..." + }, + { + "http://answers.yahoo.com/question/index?qid=20070227091350AAVDlI1", + "what is data mining?", + "... the purchases of customers, a data mining system could identify those customers ... A simple example of data " + + "mining, often called Market Basket Analysis, ..." + }, + { + "http://clubs.yahoo.com/clubs/datamining", + "datamining2 : Data Mining Club - 1600+ members!!", + "datamining2: Data Mining Club - 1600+ members!" + }, + { + "http://www.siam.org/meetings/sdm01/", + "First SIAM International Conference on Data Mining", + "The field of data mining draws upon extensive work in areas such as statistics, ... recent results in data mining, " + + "including applications, algorithms, software, ..." + }, + { + "http://www.statserv.com/datamining.html", + "St@tServ - About Data Mining", + "St@tServ Data Mining page ... Data mining in molecular biology, by Alvis Brazma. Graham Williams page. Knowledge " + + "Discovery and Data Mining Resources, ..." + }, + { + "http://www.springer.com/computer/database+management+&+information+retrieval/journal/10618", + "Data Mining and Knowledge Discovery - Data Mining and Knowledge Discovery Journals, Books & Online Media | Springer", + "Technical journal focused on the theory, techniques, and practice for extracting information from large databases." + }, + { + "http://msdn2.microsoft.com/en-us/library/ms174949.aspx", + "Data Mining Concepts", + "Data mining is frequently described as "the process of extracting ... Creating a data mining model is a dynamic" + + " and iterative process. ..." + }, + { + "http://www.cs.wisc.edu/dmi/", + "DMI:Data Mining Institute", + "Data Mining Institute at UW-Madison ... The Data Mining Institute (DMI) was ... Corporation with the support of the " + + "Data Mining Group of Microsoft Research. ..." + }, + { + "http://www.dataminingconsultant.com/", + "DataMiningConsultant.com", + "... Website for Data Mining Methods and ... data mining at Central Connecticut State University, he ... also " + + "provides data mining consulting and statistical ..." + }, + { + "http://www.dmreview.com/channels/data_mining.html", + "Data Mining", + "... business intelligence, data warehousing, data mining, CRM, analytics, ... M2007 Data Mining Conference Hitting " + + "10th Year and Going Strong ..." + }, + { + "http://www.unc.edu/~xluan/258/datamining.html", + "Data Mining", + "What is the current state of data mining? The immediate future ... Data Mining is the process of extracting " + + "knowledge hidden from large volumes of ..." + }, + { + "http://www.data-miners.com/", + "Data Miners Inc. We wrote the book on data mining!", + "Data mining consultancy; services include predictive modeling, consulting, and seminars." + }, + { + "http://www.versiontracker.com/dyn/moreinfo/macosx/27607", + "Data Mining 2.2.2 software download - Mac OS X - VersionTracker", + "Find Data Mining downloads, reviews, and updates for Mac OS X including commercial software, shareware and freeware " + + "on VersionTracker.com." + }, + { + "http://www.webtechniques.com/archives/2000/01/greening/", + "New Architect: Features", + "Article by Dan Greening on data mining techniques applied to analyzing and making decisions from web data. ... and " + + "business analysts use data-mining techniques. ..." + }, + { + "http://www.networkdictionary.com/software/DataMining.php", + "Data Mining | NetworkDictionary", + "Data Mining is the automated extraction of hidden predictive information from databases. ... The data mining tools " + + "can make this leap. ..." + }, + { + "http://www.youtube.com/watch?v=wqpMyQMi0to", + "YouTube - What is Data Mining? - February 19, 2008", + "Association Labratory President and CEO Dean West discusses Data Mining and how it can be applied to associations. " + + "... Data Mining Association Forum Dean West ..." + }, + { + "http://www.cs.sfu.ca/~han/DM_Book.html", + "Book page", + "Chapter 4. Data Mining Primitives, Languages, and System Architectures ... Chapter 9. Mining Complex Types of Data " + + "... to Microsoft's OLE DB for Data Mining ..." + }, + { + "http://www.twocrows.com/", + "Two Crows data mining home page", + "Dedicated to the development, marketing, sales and support of tools for knowledge discovery to make data mining " + + "accessible and easy to use." + }, + { + "http://www.autonlab.org/tutorials", + "Statistical Data Mining Tutorials", + "Includes a set of tutorials on many aspects of statistical data mining, including the foundations of probability, " + + "the foundations of statistical data analysis, and most of the classic machine learning and data mining " + + "algorithms." + }, + { + "http://ecommerce.ncsu.edu/technology/topic_Datamining.html", + "E-commerce Technology: Data Mining", + "\"Web usage mining: discovery and applications of web usage patterns from web data\" ... Patterns and Trends by " + + "Applying OLAP and Data Mining Technology on Web Logs. ..." + }, + { + "http://www.teradata.com/t/page/106002/index.html", + "Teradata Data Mining Warehouse Solution", + "... a high-powered analytic warehouse that streamlines the data mining process. ... while building the analytic " + + "model using your favorite data mining tool. ..." + }, + { + "http://datamining.japati.net/", + "Indo Datamining", + "Apa yang bisa dan tidak bisa dilakukan data mining ? ... Iko Pramudiono \"» ... Apa itu data mining ? Iko " + + "Pramudiono \"». artikel lainnya \" tutorial ..." + }, + { + "http://www.affymetrix.com/products/software/specific/dmt.affx", + "Affymetrix - Data Mining Tool (DMT) (Unsupported - Archived Product)", + "Affymetrix is dedicated to developing state-of-the-art technology for acquiring, analyzing, and managing complex " + + "genetic ... The Data Mining Tool (DMT) ..." + }, + { + "http://www.pcc.qub.ac.uk/tec/courses/datamining/stu_notes/dm_book_1.html", + "Data Mining Student Notes, QUB", + "2 - Data Mining Functions. 2.1 - Classification. 2.2 - Associations ... 5 - Data Mining Examples. 5.1 - Bass Brewers" + + ". 5.2 - Northern Bank. 5.3 - TSB Group PLC ..." + }, + { + "http://www.spss.com/text_mining_for_clementine/", + "Text Mining for Clementine | Improve the accuracy of data mining", + "Text Mining for Clementine from SPSS enables you to use text data to improve the accuracy of predictive models. ... " + + "and about data mining in general. ..." + }, + { + "http://www.open-mag.com/features/Vol_16/datamining/datamining.htm", + "Data Mining", + "Without data mining, a merchant isn't even close to leveraging what customers want and will buy. ... Data mining is " + + "to be found in applications like bio ..." + }, + { + "http://wordpress.com/tag/data-mining/", + "Data Mining \u2014 Blogs, Pictures, and more on WordPress", + "Going Beyond the Numbers: Context-Sensitive Data Mining ... Data mining examples ... many websites employing data " + + "mining technology to provide recommendation ..." + }, + { + "http://www.dmbenchmarking.com/", + "Benchmarking- Data Mining Benchmarking Association", + "Association of companies and organizations working to identify \"best in class\" data mining processes through " + + "benchmarking studies." + }, + { + "http://www.dataentryindia.com/data_processing/data_mining.php", + "Data Mining, Data Mining Process, Data Mining Techniques, Outsourcing Mining Data Services", + "... Walmart, Fundraising Data Mining, Data Mining Activities, Web-based Data Mining, ... in many industries makes us" + + " the best choice for your data mining needs. ..." + }, + { + "http://www.target.com/Data-Mining-Applications-International-Information/dp/1853127299", + "Data Mining V: Data Mining, Text Mining... [Hardcover] | Target.com", + "Shop for Data Mining V: Data Mining, Text Mining and Their Business Applications : Fifth International Conference on" + + " Data Mining (Management Information System) at" + }, + { + "http://www.cs.ubc.ca/~rng/research/datamining/data_mining.htm", + "Data Mining", + "... varying degrees of success, the data mining tools developed thus far, by and ... (a) we should recognize that " + + "data mining is a multi-step process, and that (b) ..." + }, + { + "http://jcp.org/en/jsr/detail?id=73", + "The Java Community Process(SM) Program - JSRs: Java Specification Requests - detail JSR# 73", + "Currently, there is no widely agreed upon, standard API for data mining. By using JDMAPI, implementers of data " + + "mining applications can expose a single, ..." + }, + { + "http://www.microsoft.com/spain/sql/technologies/dm/default.mspx", + "Microsoft SQL Server2005: Data Mining", + "Data Mining es la tecnolog\u00EDa BI que le ayudar\u00E1 a construir modelos anal\u00EDticos complejos e integrar " + + "esos modelos con sus operaciones comerciales." + }, + { + "http://www.bos.frb.org/economic/nerr/rr2000/q3/mining.htm", + "Regional Review: Mining Data", + "Although data mining by itself is not going to get the Celtics to the playoffs, ... then, firms that specialize in " + + "data-mining software have been developing a ..." + }, + { + "http://www.scianta.com/technology/datamining.htm", + "Data Mining", + "... are excellent candidates for data mining, fault prediction, problem diagnosis, ... Data Mining uses this theory " + + "to support Link and Affinity Group analysis \u2013 an ..." + }, + { + "http://www.gusconstan.com/DataMining/index.htm", + "Discovery and Mining", + "Verification-Driven Data Mining. Advantages of Symbolic Classifiers. Manual vs. Automatic ... Currently, data mining" + + " solutions have been developed by large software ..." + }, + { + "http://www.dataminingconsultant.com/DKD.htm", + "DataMiningConsultant.com", + "Companion Website for Data Mining Methods and Models ... \"This is an excellent introductory book on data mining. " + + "... An Introduction to Data Mining at Amazon.com ..." + }, + { + "http://www.pfaw.org/pfaw/general/default.aspx?oid=9717", + "People For the American Way - Data Mining", + "data mining, civil liberties, civil rights, terrorism, september 11th, anti-terrorism, ashcroft, government " + + "intrusion, privacy, email, patriot, american" + }, + { + "http://dm1.cs.uiuc.edu/", + "Data Mining Research Group", + "... conducting research in various areas in data mining and other related fields. ... on Data Mining (SDM'08), (full" + + " paper), Atlanta, GA, April 2007. ..." + }, + { + "http://www.dawid.tv/", + "dawid.tv", + "Watch free videos on dawid.tv. Now Playing: DAWID DRIF ... About. Dawid. Bielawa - Poland. Friends: 1. Last Login: " + + "... View All Members of dawid.tv. Tag Cloud ..." + }, + { + "http://www.dawid.co.za/", + "DAWID", + "Welkom by: Dawid Bredenkamp se webtuiste. Foto's. Skakels. Kontak ..." + }, + { + "http://www.dawid-nowak.org/", + "Dawid Nowak", + "Dawid Nowak Home Page ... Resume. Gallery. Thailand. Still in Thailand. Into Laos. Through Laos To Cambodia. RSS " + + "feeds for lazy technically oriented people ..." + }, + { + "http://dawid.digitalart.org/", + "dawid.digitalart.org - Profile of Dawid Michalczyk", + "A gallery of masterfully created works of digital art. ... Dawid Michalczyk \" Send Private Message \" Send an " + + "E-mail. Art Gallery (13) Guestbook ..." + }, + { + "http://www.dawid.nu/index.php?ID=4", + "dawid :: images / commercial work :: advertising & illustrations", + "The official site of photographer Dawid, Bj\u00F6rn Dawidsson. Fotograf Dawid - Bj\u00F6rn Dawidsson ... references:" + + " AB Vin & Sprit, Apple, Berliner, Bond, Ericsson, ..." + }, + { + "http://www.dawidphotography.com/", + "Photographer London UK, Dawid de Greeff \u00A9 2007 , Digital photographer - Portfolio", + "South African born Dawid & Annemarie de Greeff are International digital ... NAME. EMAIL. MESSAGE ..." + }, + { + "http://www.anniedawid.com/", + ": : Annie Dawid : : Author and Photographer", + "Annie Dawid is the author of Resurrection City: A Novel of Jonestown (to be ... Annie Dawid lives and writes in the " + + "Sangre de Cristo range of South-Central Colorado. ..." + }, + { + "http://en.wikipedia.org/wiki/Dawid_Janowski", + "Dawid Janowski - Wikipedia, the free encyclopedia", + "Dawid Markelowicz Janowski (in English usually called David Janowski) (born 25 ... Dawid Janowski died on January " + + "15, 1927 of tuberculosis. ..." + }, + { + "http://www.dawid.nu/index.php?ID=2", + "dawid :: images / art :: COMP", + "The official site of photographer Dawid, Bj\u00F6rn Dawidsson. Fotograf Dawid - Bj\u00F6rn Dawidsson ... dawid : " + + "images / art : COMP: Series photographed during the mid 80's. ..." + }, + { + "http://en.wikipedia.org/wiki/Dawid", + "Dawid - Wikipedia, the free encyclopedia", + "Dawid. From Wikipedia, the free encyclopedia. Jump to: navigation, search. Dawid may refer to the following people: " + + "David, the biblical King David ..." + }, + { + "http://www.myspace.com/dawidszczesny", + "MySpace.com - dawid szczesny - Wroclaw - www.myspace.com/dawidszczesny", + "MySpace music profile for dawid szczesny with tour dates, songs, videos, pictures, blogs, band information, " + + "downloads and more" + }, + { + "http://www.art.eonworks.com/", + "Computer wallpaper, stock illustration, Sci-Fi art, Fantasy art, Surreal art, Space art, Abstract art - posters, ...", + "Digital Art of Dawid Michalczyk. Unique posters, prints, wallpapers and wall calendars. ... the official website of " + + "Dawid Michalczyk - a freelance illustrator ..." + }, + { + "http://www.surfski.info/content/view/384/147/", + "Surf Ski . Info - Dawid Mocke King of the Harbour 2007", + "Surf Ski information and news. Training tips from the experts, equipment, getting started guides, surfski reviews, " + + "photos ...links and stories." + }, + { + "http://www.agentsbase.com/", + "Agent's Base", + "Dawid Kasperowicz. Get Firefox. Get Google Ads. Affiliates ... By Dawid | February 28, 2008 - 12:05 pm - Posted in " + + "Technology ..." + }, + { + "http://www.target.com/Dawid-Dawidsson-Bjorn/dp/3882437243", + "Dawid [Hardcover] | Target Official Site", + "Shop for Dawid at Target. Choose from a wide range of Books. Expect More, Pay Less at Target.com" + }, + { + "http://www.dawid.tobiasz.org/", + "Dawid", + "Dawid. Fotografia stanowi w\u0142asno\u015B\u0107 autora. Kopiowanie i rozpowszechnianie ... Copyright by Dawid " + + "Tobiasz [Fotografia stanowi w\u0142asno\u015B\u0107 autora. ..." + }, + { + "http://juliedawid.co.uk/", + "Julie Dawid :", + "birthing support scotland. Poetical Fusion Folk. Words. Band. Listen. Contact. Copyright \u00A9 2004 Julie Dawid. " + + "All Rights reserved. Powered by Accidental Media ..." + }, + { + "http://conference.dawid.uni.wroc.pl/index.php?lang=iso-8859-2", + "konferencja - Welcome", + "Joomla - the dynamic portal engine and content management system ... The 1st Symposium of Pedagogy and Psychology " + + "PhD Students. Monday, 13 February 2006 ..." + }, + { + "http://www.ibe.unesco.org/publications/ThinkersPdf/dawide.pdf", + "Jan Wladyslaw Dawid", + "All his life, Jan Wladyslaw Dawid was closely associated with the teaching ... Dawid who believed that these " + + "experiments were fundamental to the blossoming and ..." + }, + {"http://www.dawid-posciel.pl/", "www.dawid-posciel.pl", ""}, + { + "http://www.dawidrurkowski.com/", + "Dawid Rurkowski - portfolio", + "Dawid Rurkowski online webdesign portfolio ... My name is Dawid, I am a web designer with a real passion to my work." + + " ... \u00A9 Copyright 2007 Dawid Rurkowski All ..." + }, + { + "http://conference.dawid.uni.wroc.pl/index.php?option=com_content&task=blogsection&id=20&Itemid=49%E2%8C%A9=iso-8859-2", + "konferencja - Warsztaty", + "Joomla - the dynamic portal engine and content management system ... Karolina Pietras is a psychologist, business " + + "trainer and PhD student at Faculty ..." + }, + { + "http://chess.about.com/library/persons/blp-jano.htm", + "Famous Chess Players - Dawid Janowsky", + "Beginners Improve Your Game Play Chess Online Chess Downloads Computers and ... Dawid Janowsky. Unsuccessful " + + "challenger for World Championship ..." + }, + { + "http://www.pbase.com/dawidwnuk", + "Dawid Wnuk's Photo Galleries at pbase.com", + "All images on this site copyrighted by DAWID WNUK. Please contact me if you would like to purchase or licence a " + + "photograph. Portraiture ..." + }, + { + "http://dawid-witos.nazwa.pl/chylu/en/index.php?link=news", + "...Official Website of Michael Chylinski...", + "Welcome to chylinski.info- the official web site of Polish National Team and ... We invite you to visite our service" + + " and write your opinions on forum. A few ..." + }, + {"http://photoexposed.com/", "photoeXposed.com", "Dawid Slaski-Sawicki Photography"}, + { + "http://vids.myspace.com/index.cfm?fuseaction=vids.individual&VideoID=7370487", + "MySpaceTV Videos: Edyp trailer by dawid", + "Edyp trailer by dawid Watch it on MySpace Videos. ... Posted by: dawid. Runtime: 0:52. Plays: 43. Comments: 0. " + + "Reinkarnacje - \"Czy to mi..." + }, + { + "http://www.linkedin.com/in/dawidmadon", + "LinkedIn: Dawid Mado\u0144", + "Dawid Mado\u0144's professional profile on LinkedIn. ... Dawid Mado\u0144. ORACLE DBA at Apriso and Information " + + "Technology and Services Consultant ..." + }, + { + "http://www.linkedin.com/pub/1/878/410", + "LinkedIn: Dawid Tracz", + "Dawid Tracz's professional profile on LinkedIn. ... Dawid Tracz's Experience. Graphician, WebDesigner, " + + "InterfaceDesigner. DreamLab Onet.pl Sp. ..." + }, + { + "http://profiles.friendster.com/13547484", + "Friendster - Dawid Martin", + "Friendster: ; location: Poland, PL; Kiedrowice, Warsaw (Poland),Jogja (Indonesia); Warsaw Gamelan Group, Bosso, " + + "Tepellere, Mandala, Suita Etnik, Konco-Konco Blues ..." + }, + { + "http://www.genevievedawid.com/", + "Genevieve Dawid mentor, lecturer and author", + "Author of the Achiever's Journey a real self help book for dyslexics, Genevieve Dawid offers a unique approach to " + + "mentoring and personal development." + }, + { + "http://www.last.fm/music/dawid+szczesny", + "dawid szczesny \u2013 Music at Last.fm", + "People who like dawid szczesny also like Masayasu Tzboguchi Trio, Ametsub, ... Dawid Szcz\u0119sny performed in " + + "Poland, Germany (in 2005 invited by Kata Adamek and ..." + }, + { + "http://vids.myspace.com/index.cfm?fuseaction=vids.individual&videoid=2028359840", + "MySpaceTV Videos: paka 2007-1 by dawid", + "paka 2007-1 by dawid Watch it on MySpace Videos. ... Posted by: dawid. Runtime: 0:52. Plays: 43. Comments: 0. " + + "Reinkarnacje - \"Czy to mi..." + }, + { + "http://dawid.secondbrain.com/", + "Dawid's profile page - Second Brain_ - All Your Content", + "Dawid. People first, strategy second ... Dawid's recent updates. February 07 2008. Wimbledon ... Posted by Dawid on " + + "Second Brain February 05 2008. Post comment ..." + }, + { + "http://www.ushmm.org/wlc/article.php?lang=en&ModuleId=10007294", + "Dawid Sierakowiak", + "Dawid was an avid reader and an excellent observer. Throughout Dawid's imprisonment in the Lodz ghetto he made sure " + + "to write about ..." + }, + { + "http://www.ctbodyartist.com/", + "CT Body Artist | Chrys Dawid (203) 255-1875", + "CT Body Artist, Chrys Dawid (203) 255-1875 Professional Body painting service. From Advertising Champaigns to " + + "Private parties, make your statement & Marketing goals ..." + }, + { + "http://www.amazon.com/phrase/Dawid-Sierakowiak", + "Amazon.com: \"Dawid Sierakowiak\": Key Phrase page", + "Key Phrase page for Dawid Sierakowiak: Books containing the phrase Dawid Sierakowiak ... Key Phrases: Dawid " + + "Sierakowiak, United States, New York, Niutek ..." + }, + { + "http://www.planetizen.com/user/403/track", + "Irvin Dawid | Planetizen", + "Irvin Dawid. 0. 2 weeks 20 hours ago. news ... Irvin Dawid. 1. 3 weeks 5 days ago. news. Traffic Crashes Cost Twice " + + "as Much as Congestion ..." + }, + { + "http://www.ushmm.org/wlc/idcard.php?lang=en&ModuleId=10006389", + "Dawid Szpiro", + "Dawid was the older of two sons born to Jewish parents in Warsaw. ... of Warsaw's Jewish district, where Dawid and " + + "his brother, Shlomo, attended Jewish schools. ..." + }, + { + "http://groups.yahoo.com/group/dawid", + "dawid : Katechetyczne Forum Dyskusyjne", + "dawid \u00B7 Katechetyczne Forum Dyskusyjne. Home. Messages ... Lista dyskusyjna strony internetowej DAWID. Most " + + "Recent Messages (View All) (Group by Topic) ..." + }, + { + "http://www.blogger.com/profile/01359115939699161533", + "Blogger: User Profile: Dawid", + "Push-Button Publishing. Dawid. Blogs. Blog Name. Team Members. Midwest Petanque Alliance BLOG ... MGal hdarpini " + + "chilipepper diveborabora DanDan Mike A testerin ..." + }, + { + "http://www.blogger.com/profile/15768169977536938605", + "Blogger: User Profile: David", + "kilconriola Credo Perp\u00E9tua Amanda Liturgeist Chris + AMDG + +Miguel Vinuesa+ Royal Girl ... roydosan " + + "chrysogonus Brownthing Aristotle Boeciana Amanda Lactantius Juan ..." + }, + { + "http://www.babynamer.com/Dawid", + "Dawid on BabyNamer", + "For parents-to-be who want to confidently choose potential names for their baby, ... Dawid. Meaning: Its source is a" + + " ... baby name page for boy name Dawid. ..." + }, + { + "http://profile.myspace.com/index.cfm?fuseaction=user.viewprofile&friendid=38408574", + "MySpace.com - Dawid - 26 - Male - FR - www.myspace.com/trastaroots", + "MySpace profile for Dawid with pictures, videos, personal blog, interests, information about me and more ... yo " + + "dawid, ya un gars de ta r\u00E9gion (koubiak) qui ..." + }, + { + "http://www.imdb.com/name/nm1058743/", + "Dawid Kruiper", + "Actor: Liebe. Macht. Blind.. Visit IMDb for Photos, Filmography, Discussions, Bio, News, Awards, Agent, Fan Sites. " + + "... on IMDb message board for Dawid Kruiper ..." + }, + { + "http://citeseer.ist.psu.edu/context/55656/0", + "Citations: Conditional independence in statistical theory - Dawid (ResearchIndex)", + "A. P. Dawid. Conditional independence in statistical theory (with discussion). J. Roy. ... To capture Dawid s " + + "property for overlapping sets, Pearl introduces ..." + }, + { + "http://www.dawid.pl/gb/main.php", + "Systemy ogrodzeniowe, ta\u015Bmy, sita, siatki - DAWID Cz\u0119stochowa", + "Firma DAWID - Producent siatki ogrodzeniowej, bram, furtek, paneli D-1, D-2 itp. Cz\u0119stochowa. ... DAWID Company" + + " has a long-standing tradition which has been ..." + }, + { + "http://www.imdb.com/name/nm2014139/", + "Dawid Jakubowski", + "Miscellaneous Crew: Once Upon a Knight. Visit IMDb for Photos, Filmography, Discussions, Bio, News, Awards, Agent, " + + "Fan Sites." + }, + { + "http://www.lclark.edu/cgi-bin/shownews.cgi?1011726000.1", + "Dawid publishes Lily in the Desert", + "Lewis & Clark College: Dawid publishes Lily in the Desert ... Annie Dawid is one of those all-too-rare " + + "writers who fully inhabits each ..." + }, + { + "http://dir.nichd.nih.gov/lmg/lmgdevb.htm", + "Igor Dawid Lab Home Page", + "Dawid Lab. Welcome to Igor Dawid's lab in the Laboratory of Molecular Genetics, ... National Institute of Child " + + "Health and Human Development, National ..." + }, + { + "http://www.ucl.ac.uk/~ucak06d/", + "Philip Dawid", + "DEPARTMENT OF STATISTICAL SCIENCE. UNIVERSITY COLLEGE LONDON. A. Philip Dawid ... Professor A. P. Dawid, Department " + + "of Statistical Science, University College London, ..." + }, + { + "http://www.pbase.com/dawidwnuk/profile", + "pbase Artist Dawid Wnuk", + "View Galleries : Dawid Wnuk has 5 galleries and 487 images online. ... My name is Dawid and I'm a photographer from " + + "Warsaw, Poland. ..." + }, + { + "http://dawidfrederik.deviantart.com/", + "DawidFrederik on deviantART", + "Art - community of artists and those devoted to art. ... Dawid Frederik Strauss. Profile Gallery Faves Journal. " + + "Status: deviantART Subscriber ..." + }, + { + "http://citeseer.ist.psu.edu/context/332153/0", + "Citations: Statistical theory - Dawid (ResearchIndex)", + "Dawid, P. (1984). Statistical theory. The prequential approach (with discussion) . Journal of the Royal Statistical " + + "Society A, 147:178--292." + }, + { + "http://www.infinitee-designs.com/Dawid-Michalczyk.htm", + "Dawid Michalczyk Artist of the Month Space Art", + "Artist of the Month, Dawid Michalczyk Abstract 3D Space Art, Visions, computer graphics, 2D illustration, sci-fi, " + + "fantasy, digital art" + }, + { + "http://www.myspace.com/dawidgatti", + "MySpace.com - dawid - 26 - Male - www.myspace.com/dawidgatti", + "MySpace profile for dawid with pictures, videos, personal blog, interests, information about me and more ... to " + + "meet: dawid's Friend Space (Top 1) dawid has 1 ..." + }, + { + "http://ezinearticles.com/?expert=Genevieve_Dawid", + "Genevieve Dawid - EzineArticles.com Expert Author", + "Genevieve Dawid is a published author and highly successful ... Genevieve Dawid's Extended ... [Business:Management]" + + " Genevieve Dawid explores the history of ..." + }, + { + "http://www.artnet.com/artist/698445/dawid-bjorn-dawidsson.html", + "Dawid (Bjorn Dawidsson) on artnet", + "Dawid (Bjorn Dawidsson) (Swedish, 1949) - Find works of art, auction results & sale prices of artist Dawid (Bjorn " + + "Dawidsson) at galleries and auctions worldwide." + }, + { + "http://www.glennshafer.com/assets/downloads/other12.pdf", + "Comments on \"Causal Inference without Counterfactuals\" by A.P. Dawid", + "Phil Dawid's elegant ... ted from discussions of causality with Phil Dawid over many years. ... ground with those " + + "who tout counterfactual variables, Dawid ..." + }, + { + "http://www.primerica.com/dawidkmiotek", + "Primerica Financial Services : Dawid Ireneusz Kmiotek", + "Primerica is in the business of ... Buy Term & Invest the Difference. The Theory of Decreasing ... About Dawid " + + "Ireneusz Kmiotek. Office Directions ..." + }, + { + "http://www.youtube.com/watch?v=tEKmrUhCMFo", + "YouTube - Dawid Janczyk POLAND u-19 - BELGIUM u-19 (4-1)", + "Dawid Janczyk (Legia Warsaw) ... Dawid Janczy gral w sandecji nowy sacz i raz gralem z nim(ja gralem w sokol ... " + + "Dawid Janczyk (Legia Warsaw) (less) Added: ..." + }, + { + "http://www.miniclip.com/games/david/en/", + "David - Miniclip Games - Play Free Games", + "Help David find the Lost Sheep and avoid the rampaging wild animals ... Hotmail, AOL, Yahoo Mail & other online " + + "email services. ..." + }, + { + "http://product.half.ebay.com/_W0QQprZ62221", + "The Diary of Dawid Sierakowiak | Books at Half.com", + "Buy The Diary of Dawid Sierakowiak by Dawid Sierakowiak, Kamil Turowski (1998) at Half.com. Find new and used books " + + "and save more than half off at Half.com." + }, + { + "http://www.primerica.com/PrimericaRep?rep=dawidkmiotek&pageName=about", + "About Dawid Ireneusz Kmiotek", + "Primerica is in the business of ... About Dawid Ireneusz Kmiotek. Office Directions ... Dawid Ireneusz Kmiotek. " + + "DISTRICT LEADER. Mutual Funds ..." + }, + { + "http://www.dawid.tobiasz.org/Monachium%20-%20Dachau/index.html", + "Dawid/Monachium - Dachau", + "Dawid \" Monachium - Dachau. Fotografia stanowi w\u0142asno\u015B\u0107 autora. Kopiowanie i ... Copyright by Dawid " + + "Tobiasz [Fotografia stanowi w\u0142asno\u015B\u0107 autora. ..." + }, + {"http://www.davidwilkerson.org/", "David Wilkerson | World Challenge", ""}, + { + "http://www.statslab.cam.ac.uk/~apd/index.html", + "Philip Dawid", + "PHILIP DAWID. Professor of Statistics. Contact Details. Professor A. P. Dawid, ... Valencia International Meetings " + + "on Bayesian Statistics. Bayesians Worldwide ..." + }, + { + "http://ideas.repec.org/e/poc8.html", + "Dawid Zochowski at IDEAS", + "Dawid Zochowski: current contact information and listing of economic research of this author provided by RePEc/IDEAS" + + " ... Pruski, Jerzy & \u017Bochowski, Dawid, 2005. ..." + }, + { + "http://www.scrumalliance.org/profiles/15472-dawid-mielnik", + "Scrum Alliance - Profile: Dawid Mielnik", + "Dawid has five years of professional experience in telecommunications business. ... Dawid is a Warsaw University of " + + "Technology graduate with a BSc in ..." + }, + { + "http://www.flickr.com/photos/dawidwalega/", + "Flickr: Photos from 11September", + "Flickr is almost certainly the best online photo management and sharing ... Explore Page Last 7 Days Interesting " + + "Calendar A Year Ago Today World Map Places ..." + }, + { + "http://www.youtube.com/watch?v=UOMk0M0hBNQ", + "YouTube - Grembach Vigo Zgierz - Dawid Korona Rzesz\u00F3w 8-1", + "Grembach Vigo Zgierz - Dawid Korona Rzesz\u00F3w 8-1 w Pucharze Polski ... Grembach Vigo Zgierz Dawid Korona " + + "Rzesz\u00F3w futsal \u0142\u00F3d\u017A kolejarz clearex hurtap puchar polski ..." + }, + { + "http://www.amazon.com/Diary-Dawid-Sierakowiak-Notebooks-Ghetto/dp/0195122852", + "Amazon.com: The Diary of Dawid Sierakowiak: Five Notebooks from the Lodz Ghetto: Dawid Sierakowiak,Lawrence L. ...", + "Amazon.com: The Diary of Dawid Sierakowiak: Five Notebooks from the Lodz Ghetto: Dawid Sierakowiak,Lawrence L. " + + "Langer,Alan Adelson,Kamil Turowski: Books" + }, + { + "http://shopping.yahoo.com/p:Kimberley%20Jim:1808599509", + "Kimberley Jim - DVD at Yahoo! Shopping", + "Yahoo! Shopping is the best place to comparison shop for Kimberley Jim - DVD. Compare products, compare prices, read" + + " reviews and merchant ratings." + }, + { + "http://www.ctfaceart.com/", + "CT Face Art (203) 255-1875 - Chrys Dawid CTFaceArt@aol.com", + "Award winning Face Painting for children through adults. ... CT FACE ART is owned and operated by Chrys Dawid. CT " + + "FACE ART is CT's finest face painting service. ..." + }, + { + "http://www.discogs.com/artist/Dawid+Szczesny", + "Dawid Szczesny", + "Submissions Drafts Collection Wantlist Favorites Watchlist Friends ... Dawid Szczesny / artists (D) Real Name: Dawid" + + " Szcz\u0119sny. URLs: ..." + }, + { + "http://www.shop.com/+-p94105045-st.shtml", + "York Ferry Annie Dawid - SHOP.COM", + "Shop for York Ferry Annie Dawid at Shop.com. $1.99 - york ferry annie dawid language:english, format:paperback, " + + "fiction/non-fiction:fiction, publisher:cane hill pr," + }, + { + "http://www.the-artists.org/artistsblog/posts/st_content_001.cfm?id=2600", + "Dawid Michalczyk ...the-artists.org", + "Dawid Michalczyk; portfolio & art news...the-artists.org, modern and contemporary art ... Dawid Michalczyk. " + + "Conflicting emotions. Suburbs 2100. After the ..." + }, + { + "http://www.dcorfield.pwp.blueyonder.co.uk/2006/06/dawid-on-probabilities.html", + "Philosophy of Real Mathematics: Dawid on probabilities", + "... reading group ran through Phil Dawid's Probability, Causality and the Empirical ... Dawid (pronounced 'David') " + + "holds a Bayesian position, made evident in his ..." + }, + { + "http://www.cs.put.poznan.pl/dweiss/xml/index.xml?lang=en", + "Dawid Weiss - Main page", + "Dawid Weiss, PhD. Institute of Computing Science. Poznan University of Technology. ul. ... (Available as RSS) (c) " + + "Dawid Weiss. All rights reserved unless stated ..." + }, + { + "http://www.dawid.eu/", + "dawid.eu", + "Hier entsteht dawid.eu ... dawid.eu. Hier entsteht in K\u00FCrze das Projekt. dawid.eu. info@dawid.eu ..." + }, + { + "http://www.local.com/results.aspx?keyword=Dawid+Frank+B+Inc&location=06890", + "Dawid Frank B Inc in Southport, CT (Connecticut) @ Local.com", + "Dawid Frank B Inc located in Southport, CT (Connecticut). Find contact info, maps and directions for local " + + "contractors and home improvement services at Local.com." + }, + { + "http://www.anniedawid.com/shortfiction.htm", + ": : Annie Dawid : : Short Fiction", + "Annie Dawid is the author of Resurrection City: A Novel of Jonestown (to be ... Copyright \u00A9 2007 Annie Dawid. " + + "Web Site Design by Chameleon Web Design ..." + }, + { + "http://dawid.ca/", + "www.dawid.ca", + "I was in such a huge mistake. (Dawid Bober) ... 2006-02-26 Skating - Agnieszka, Joanna, Michal, Dawid (Nathan " + + "Phillips Square \u2013 Toronto) ..." + }, + { + "http://www.planetizen.com/?q=about/correspondent/dawid", + "Irvin Dawid | Planetizen", + "Irvin Dawid is a long-time Sierra Club activist, having worked in transportation, ... Irvin Dawid. Leo Vazquez. Mary" + + " Reynolds. Michael Dudley. Mike Lydon ..." + }, + { + "http://www.sourcekibitzer.org/Bio.ext?sp=l6", + "SourceKibitzer - Bio - Dawid Weiss", + "Dawid Weiss - Bio. Dawid Weiss. The founder of the Carrot2 project. Adjunct professor at the Laboratory of " + + "Intelligent Decision Support Systems ..." + }, + { + "http://www.lulu.com/content/815029", + "MD by Marcin and Dawid Witukiewicz (Music & Audio) in Electronic & Dance", + "MD by Marcin and Dawid Witukiewicz (Music & Audio) in Electronic & Dance : Music ... Music inspierd by the " + + "photography of Marcin and Dawid. ..." + }, + { + "http://www.juliedawid.co.uk/index.php?page=Band", + "Julie Dawid : Halfwise", + "the songs of prize winning folk singer and poet Julie Dawid. ... Also a lover and keeper of fish, professional " + + "storyteller Julie Dawid ..." + }, + { + "http://www.jewishencyclopedia.com/view.jsp?artid=38&letter=M", + "JewishEncyclopedia.com - MAGEN DAWID", + "The hexagram formed by the combination of two equilateral triangles; used as the ... The \"Magen Dawid,\" therefore," + + " probably did not originate withinRabbinism, the ..." + }, + { + "http://www.lulu.com/content/815298", + "MD Photography by Marcin and Dawid Witukiewicz (Book) in Arts & Photography", + "... This is a book feturing some of Marcin and Dawid Witukiewicz photographic work. ... by Marcin and Dawid " + + "Witukiewicz. Share This. Report this item. Preview ..." + }, + { + "http://finance.yahoo.com/q?s=dawid.x", + "DAWID.X: Summary for DIA Sep 2008 134.0000 call - Yahoo! Finance", + "Get detailed information on DIA Sep 2008 134.0000 call (DAWID.X) including quote performance, Real-Time ECN, " + + "technical chart analysis, key stats, insider ..." + }, + { + "http://www.bikepics.com/members/dawid/", + "BikePics - Dawid's Member Page on BikePics.Com", + "Dawid's Member Page. Member: dawid. Name: Dawid. From: ... You must be a BikePics Member and be logged in to message" + + " members. Current: 1998 Suzuki GS 500 ..." + }, + { + "http://www.david-banner.com/main.html", + "David Banner", + "Universal Records \\ SRC \\ Artists \\ David Banner ..." + }, + { + "http://www.dawid.com.pl/", + "Kinga Dawid", + "PORTRAITS by Kinga Dawid. Copying, dissemination, forwarding, printing and/or ... All rights reserved. Copyright C " + + "2006 Kinga Dawid ..." + }, + { + "http://www.bikepics.com/members/devdawid/", + "BikePics - dawid's Member Page on BikePics.Com", + "dawid's Member Page. Member: devdawid. Name: dawid. From: Poland. Message: You must be a BikePics Member and be " + + "logged in to message members. Current: 2002 ..." + }, + { + "http://dawid.bracka.pl/", + "Portfolio", + "google | portfolio | klan mortal. google | portfolio | klan mortal ..." + }, + { + "http://amiestreet.com/dawid", + "Amie Street - DaWid's Music Store", + "Amie Street empowers musicians to release, and music fans to discover, new and ... music from DaWid. recommendations" + + " (3) more info. SELECT: All, None, Free ..." + }, + { + "http://markoff.pl/", + "Dawid Markoff Photography", + "Nude, Fashion and Portrait photography" + }, + { + "http://www.archinect.com/schoolblog/blog.php?id=C0_372_39", + "Archinect : Schoolblog : UC DAAP (Dawid)", + "UC DAAP (Dawid) (002) a couple of quotes and a mini thesis rant. Oct 02 2006, 6 comments ... UC DAAP (Dawid) (001) " + + "it's the year of the thesis. Sep 06 2006, 4 ..." + }, + { + "http://groups.yahoo.com/group/dawid/rss", + "dawid : RSS / XML", + "dawid: Katechetyczne Forum Dyskusyjne ... Sign In. dawid \u00B7 Katechetyczne Forum Dyskusyjne. Home. Messages. " + + "Members Only. Post. Files ..." + }, + { + "http://cssoff.com/2007/06/14/and-the-winner-is-dawid-lizak/", + "CSS OFF", + "And the Winner is Dawid Lizak. View the winning entry. Dawid Lizak is from \u0141\u0119czna \u2013 a ... Dawid is " + + "currently expanding his knowledge of JavaScript, usability, ..." + }, + }; } diff --git a/src/test/java/org/carrot2/elasticsearch/SampleIndexTestCase.java b/src/test/java/org/carrot2/elasticsearch/SampleIndexTestCase.java index 93bee27..7b4cd04 100644 --- a/src/test/java/org/carrot2/elasticsearch/SampleIndexTestCase.java +++ b/src/test/java/org/carrot2/elasticsearch/SampleIndexTestCase.java @@ -1,5 +1,17 @@ package org.carrot2.elasticsearch; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; import org.apache.http.HttpHeaders; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; @@ -29,280 +41,279 @@ import org.elasticsearch.test.ESIntegTestCase; import org.junit.Before; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.InetSocketAddress; -import java.nio.charset.StandardCharsets; -import java.util.ArrayDeque; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Random; - -/** - * Perform tests on sample data. - */ +/** Perform tests on sample data. */ public abstract class SampleIndexTestCase extends ESIntegTestCase { - protected String restBaseUrl; - protected Client client; - - @Override - protected Settings nodeSettings(int nodeOrdinal) { - return Settings.builder() - .put(super.nodeSettings(nodeOrdinal)) - .build(); - } - - @Override - protected boolean addMockHttpTransport() { - return false; - } - - @Override - protected Collection> nodePlugins() { - return Collections.singletonList(ClusteringPlugin.class); - } - - @Override - protected Collection> transportClientPlugins() { - return nodePlugins(); - } - - protected static final String INDEX_TEST = "test"; - protected static final String INDEX_EMPTY = "empty"; - - @Before - public void createTestIndex() throws Exception { - // Delete any previously indexed content. - client = client(); - if (!client.admin().indices().prepareExists(INDEX_TEST).get().isExists()) { - String testTemplate = - "{" + - " \"test\": {" + - " \"properties\": {" + - " \"url\": { \"type\": \"text\" }," + - " \"title\": { \"type\": \"text\" }," + - " \"content\": { \"type\": \"text\" }," + - " \"lang\": { \"type\": \"text\" }," + - " \"rndlang\": { \"type\": \"text\" }" + - " }" + - " }" + - "}"; - - String emptyTemplate = - "{" + - " \"empty\": {" + - " \"properties\": {" + - " \"url\": { \"type\": \"text\" }," + - " \"title\": { \"type\": \"text\" }," + - " \"content\": { \"type\": \"text\" }," + - " \"lang\": { \"type\": \"text\" }," + - " \"rndlang\": { \"type\": \"text\" }" + - " }" + - " }" + - "}"; - - CreateIndexResponse response = client.admin().indices() - .prepareCreate(INDEX_TEST) - .addMapping("test", testTemplate, XContentType.JSON) - .get(); - Assertions.assertThat(response.isAcknowledged()).isTrue(); - - response = client.admin().indices() - .prepareCreate(INDEX_EMPTY) - .addMapping("empty", emptyTemplate, XContentType.JSON) - .get(); - Assertions.assertThat(response.isAcknowledged()).isTrue(); - - // Create content at random in the test index. - Random rnd = random(); - String[] languages = new LanguageComponentsLoader().load().languages().toArray(String[]::new); - Arrays.sort(languages); - - BulkRequestBuilder bulk = client.prepareBulk(); - for (String[] data : SampleDocumentData.SAMPLE_DATA) { - bulk.add(client.prepareIndex() + protected String restBaseUrl; + protected Client client; + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder().put(super.nodeSettings(nodeOrdinal)).build(); + } + + @Override + protected boolean addMockHttpTransport() { + return false; + } + + @Override + protected Collection> nodePlugins() { + return Collections.singletonList(ClusteringPlugin.class); + } + + @Override + protected Collection> transportClientPlugins() { + return nodePlugins(); + } + + protected static final String INDEX_TEST = "test"; + protected static final String INDEX_EMPTY = "empty"; + + @Before + public void createTestIndex() throws Exception { + // Delete any previously indexed content. + client = client(); + if (!client.admin().indices().prepareExists(INDEX_TEST).get().isExists()) { + String testTemplate = + "{" + + " \"test\": {" + + " \"properties\": {" + + " \"url\": { \"type\": \"text\" }," + + " \"title\": { \"type\": \"text\" }," + + " \"content\": { \"type\": \"text\" }," + + " \"lang\": { \"type\": \"text\" }," + + " \"rndlang\": { \"type\": \"text\" }" + + " }" + + " }" + + "}"; + + String emptyTemplate = + "{" + + " \"empty\": {" + + " \"properties\": {" + + " \"url\": { \"type\": \"text\" }," + + " \"title\": { \"type\": \"text\" }," + + " \"content\": { \"type\": \"text\" }," + + " \"lang\": { \"type\": \"text\" }," + + " \"rndlang\": { \"type\": \"text\" }" + + " }" + + " }" + + "}"; + + CreateIndexResponse response = + client + .admin() + .indices() + .prepareCreate(INDEX_TEST) + .addMapping("test", testTemplate, XContentType.JSON) + .get(); + Assertions.assertThat(response.isAcknowledged()).isTrue(); + + response = + client + .admin() + .indices() + .prepareCreate(INDEX_EMPTY) + .addMapping("empty", emptyTemplate, XContentType.JSON) + .get(); + Assertions.assertThat(response.isAcknowledged()).isTrue(); + + // Create content at random in the test index. + Random rnd = random(); + String[] languages = new LanguageComponentsLoader().load().languages().toArray(String[]::new); + Arrays.sort(languages); + + BulkRequestBuilder bulk = client.prepareBulk(); + for (String[] data : SampleDocumentData.SAMPLE_DATA) { + bulk.add( + client + .prepareIndex() .setIndex(INDEX_TEST) .setType("test") - .setSource(XContentFactory.jsonBuilder() - .startObject() - .field("url", data[0]) - .field("title", data[1]) - .field("content", data[2]) - .field("lang", "English") - .field("rndlang", languages[rnd.nextInt(languages.length)]) - .endObject())); - } - - bulk.add(client.prepareIndex() - .setIndex(INDEX_EMPTY) - .setType("empty") - .setSource(XContentFactory.jsonBuilder() - .startObject() - .field("url", "") - .field("title", "") - .field("content", "") - .endObject())); - - bulk.execute().actionGet(); - flushAndRefresh(INDEX_TEST); - flushAndRefresh(INDEX_EMPTY); - } - ensureGreen(INDEX_TEST); - ensureGreen(INDEX_EMPTY); - - InetSocketAddress endpoint = randomFrom(cluster().httpAddresses()); - this.restBaseUrl = "http://" + NetworkAddress.format(endpoint); - } - - /** - * Check for valid {@link ClusteringActionResponse}. - */ - protected static void checkValid(ClusteringActionResponse result) { - Assertions.assertThat(result.getDocumentGroups()) - .as("top-level clusters") - .isNotNull() - .isNotEmpty(); - - Map idToHit = new HashMap<>(); - SearchHits hits = result.getSearchResponse().getHits(); - if (hits != null) { - for (SearchHit hit : hits) { - idToHit.put(hit.getId(), hit); - } - } - - String maxHits = result.getInfo().get(ClusteringActionResponse.Fields.Info.MAX_HITS); - final boolean containsAllHits = - (maxHits == null || maxHits.isEmpty() || Integer.parseInt(maxHits) == Integer.MAX_VALUE); - - ArrayDeque queue = new ArrayDeque<>(); - queue.addAll(Arrays.asList(result.getDocumentGroups())); - while (!queue.isEmpty()) { - DocumentGroup g = queue.pop(); - - Assertions.assertThat(g.getLabel()) - .as("label") - .isNotNull() - .isNotEmpty(); - - if (containsAllHits) { - String[] documentReferences = g.getDocumentReferences(); - Assertions.assertThat(idToHit.keySet()) - .as("docRefs") - .containsAll(Arrays.asList(documentReferences)); - } + .setSource( + XContentFactory.jsonBuilder() + .startObject() + .field("url", data[0]) + .field("title", data[1]) + .field("content", data[2]) + .field("lang", "English") + .field("rndlang", languages[rnd.nextInt(languages.length)]) + .endObject())); } - Assertions.assertThat(result.getInfo()) - .containsKey(ClusteringActionResponse.Fields.Info.ALGORITHM) - .containsKey(ClusteringActionResponse.Fields.Info.CLUSTERING_MILLIS) - .containsKey(ClusteringActionResponse.Fields.Info.SEARCH_MILLIS) - .containsKey(ClusteringActionResponse.Fields.Info.TOTAL_MILLIS) - .containsKey(ClusteringActionResponse.Fields.Info.MAX_HITS) - .containsKey(ClusteringActionResponse.Fields.Info.LANGUAGES); - } - - /** - * Roundtrip to/from JSON. - */ - protected static void checkJsonSerialization(ClusteringActionResponse result) throws IOException { - XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); - builder.startObject(); - result.toXContent(builder, ToXContent.EMPTY_PARAMS); - builder.endObject(); - String json = Strings.toString(builder); - - try (XContentParser parser = JsonXContent.jsonXContent.createParser(NamedXContentRegistry.EMPTY, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json)) { - Map mapAndClose = parser.map(); - Assertions.assertThat(mapAndClose) - .as("json-result") - .containsKey(Fields.CLUSTERS); + bulk.add( + client + .prepareIndex() + .setIndex(INDEX_EMPTY) + .setType("empty") + .setSource( + XContentFactory.jsonBuilder() + .startObject() + .field("url", "") + .field("title", "") + .field("content", "") + .endObject())); + + bulk.execute().actionGet(); + flushAndRefresh(INDEX_TEST); + flushAndRefresh(INDEX_EMPTY); + } + ensureGreen(INDEX_TEST); + ensureGreen(INDEX_EMPTY); + + InetSocketAddress endpoint = randomFrom(cluster().httpAddresses()); + this.restBaseUrl = "http://" + NetworkAddress.format(endpoint); + } + + /** Check for valid {@link ClusteringActionResponse}. */ + protected static void checkValid(ClusteringActionResponse result) { + Assertions.assertThat(result.getDocumentGroups()) + .as("top-level clusters") + .isNotNull() + .isNotEmpty(); + + Map idToHit = new HashMap<>(); + SearchHits hits = result.getSearchResponse().getHits(); + if (hits != null) { + for (SearchHit hit : hits) { + idToHit.put(hit.getId(), hit); } - } + } - protected byte[] jsonResourceAs(String resourceName, XContentType toType) throws IOException { - byte[] bytes = resource(resourceName); - XContentParser parser = XContentFactory.xContent(XContentType.JSON) - .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, bytes); + String maxHits = result.getInfo().get(ClusteringActionResponse.Fields.Info.MAX_HITS); + final boolean containsAllHits = + (maxHits == null || maxHits.isEmpty() || Integer.parseInt(maxHits) == Integer.MAX_VALUE); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - XContentBuilder builder = XContentFactory.contentBuilder(toType, baos).copyCurrentStructure(parser); - builder.close(); + ArrayDeque queue = new ArrayDeque<>(); + queue.addAll(Arrays.asList(result.getDocumentGroups())); + while (!queue.isEmpty()) { + DocumentGroup g = queue.pop(); - return baos.toByteArray(); - } + Assertions.assertThat(g.getLabel()).as("label").isNotNull().isNotEmpty(); - protected byte[] resource(String resourceName) throws IOException { - try (InputStream is = getClass().getResourceAsStream( - "_" + getClass().getSimpleName() + "/" + resourceName)) { - return is.readAllBytes(); + if (containsAllHits) { + String[] documentReferences = g.getDocumentReferences(); + Assertions.assertThat(idToHit.keySet()) + .as("docRefs") + .containsAll(Arrays.asList(documentReferences)); } - } - - protected static Map checkHttpResponseContainsClusters(HttpResponse response) throws IOException { - Map map = checkHttpResponse(response); - - // We should have some clusters. - Assertions.assertThat(map).containsKey("clusters"); + } + + Assertions.assertThat(result.getInfo()) + .containsKey(ClusteringActionResponse.Fields.Info.ALGORITHM) + .containsKey(ClusteringActionResponse.Fields.Info.CLUSTERING_MILLIS) + .containsKey(ClusteringActionResponse.Fields.Info.SEARCH_MILLIS) + .containsKey(ClusteringActionResponse.Fields.Info.TOTAL_MILLIS) + .containsKey(ClusteringActionResponse.Fields.Info.MAX_HITS) + .containsKey(ClusteringActionResponse.Fields.Info.LANGUAGES); + } + + /** Roundtrip to/from JSON. */ + protected static void checkJsonSerialization(ClusteringActionResponse result) throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + result.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + String json = Strings.toString(builder); + + try (XContentParser parser = + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json)) { + Map mapAndClose = parser.map(); + Assertions.assertThat(mapAndClose).as("json-result").containsKey(Fields.CLUSTERS); + } + } + + protected byte[] jsonResourceAs(String resourceName, XContentType toType) throws IOException { + byte[] bytes = resource(resourceName); + XContentParser parser = + XContentFactory.xContent(XContentType.JSON) + .createParser( + NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, bytes); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + XContentBuilder builder = + XContentFactory.contentBuilder(toType, baos).copyCurrentStructure(parser); + builder.close(); + + return baos.toByteArray(); + } + + protected byte[] resource(String resourceName) throws IOException { + try (InputStream is = + getClass().getResourceAsStream("_" + getClass().getSimpleName() + "/" + resourceName)) { + return is.readAllBytes(); + } + } + + protected static Map checkHttpResponseContainsClusters(HttpResponse response) + throws IOException { + Map map = checkHttpResponse(response); + + // We should have some clusters. + Assertions.assertThat(map).containsKey("clusters"); + return map; + } + + protected static Map checkHttpResponse(HttpResponse response) throws IOException { + byte[] responseBytes = response.getEntity().getContent().readAllBytes(); + String responseString = new String(responseBytes, StandardCharsets.UTF_8); + + String responseDescription = + "HTTP response status: " + + response.getStatusLine().toString() + + ", " + + "HTTP body: " + + responseString; + + Assertions.assertThat(response.getStatusLine().getStatusCode()) + .describedAs(responseDescription) + .isEqualTo(HttpStatus.SC_OK); + + try (XContentParser parser = + XContentHelper.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + new BytesArray(responseBytes), + XContentType.fromMediaTypeOrFormat( + response.getFirstHeader(HttpHeaders.CONTENT_TYPE).getValue()))) { + Map map = parser.map(); + Assertions.assertThat(map).describedAs(responseDescription).doesNotContainKey("error"); return map; - } - - protected static Map checkHttpResponse(HttpResponse response) throws IOException { - byte[] responseBytes = response.getEntity().getContent().readAllBytes(); - String responseString = new String(responseBytes, StandardCharsets.UTF_8); - - String responseDescription = - "HTTP response status: " + response.getStatusLine().toString() + ", " + - "HTTP body: " + responseString; - - Assertions.assertThat(response.getStatusLine().getStatusCode()) - .describedAs(responseDescription) - .isEqualTo(HttpStatus.SC_OK); - - try (XContentParser parser = XContentHelper.createParser( - NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - new BytesArray(responseBytes), - XContentType.fromMediaTypeOrFormat(response.getFirstHeader(HttpHeaders.CONTENT_TYPE).getValue()))) { - Map map = parser.map(); - Assertions.assertThat(map) - .describedAs(responseDescription) - .doesNotContainKey("error"); - return map; - } - } - - protected static void expectErrorResponseWithMessage(HttpResponse response, - int expectedStatus, - String messageSubstring) throws IOException { - byte[] responseBytes = response.getEntity().getContent().readAllBytes(); - String responseString = new String(responseBytes, StandardCharsets.UTF_8); - String responseDescription = - "HTTP response status: " + response.getStatusLine().toString() + ", " + - "HTTP body: " + responseString; - - Assertions.assertThat(response.getStatusLine().getStatusCode()) - .describedAs(responseDescription) - .isEqualTo(expectedStatus); - - XContentType xContentType = XContentType.fromMediaTypeOrFormat( - response.getFirstHeader(HttpHeaders.CONTENT_TYPE).getValue()); - try (XContentParser parser = XContentHelper.createParser( - NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - new BytesArray(responseBytes), xContentType)) { - Map responseJson = parser.mapOrdered(); - - Assertions.assertThat(responseJson) - .describedAs(responseString) - .containsKey("error"); - - Assertions.assertThat(responseJson.get("error").toString()) - .describedAs(responseString) - .contains(messageSubstring); - } - } + } + } + + protected static void expectErrorResponseWithMessage( + HttpResponse response, int expectedStatus, String messageSubstring) throws IOException { + byte[] responseBytes = response.getEntity().getContent().readAllBytes(); + String responseString = new String(responseBytes, StandardCharsets.UTF_8); + String responseDescription = + "HTTP response status: " + + response.getStatusLine().toString() + + ", " + + "HTTP body: " + + responseString; + + Assertions.assertThat(response.getStatusLine().getStatusCode()) + .describedAs(responseDescription) + .isEqualTo(expectedStatus); + + XContentType xContentType = + XContentType.fromMediaTypeOrFormat( + response.getFirstHeader(HttpHeaders.CONTENT_TYPE).getValue()); + try (XContentParser parser = + XContentHelper.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + new BytesArray(responseBytes), + xContentType)) { + Map responseJson = parser.mapOrdered(); + + Assertions.assertThat(responseJson).describedAs(responseString).containsKey("error"); + + Assertions.assertThat(responseJson.get("error").toString()) + .describedAs(responseString) + .contains(messageSubstring); + } + } }