From 19ef0ac203b06e2d5324927f970da133636be38f Mon Sep 17 00:00:00 2001
From: zyxxoo <1318247699@qq.com>
Date: Mon, 31 Oct 2022 12:05:22 +0800
Subject: [PATCH] remove word dependency to avoid its GPL license (#1998)

---
 hugegraph-core/pom.xml                        | 20 -----
 .../hugegraph/analyzer/AnalyzerFactory.java   |  2 -
 .../hugegraph/analyzer/WordAnalyzer.java      | 74 -------------------
 .../baidu/hugegraph/config/CoreOptions.java   |  9 +--
 .../hugegraph/unit/core/AnalyzerTest.java     | 21 ------
 5 files changed, 1 insertion(+), 125 deletions(-)
 delete mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java
diff --git a/hugegraph-core/pom.xml b/hugegraph-core/pom.xml
index bb1d3646d2..ded15fda54 100644
--- a/hugegraph-core/pom.xml
+++ b/hugegraph-core/pom.xml
@@ -32,7 +32,6 @@
         <jraft.version>1.3.11</jraft.version>
         <ohc.version>0.7.4</ohc.version>
         <lz4.version>1.8.0</lz4.version>
-        <apdplat-word.version>1.3.1</apdplat-word.version>
         <mmseg4j-core.version>1.10.0</mmseg4j-core.version>
         <jcseg.version>2.6.2</jcseg.version>
         <hanlp.version>portable-1.8.3</hanlp.version>
@@ -125,25 +124,6 @@
             </exclusions>
         </dependency>
 
-        <dependency>
-            <groupId>org.apdplat</groupId>
-            <artifactId>word</artifactId>
-            <version>${apdplat-word.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>ch.qos.logback</groupId>
-                    <artifactId>logback-classic</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.lucene</groupId>
-                    <artifactId>lucene-core</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.lucene</groupId>
-                    <artifactId>lucene-analyzers-common</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
         <dependency>
             <groupId>org.ansj</groupId>
             <artifactId>ansj_seg</artifactId>
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java
index ab84dba632..3d6ead94cd 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java
@@ -36,8 +36,6 @@ public class AnalyzerFactory {
     public static Analyzer analyzer(String name, String mode) {
         name = name.toLowerCase();
         switch (name) {
-            case "word":
-                return new WordAnalyzer(mode);
             case "ansj":
                 return new AnsjAnalyzer(mode);
             case "hanlp":
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java
deleted file mode 100644
index 48ba66d955..0000000000
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2017 HugeGraph Authors
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to You under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-
-package com.baidu.hugegraph.analyzer;
-
-import java.util.List;
-import java.util.Set;
-
-import org.apdplat.word.WordSegmenter;
-import org.apdplat.word.segmentation.SegmentationAlgorithm;
-import org.apdplat.word.segmentation.Word;
-
-import com.baidu.hugegraph.config.ConfigException;
-import com.baidu.hugegraph.util.InsertionOrderUtil;
-import com.google.common.collect.ImmutableList;
-
-/**
- * Reference from https://my.oschina.net/apdplat/blog/412921
- */
-public class WordAnalyzer implements Analyzer {
-
-    public static final List<String> SUPPORT_MODES =
-           ImmutableList.<String>builder()
-                        .add("MaximumMatching")
-                        .add("ReverseMaximumMatching")
-                        .add("MinimumMatching")
-                        .add("ReverseMinimumMatching")
-                        .add("BidirectionalMaximumMatching")
-                        .add("BidirectionalMinimumMatching")
-                        .add("BidirectionalMaximumMinimumMatching")
-                        .add("FullSegmentation")
-                        .add("MinimalWordCount")
-                        .add("MaxNgramScore")
-                        .add("PureEnglish")
-                        .build();
-
-    private final SegmentationAlgorithm algorithm;
-
-    public WordAnalyzer(String mode) {
-        try {
-            this.algorithm = SegmentationAlgorithm.valueOf(mode);
-        } catch (Exception e) {
-            throw new ConfigException(
-                      "Unsupported segment mode '%s' for word analyzer, " +
-                      "the available values are %s", e, mode, SUPPORT_MODES);
-        }
-    }
-
-    @Override
-    public Set<String> segment(String text) {
-        Set<String> result = InsertionOrderUtil.newSet();
-        List<Word> words = WordSegmenter.segWithStopWords(text, this.algorithm);
-        for (Word word : words) {
-            result.add(word.getText());
-        }
-        return result;
-    }
-}
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java
index a20d4798b7..c2596cbc4b 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java
@@ -571,7 +571,7 @@ public static synchronized CoreOptions instance() {
                     "search.text_analyzer",
                     "Choose a text analyzer for searching the " +
                     "vertex/edge properties, available type are " +
-                    "[word, ansj, hanlp, smartcn, jieba, jcseg, " +
+                    "[ansj, hanlp, smartcn, jieba, jcseg, " +
                     "mmseg4j, ikanalyzer].",
                     disallowEmpty(),
                     "ikanalyzer"
@@ -582,13 +582,6 @@ public static synchronized CoreOptions instance() {
                     "search.text_analyzer_mode",
                     "Specify the mode for the text analyzer, " +
                     "the available mode of analyzer are " +
-                    "{word: [MaximumMatching, ReverseMaximumMatching, " +
-                            "MinimumMatching, ReverseMinimumMatching, " +
-                            "BidirectionalMaximumMatching, " +
-                            "BidirectionalMinimumMatching, " +
-                            "BidirectionalMaximumMinimumMatching, " +
-                            "FullSegmentation, MinimalWordCount, " +
-                            "MaxNgramScore, PureEnglish], " +
-                    "ansj: [BaseAnalysis, IndexAnalysis, ToAnalysis, " +
+                    "{ansj: [BaseAnalysis, IndexAnalysis, ToAnalysis, " +
                            "NlpAnalysis], " +
                     "hanlp: [standard, nlp, index, nShort, shortest, speed], " +
diff --git a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java
index 3674ee83f1..c884888907 100644
--- a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java
+++ b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java
@@ -47,27 +47,6 @@ public void teardown() {
         // pass
     }
 
-    @Test
-    public void testWordAnalyzer() {
-        // MaximumMatching mode
-        Analyzer analyzer = AnalyzerFactory.analyzer("word", "MaximumMatching");
-        Assert.assertEquals(setOf("england", "wins", "world", "cup"),
-                            analyzer.segment(TEXT_1));
-        Assert.assertEquals(setOf("英格兰", "世界杯", "夺冠", "中华人民共和国",
-                                  "国歌", "百度", "科技园", "位于", "北京市",
-                                  "海淀区", "西北旺", "东路", "10号", "院"),
-                            analyzer.segment(TEXT_2));
-
-        // ReverseMaximumMatching mode
-        analyzer = AnalyzerFactory.analyzer("word", "ReverseMaximumMatching");
-        Assert.assertEquals(setOf("england", "wins", "world", "cup"),
-                            analyzer.segment(TEXT_1));
-        Assert.assertEquals(setOf("英格兰", "世界杯", "夺冠", "中华人民共和国",
-                                  "国歌", "百度", "科技园", "位于", "北京市",
-                                  "海淀区", "西北旺", "东路", "10号", "院"),
-                            analyzer.segment(TEXT_2));
-    }
-
     @Test
     public void testAnsjAnalyzer() {
         // BaseAnalysis mode