From 19ef0ac203b06e2d5324927f970da133636be38f Mon Sep 17 00:00:00 2001
From: zyxxoo <1318247699@qq.com>
Date: Mon, 31 Oct 2022 12:05:22 +0800
Subject: [PATCH] remove the apdplat "word" analyzer dependency to avoid its GPL license (#1998)
---
hugegraph-core/pom.xml | 20 -----
.../hugegraph/analyzer/AnalyzerFactory.java | 2 -
.../hugegraph/analyzer/WordAnalyzer.java | 74 -------------------
.../baidu/hugegraph/config/CoreOptions.java | 9 +--
.../hugegraph/unit/core/AnalyzerTest.java | 21 ------
5 files changed, 1 insertion(+), 125 deletions(-)
delete mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java
diff --git a/hugegraph-core/pom.xml b/hugegraph-core/pom.xml
index bb1d3646d2..ded15fda54 100644
--- a/hugegraph-core/pom.xml
+++ b/hugegraph-core/pom.xml
@@ -32,7 +32,6 @@
1.3.11
0.7.4
1.8.0
- 1.3.1
1.10.0
2.6.2
portable-1.8.3
@@ -125,25 +124,6 @@
-
- org.apdplat
- word
- ${apdplat-word.version}
-
-
- ch.qos.logback
- logback-classic
-
-
- org.apache.lucene
- lucene-core
-
-
- org.apache.lucene
- lucene-analyzers-common
-
-
-
org.ansj
ansj_seg
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java
index ab84dba632..3d6ead94cd 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java
@@ -36,8 +36,6 @@ public class AnalyzerFactory {
public static Analyzer analyzer(String name, String mode) {
name = name.toLowerCase();
switch (name) {
- case "word":
- return new WordAnalyzer(mode);
case "ansj":
return new AnsjAnalyzer(mode);
case "hanlp":
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java
deleted file mode 100644
index 48ba66d955..0000000000
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2017 HugeGraph Authors
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to You under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-
-package com.baidu.hugegraph.analyzer;
-
-import java.util.List;
-import java.util.Set;
-
-import org.apdplat.word.WordSegmenter;
-import org.apdplat.word.segmentation.SegmentationAlgorithm;
-import org.apdplat.word.segmentation.Word;
-
-import com.baidu.hugegraph.config.ConfigException;
-import com.baidu.hugegraph.util.InsertionOrderUtil;
-import com.google.common.collect.ImmutableList;
-
-/**
- * Reference from https://my.oschina.net/apdplat/blog/412921
- */
-public class WordAnalyzer implements Analyzer {
-
- public static final List SUPPORT_MODES =
- ImmutableList.builder()
- .add("MaximumMatching")
- .add("ReverseMaximumMatching")
- .add("MinimumMatching")
- .add("ReverseMinimumMatching")
- .add("BidirectionalMaximumMatching")
- .add("BidirectionalMinimumMatching")
- .add("BidirectionalMaximumMinimumMatching")
- .add("FullSegmentation")
- .add("MinimalWordCount")
- .add("MaxNgramScore")
- .add("PureEnglish")
- .build();
-
- private final SegmentationAlgorithm algorithm;
-
- public WordAnalyzer(String mode) {
- try {
- this.algorithm = SegmentationAlgorithm.valueOf(mode);
- } catch (Exception e) {
- throw new ConfigException(
- "Unsupported segment mode '%s' for word analyzer, " +
- "the available values are %s", e, mode, SUPPORT_MODES);
- }
- }
-
- @Override
- public Set segment(String text) {
- Set result = InsertionOrderUtil.newSet();
- List words = WordSegmenter.segWithStopWords(text, this.algorithm);
- for (Word word : words) {
- result.add(word.getText());
- }
- return result;
- }
-}
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java
index a20d4798b7..c2596cbc4b 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java
@@ -571,7 +571,7 @@ public static synchronized CoreOptions instance() {
"search.text_analyzer",
"Choose a text analyzer for searching the " +
"vertex/edge properties, available type are " +
- "[word, ansj, hanlp, smartcn, jieba, jcseg, " +
+ "[ansj, hanlp, smartcn, jieba, jcseg, " +
"mmseg4j, ikanalyzer].",
disallowEmpty(),
"ikanalyzer"
@@ -582,13 +582,6 @@ public static synchronized CoreOptions instance() {
"search.text_analyzer_mode",
"Specify the mode for the text analyzer, " +
"the available mode of analyzer are " +
- "{word: [MaximumMatching, ReverseMaximumMatching, " +
- "MinimumMatching, ReverseMinimumMatching, " +
- "BidirectionalMaximumMatching, " +
- "BidirectionalMinimumMatching, " +
- "BidirectionalMaximumMinimumMatching, " +
- "FullSegmentation, MinimalWordCount, " +
- "MaxNgramScore, PureEnglish], " +
"ansj: [BaseAnalysis, IndexAnalysis, ToAnalysis, " +
"NlpAnalysis], " +
"hanlp: [standard, nlp, index, nShort, shortest, speed], " +
diff --git a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java
index 3674ee83f1..c884888907 100644
--- a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java
+++ b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java
@@ -47,27 +47,6 @@ public void teardown() {
// pass
}
- @Test
- public void testWordAnalyzer() {
- // MaximumMatching mode
- Analyzer analyzer = AnalyzerFactory.analyzer("word", "MaximumMatching");
- Assert.assertEquals(setOf("england", "wins", "world", "cup"),
- analyzer.segment(TEXT_1));
- Assert.assertEquals(setOf("英格兰", "世界杯", "夺冠", "中华人民共和国",
- "国歌", "百度", "科技园", "位于", "北京市",
- "海淀区", "西北旺", "东路", "10号", "院"),
- analyzer.segment(TEXT_2));
-
- // ReverseMaximumMatching mode
- analyzer = AnalyzerFactory.analyzer("word", "ReverseMaximumMatching");
- Assert.assertEquals(setOf("england", "wins", "world", "cup"),
- analyzer.segment(TEXT_1));
- Assert.assertEquals(setOf("英格兰", "世界杯", "夺冠", "中华人民共和国",
- "国歌", "百度", "科技园", "位于", "北京市",
- "海淀区", "西北旺", "东路", "10号", "院"),
- analyzer.segment(TEXT_2));
- }
-
@Test
public void testAnsjAnalyzer() {
// BaseAnalysis mode