From 19ef0ac203b06e2d5324927f970da133636be38f Mon Sep 17 00:00:00 2001 From: zyxxoo <1318247699@qq.com> Date: Mon, 31 Oct 2022 12:05:22 +0800 Subject: [PATCH] remove word dependency to remove GPL license (#1998) --- hugegraph-core/pom.xml | 20 ----- .../hugegraph/analyzer/AnalyzerFactory.java | 2 - .../hugegraph/analyzer/WordAnalyzer.java | 74 ------------------- .../baidu/hugegraph/config/CoreOptions.java | 9 +-- .../hugegraph/unit/core/AnalyzerTest.java | 21 ------ 5 files changed, 1 insertion(+), 125 deletions(-) delete mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java diff --git a/hugegraph-core/pom.xml b/hugegraph-core/pom.xml index bb1d3646d2..ded15fda54 100644 --- a/hugegraph-core/pom.xml +++ b/hugegraph-core/pom.xml @@ -32,7 +32,6 @@ 1.3.11 0.7.4 1.8.0 - 1.3.1 1.10.0 2.6.2 portable-1.8.3 @@ -125,25 +124,6 @@ - - org.apdplat - word - ${apdplat-word.version} - - - ch.qos.logback - logback-classic - - - org.apache.lucene - lucene-core - - - org.apache.lucene - lucene-analyzers-common - - - org.ansj ansj_seg diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java index ab84dba632..3d6ead94cd 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java @@ -36,8 +36,6 @@ public class AnalyzerFactory { public static Analyzer analyzer(String name, String mode) { name = name.toLowerCase(); switch (name) { - case "word": - return new WordAnalyzer(mode); case "ansj": return new AnsjAnalyzer(mode); case "hanlp": diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java deleted file mode 100644 index 48ba66d955..0000000000 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2017 HugeGraph Authors - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ - -package com.baidu.hugegraph.analyzer; - -import java.util.List; -import java.util.Set; - -import org.apdplat.word.WordSegmenter; -import org.apdplat.word.segmentation.SegmentationAlgorithm; -import org.apdplat.word.segmentation.Word; - -import com.baidu.hugegraph.config.ConfigException; -import com.baidu.hugegraph.util.InsertionOrderUtil; -import com.google.common.collect.ImmutableList; - -/** - * Reference from https://my.oschina.net/apdplat/blog/412921 - */ -public class WordAnalyzer implements Analyzer { - - public static final List SUPPORT_MODES = - ImmutableList.builder() - .add("MaximumMatching") - .add("ReverseMaximumMatching") - .add("MinimumMatching") - .add("ReverseMinimumMatching") - .add("BidirectionalMaximumMatching") - .add("BidirectionalMinimumMatching") - .add("BidirectionalMaximumMinimumMatching") - .add("FullSegmentation") - .add("MinimalWordCount") - .add("MaxNgramScore") - .add("PureEnglish") - .build(); - - private final SegmentationAlgorithm algorithm; - - public WordAnalyzer(String mode) { - try { - this.algorithm = SegmentationAlgorithm.valueOf(mode); - } catch (Exception e) { - throw new ConfigException( - "Unsupported segment mode '%s' for word analyzer, " + - "the available values are %s", e, mode, SUPPORT_MODES); - } - } - - @Override - public Set segment(String text) { - Set result = InsertionOrderUtil.newSet(); - List words = WordSegmenter.segWithStopWords(text, this.algorithm); - for (Word word : words) { - result.add(word.getText()); - } - return result; - } -} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java index a20d4798b7..c2596cbc4b 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/config/CoreOptions.java @@ -571,7 +571,7 @@ public static synchronized CoreOptions instance() { "search.text_analyzer", "Choose a text analyzer for searching the " + "vertex/edge properties, available type are " + - "[word, ansj, hanlp, smartcn, jieba, jcseg, " + + "[ansj, hanlp, smartcn, jieba, jcseg, " + "mmseg4j, ikanalyzer].", disallowEmpty(), "ikanalyzer" @@ -582,13 +582,6 @@ public static synchronized CoreOptions instance() { "search.text_analyzer_mode", "Specify the mode for the text analyzer, " + "the available mode of analyzer are " + - "{word: [MaximumMatching, ReverseMaximumMatching, " + - "MinimumMatching, ReverseMinimumMatching, " + - "BidirectionalMaximumMatching, " + - "BidirectionalMinimumMatching, " + - "BidirectionalMaximumMinimumMatching, " + - "FullSegmentation, MinimalWordCount, " + - "MaxNgramScore, PureEnglish], " + "ansj: [BaseAnalysis, IndexAnalysis, ToAnalysis, " + "NlpAnalysis], " + "hanlp: [standard, nlp, index, nShort, shortest, speed], " + diff --git a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java index 3674ee83f1..c884888907 100644 --- a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java +++ b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java @@ -47,27 +47,6 @@ public void teardown() { // pass } - @Test - public void testWordAnalyzer() { - // MaximumMatching mode - Analyzer analyzer = AnalyzerFactory.analyzer("word", "MaximumMatching"); - Assert.assertEquals(setOf("england", "wins", "world", "cup"), - analyzer.segment(TEXT_1)); - Assert.assertEquals(setOf("英格兰", "世界杯", "夺冠", "中华人民共和国", - "国歌", "百度", "科技园", "位于", "北京市", - "海淀区", "西北旺", "东路", "10号", "院"), - analyzer.segment(TEXT_2)); - - // ReverseMaximumMatching mode - analyzer = AnalyzerFactory.analyzer("word", "ReverseMaximumMatching"); - Assert.assertEquals(setOf("england", "wins", "world", "cup"), - analyzer.segment(TEXT_1)); - Assert.assertEquals(setOf("英格兰", "世界杯", "夺冠", "中华人民共和国", - "国歌", "百度", "科技园", "位于", "北京市", - "海淀区", "西北旺", "东路", "10号", "院"), - analyzer.segment(TEXT_2)); - } - @Test public void testAnsjAnalyzer() { // BaseAnalysis mode