From 3a5e5dd29ef527ca22c7dd6d9baec256538a3b7b Mon Sep 17 00:00:00 2001 From: Laimonas Simutis Date: Thu, 11 May 2023 14:03:43 -0700 Subject: [PATCH 01/27] upgrade targets to target .net core 6, in addition to .net framework --- .build/dependencies.props | 2 +- .../Lucene.Net.Analysis.OpenNLP.csproj | 2 +- .../Lucene.Net.Tests.Analysis.OpenNLP.csproj | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.build/dependencies.props b/.build/dependencies.props index a37cc27a3e..e7ffb9fdff 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -68,7 +68,7 @@ 13.0.1 3.17.0 3.13.1 - 1.9.1.1 + 1.9.4.1 2.7.8 1.4.2 0.4.1.1 diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index 625d6ee9c5..d924ac6142 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -30,7 +30,7 @@ - net462 + net462;net6.0; Lucene.Net.Analysis.OpenNLP $(PackageTags);analysis;natural;language;processing;opennlp diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj b/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj index 1a3bbbc6b7..ef45ee6beb 100644 --- a/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj @@ -24,9 +24,7 @@ - - net48 - + Lucene.Net.Tests.Analysis.OpenNLP Lucene.Net.Analysis.OpenNlp From 87fd56385b769959e2672bb0b29b82adbc315a33 Mon Sep 17 00:00:00 2001 From: Laimonas Simutis Date: Thu, 11 May 2023 14:22:46 -0700 Subject: [PATCH 02/27] update net 4.6 version --- .../Lucene.Net.Analysis.OpenNLP.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj 
b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index d924ac6142..1dfb341e06 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -30,7 +30,7 @@ - net462;net6.0; + net461;net6.0; Lucene.Net.Analysis.OpenNLP $(PackageTags);analysis;natural;language;processing;opennlp From 93dbe5f42009597ec5d62ebf7f79033ce9e22300 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Mon, 15 May 2023 02:22:15 +0700 Subject: [PATCH 03/27] Lucene.Net.Tests.OpenNLP: Patched IDE behavior to use net48 when net461 is selected and net7.0 when net5.0 is selected. In CI, we set IsTestProject=false and IsPublishable=false to skip these tests. --- .../Lucene.Net.Analysis.OpenNLP.csproj | 2 +- .../Lucene.Net.Tests.Analysis.OpenNLP.csproj | 15 +++++++++------ .../Lucene.Net.Tests.Cli.csproj | 11 +++++++---- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index 1dfb341e06..eb9c22ef0f 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -30,7 +30,7 @@ - net461;net6.0; + net6.0;net462 Lucene.Net.Analysis.OpenNLP $(PackageTags);analysis;natural;language;processing;opennlp diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj b/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj index ef45ee6beb..1428120861 100644 --- a/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj @@ -24,17 +24,20 @@ - + + net7.0;net6.0 + $(TargetFrameworks);net48 Lucene.Net.Tests.Analysis.OpenNLP Lucene.Net.Analysis.OpenNlp + + + false - true - - false - true + + net7.0 + net48 diff 
--git a/src/dotnet/tools/Lucene.Net.Tests.Cli/Lucene.Net.Tests.Cli.csproj b/src/dotnet/tools/Lucene.Net.Tests.Cli/Lucene.Net.Tests.Cli.csproj index d0cf2a720b..9e9a0105ae 100644 --- a/src/dotnet/tools/Lucene.Net.Tests.Cli/Lucene.Net.Tests.Cli.csproj +++ b/src/dotnet/tools/Lucene.Net.Tests.Cli/Lucene.Net.Tests.Cli.csproj @@ -26,12 +26,15 @@ net7.0;net6.0 - - net7.0 Lucene.Net.Tests.Cli + - false - false + + + false + false + + net7.0 From 67260bbf709932c9e9cd6e1bd5cec34ba5646d49 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Mon, 15 May 2023 02:25:02 +0700 Subject: [PATCH 04/27] publish-test-results-for-test-projects.yml: Added support for net7.0 and net6.0 for Lucene.Net.Tests.Analysis.OpenNLP tests. --- ...publish-test-results-for-test-projects.yml | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/.build/azure-templates/publish-test-results-for-test-projects.yml b/.build/azure-templates/publish-test-results-for-test-projects.yml index 7ee98b752e..11ebac78fb 100644 --- a/.build/azure-templates/publish-test-results-for-test-projects.yml +++ b/.build/azure-templates/publish-test-results-for-test-projects.yml @@ -102,7 +102,28 @@ steps: testResultsArtifactName: '${{ parameters.testResultsArtifactName }}' testResultsFileName: '${{ parameters.testResultsFileName }}' -# Special case: Only supports net48 +# Special case: Only supports net7.0, net6.0 and net48 + +- template: publish-test-results.yml + parameters: + testProjectName: 'Lucene.Net.Tests.Analysis.OpenNLP' + framework: 'net7.0' # Since condtions are not supported for templates, we check for the file existence within publish-test-results.yml + vsTestPlatform: '${{ parameters.vsTestPlatform }}' + osName: '${{ parameters.osName }}' + testResultsFormat: '${{ parameters.testResultsFormat }}' + testResultsArtifactName: '${{ parameters.testResultsArtifactName }}' + testResultsFileName: '${{ parameters.testResultsFileName }}' + +- template: publish-test-results.yml + parameters: + 
testProjectName: 'Lucene.Net.Tests.Analysis.OpenNLP' + framework: 'net6.0' # Since condtions are not supported for templates, we check for the file existence within publish-test-results.yml + vsTestPlatform: '${{ parameters.vsTestPlatform }}' + osName: '${{ parameters.osName }}' + testResultsFormat: '${{ parameters.testResultsFormat }}' + testResultsArtifactName: '${{ parameters.testResultsArtifactName }}' + testResultsFileName: '${{ parameters.testResultsFileName }}' + - template: publish-test-results.yml parameters: testProjectName: 'Lucene.Net.Tests.Analysis.OpenNLP' From 449586516c296d756157e5d04ddcc8b041c18c16 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Mon, 15 May 2023 02:28:43 +0700 Subject: [PATCH 05/27] .github/workflows: Regenerated to add net7.0 as a test framework for Lucene.Net.Tests.Analysis.OpenNLP --- .github/workflows/Lucene-Net-Tests-Analysis-OpenNLP.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Lucene-Net-Tests-Analysis-OpenNLP.yml b/.github/workflows/Lucene-Net-Tests-Analysis-OpenNLP.yml index d23d10fe7c..ca1236afc5 100644 --- a/.github/workflows/Lucene-Net-Tests-Analysis-OpenNLP.yml +++ b/.github/workflows/Lucene-Net-Tests-Analysis-OpenNLP.yml @@ -56,7 +56,7 @@ jobs: fail-fast: false matrix: os: [windows-latest, ubuntu-latest] - framework: [net48] + framework: [net7.0, net48] platform: [x64] configuration: [Release] exclude: From d2e45033543aaebfef9d4a25d55fbaa5ed730f96 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Mon, 15 May 2023 03:01:33 +0700 Subject: [PATCH 06/27] .build/dependencies.props: Upgrade System.Memory to 4.5.5 to match IKVM 8.2.0 --- .build/dependencies.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.build/dependencies.props b/.build/dependencies.props index e7ffb9fdff..de8e3c0e40 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -72,7 +72,7 @@ 2.7.8 1.4.2 0.4.1.1 - 4.5.4 + 4.5.5 4.3.0 4.3.0 4.3.0 From 
98dc38e3250c4ef781b09c39d175debb573986e1 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Tue, 16 May 2023 20:30:29 +0700 Subject: [PATCH 07/27] .build/dependencies.props: Bumped System.Runtime.CompilerServices.Unsafe to 6.0.0 to match IKVM 8.5.0 --- .build/dependencies.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.build/dependencies.props b/.build/dependencies.props index de8e3c0e40..c684f69e8f 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -76,7 +76,7 @@ 4.3.0 4.3.0 4.3.0 - 5.0.0 + 6.0.0 4.3.0 6.0.1 4.3.0 From fbeeb022f6af3d6b79be761fc7fc57498eea78ac Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Tue, 16 May 2023 20:48:47 +0700 Subject: [PATCH 08/27] Lucene.Net.csproj: Added direct dependency on System.Runtime.CompilerServices.Unsafe for netstandard2.0 and net462 to ensure the version will work with any combination of Lucene.Net components. This is a transitive dependency in a few 3rd party DLLs, but there may be version conflicts if this isn't done on .NET Framework. --- .build/TestReferences.Common.targets | 4 ---- src/Lucene.Net/Lucene.Net.csproj | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.build/TestReferences.Common.targets b/.build/TestReferences.Common.targets index 3811a42fdf..9eab94e1c1 100644 --- a/.build/TestReferences.Common.targets +++ b/.build/TestReferences.Common.targets @@ -25,8 +25,4 @@ - - - - \ No newline at end of file diff --git a/src/Lucene.Net/Lucene.Net.csproj b/src/Lucene.Net/Lucene.Net.csproj index a1889b96bd..2491eccf46 100644 --- a/src/Lucene.Net/Lucene.Net.csproj +++ b/src/Lucene.Net/Lucene.Net.csproj @@ -60,10 +60,14 @@ + + + + From 0155ec6682999d0087d5b7151c262f59ea5a3ea5 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Tue, 16 May 2023 20:50:34 +0700 Subject: [PATCH 09/27] Lucene.Net.Facet.csproj: Added explicit dependency on System.Memory for netstandard2.0 and net462, since it is being used in Lucene.Net.Facet. 
--- src/Lucene.Net.Facet/Lucene.Net.Facet.csproj | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Lucene.Net.Facet/Lucene.Net.Facet.csproj b/src/Lucene.Net.Facet/Lucene.Net.Facet.csproj index 5f489ec875..2406b72f5a 100644 --- a/src/Lucene.Net.Facet/Lucene.Net.Facet.csproj +++ b/src/Lucene.Net.Facet/Lucene.Net.Facet.csproj @@ -38,13 +38,19 @@ $(NoWarn);1591;1573 - - + + + + + + + + From af204a1feb2924ba1ec456667dbde4fd5f1aa762 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Wed, 17 May 2023 01:15:43 +0700 Subject: [PATCH 10/27] Lucene.Net.TestFramework.csproj: Added dependency on System.Text.Json to pin the version so it matches the reference of IKVM 8.5.0 (6.0.6). --- .build/dependencies.props | 1 + src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/.build/dependencies.props b/.build/dependencies.props index c684f69e8f..d25f15e121 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -81,6 +81,7 @@ 6.0.1 4.3.0 5.0.0 + 6.0.6 6.1.0 \ No newline at end of file diff --git a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj index 712ec3b0e2..4eb51d734b 100644 --- a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj +++ b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj @@ -66,6 +66,11 @@ + + + + + From 47f7608d7ec42df0bef58c8cf9181769f546ceb4 Mon Sep 17 00:00:00 2001 From: Laimonas Simutis Date: Thu, 11 May 2023 14:03:43 -0700 Subject: [PATCH 11/27] upgrade targets to target .net core 6, in addition to .net framework --- .../Lucene.Net.Analysis.OpenNLP.csproj | 1 + .../Lucene.Net.Tests.Analysis.OpenNLP.csproj | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index eb9c22ef0f..de8da84c9d 100644 --- 
a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -31,6 +31,7 @@ net6.0;net462 + net462;net6.0; Lucene.Net.Analysis.OpenNLP $(PackageTags);analysis;natural;language;processing;opennlp diff --git a/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj b/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj index 1428120861..d880584623 100644 --- a/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Tests.Analysis.OpenNLP/Lucene.Net.Tests.Analysis.OpenNLP.csproj @@ -27,6 +27,7 @@ net7.0;net6.0 $(TargetFrameworks);net48 + Lucene.Net.Tests.Analysis.OpenNLP Lucene.Net.Analysis.OpenNlp From 4812657516fcfcdef086b8f458a332b30e5c9b17 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Tue, 16 May 2023 22:28:17 +0700 Subject: [PATCH 12/27] Lucene.Net.Analysis.OpenNLP.csproj, .build/dependencies.props: Changed to use to build opennlp-tools instead of using the pre-built OpenNLP.NET NuGet package. 
--- .build/dependencies.props | 3 ++- .../Lucene.Net.Analysis.OpenNLP.csproj | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.build/dependencies.props b/.build/dependencies.props index d25f15e121..47c358bc84 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -38,6 +38,8 @@ $(ICU4NPackageVersion) $(ICU4NPackageVersion) $(ICU4NPackageVersion) + 8.5.0 + 1.5.0-develop0024 2.0.0 1.0.9 2.0.0 @@ -68,7 +70,6 @@ 13.0.1 3.17.0 3.13.1 - 1.9.4.1 2.7.8 1.4.2 0.4.1.1 diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index de8da84c9d..9c1897bba7 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -40,8 +40,6 @@ Lucene.Net.Analysis.OpenNlp - - @@ -50,7 +48,12 @@ - + + + + + + From ab84c28f8ae78b98c2f254881f7763d80de5df09 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Fri, 12 Jan 2024 11:37:13 +0700 Subject: [PATCH 13/27] .build/dependencies.props: bumped IKVM to 8.7.3 and IKVM.Maven.Sdk to 1.6.7 --- .build/dependencies.props | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.build/dependencies.props b/.build/dependencies.props index 47c358bc84..13205895b0 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -38,8 +38,8 @@ $(ICU4NPackageVersion) $(ICU4NPackageVersion) $(ICU4NPackageVersion) - 8.5.0 - 1.5.0-develop0024 + 8.7.3 + 1.6.7 2.0.0 1.0.9 2.0.0 From fedcb53c4c9796db837be877e3d87080ee17d77e Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Fri, 12 Jan 2024 18:39:22 +0700 Subject: [PATCH 14/27] Lucene.Net.Analysis.OpenNLP.csproj: Removed duplicate TargetFrameworks declaration --- .../Lucene.Net.Analysis.OpenNLP.csproj | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj 
b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index 9c1897bba7..bd9a13b4ed 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -31,7 +31,6 @@ net6.0;net462 - net462;net6.0; Lucene.Net.Analysis.OpenNLP $(PackageTags);analysis;natural;language;processing;opennlp From 3a39a6d902b0069f952e381f8eb1ecfa58241178 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Sat, 13 Jan 2024 02:06:44 +0700 Subject: [PATCH 15/27] Lucene.Net.Analysis.OpenNLP: Changed target from net462 > net472, the minimum supported by IKVM. --- .../Lucene.Net.Analysis.OpenNLP.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index bd9a13b4ed..42c709c1d6 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -30,7 +30,7 @@ - net6.0;net462 + net6.0;net472 Lucene.Net.Analysis.OpenNLP $(PackageTags);analysis;natural;language;processing;opennlp From 64f6c8d9d68e84be6267aae5c250bfe772229cef Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Sat, 13 Jan 2024 16:49:58 +0700 Subject: [PATCH 16/27] Directory.Build.targets: Updated FEATURE_OPENNLP to be available on .NET Core --- Directory.Build.targets | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Directory.Build.targets b/Directory.Build.targets index 92406f90f8..f7e801610e 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -128,9 +128,9 @@ full - + - + $(DefineConstants);FEATURE_OPENNLP From 0c25237385116c9be41d648c7ab9ec73b0edddf8 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Sat, 13 Jan 2024 16:57:54 +0700 Subject: [PATCH 17/27] Lucene.Net.Tests.AllProjects: Updated references so we can successfully compile with IKVM in the mix, 
both on .NET Framework and .NET Core --- .../Lucene.Net.Tests.AllProjects.csproj | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/Lucene.Net.Tests.AllProjects/Lucene.Net.Tests.AllProjects.csproj b/src/Lucene.Net.Tests.AllProjects/Lucene.Net.Tests.AllProjects.csproj index 6219d75962..92056ba0ab 100644 --- a/src/Lucene.Net.Tests.AllProjects/Lucene.Net.Tests.AllProjects.csproj +++ b/src/Lucene.Net.Tests.AllProjects/Lucene.Net.Tests.AllProjects.csproj @@ -118,18 +118,10 @@ - + - - - - - - - - From 5dd48e9e2c59ad22c6398e2deeeccd94a39c2460 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Sat, 13 Jan 2024 17:45:50 +0700 Subject: [PATCH 18/27] .build/dependencies.props: Added OpenNLP MavenReference version so it can be managed with the other packages --- .build/dependencies.props | 3 +++ .../Lucene.Net.Analysis.OpenNLP.csproj | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.build/dependencies.props b/.build/dependencies.props index 13205895b0..f4655ae7e8 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -85,4 +85,7 @@ 6.0.6 6.1.0 + + 1.9.4 + \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index 42c709c1d6..7a06847b8b 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -52,7 +52,7 @@ - + From 57df67c556564f6d7c11c2a8ff5a334bedceaa7d Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Sat, 13 Jan 2024 20:52:58 +0700 Subject: [PATCH 19/27] Lucene.Net.Analysis.OpenNLP: Added Maven dependency on org.osgi.core to eliminate build warnings (at least 1 type is referenced in opennlp-tools) --- .build/dependencies.props | 1 + .../Lucene.Net.Analysis.OpenNLP.csproj | 1 + 2 files changed, 2 insertions(+) diff --git a/.build/dependencies.props b/.build/dependencies.props index 
f4655ae7e8..eb0ff2e27c 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -87,5 +87,6 @@ 1.9.4 + 4.2.0 \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index 7a06847b8b..fc50d5a320 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -53,6 +53,7 @@ + From 95891bd9b1691a00d3f0de6fce9f7b6a97b9e80f Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Thu, 25 Jan 2024 22:52:59 +0700 Subject: [PATCH 20/27] Lucene.Net.Analysis.OpenNLP: For now, making net472 conditional based on Windows due to lack of non-Windows build support in IKVM 8.7.3 (see: https://github.com/ikvmnet/ikvm-maven/issues/49). --- .../Lucene.Net.Analysis.OpenNLP.csproj | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj index fc50d5a320..33920cdd4a 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj +++ b/src/Lucene.Net.Analysis.OpenNLP/Lucene.Net.Analysis.OpenNLP.csproj @@ -30,7 +30,10 @@ - net6.0;net472 + + net6.0 + $(TargetFrameworks);net472 Lucene.Net.Analysis.OpenNLP $(PackageTags);analysis;natural;language;processing;opennlp From 944d8c75651682e6a6439ca4d3bcebdeaeb54bd7 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Tue, 30 Jan 2024 17:09:59 +0700 Subject: [PATCH 21/27] .build/dependencies.props: Reverted back to OpenNLP 1.9.1 because of build issues with opennlp-uima on 1.9.4. This aligns with Lucene 8.2.0. 
--- .build/dependencies.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.build/dependencies.props b/.build/dependencies.props index eb0ff2e27c..8fd8b2fb46 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -86,7 +86,7 @@ 6.1.0 - 1.9.4 + 1.9.1 4.2.0 \ No newline at end of file From a3341439cc6f1335c436626a91c2bf89edc8f813 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Wed, 31 Jan 2024 00:37:23 +0700 Subject: [PATCH 22/27] publish-nuget-packages.yml: Remove forward slash --- .build/azure-templates/publish-nuget-packages.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.build/azure-templates/publish-nuget-packages.yml b/.build/azure-templates/publish-nuget-packages.yml index 184fa2de60..92d63d2136 100644 --- a/.build/azure-templates/publish-nuget-packages.yml +++ b/.build/azure-templates/publish-nuget-packages.yml @@ -1,4 +1,4 @@ -# Licensed to the Apache Software Foundation (ASF) under one +# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. 
The ASF licenses this file @@ -57,7 +57,7 @@ steps: inputs: command: push packagesToPush: '${{ parameters.nugetArtifactName }}/*.nupkg;!${{ parameters.nugetArtifactName }}/*.symbols.nupkg' - publishVstsFeed: '/${{ parameters.artifactFeedID }}' + publishVstsFeed: '${{ parameters.artifactFeedID }}' allowPackageConflicts: true - task: PublishSymbols@2 From 2a7bbc94fbb88f25e729a084219b4ce8d15b864f Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Wed, 31 Jan 2024 11:25:59 +0700 Subject: [PATCH 23/27] .build/dependencies.props: Bumped IKVM to 8.7.5 --- .build/dependencies.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.build/dependencies.props b/.build/dependencies.props index 8fd8b2fb46..22d4e0e23c 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -38,7 +38,7 @@ $(ICU4NPackageVersion) $(ICU4NPackageVersion) $(ICU4NPackageVersion) - 8.7.3 + 8.7.5 1.6.7 2.0.0 1.0.9 From e742bbb7816dbf6bae0d239dd6bce53576122d34 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Wed, 31 Jan 2024 21:07:32 +0700 Subject: [PATCH 24/27] Lucene.Net.Analysis.OpenNLP/overview.md: Added missing docs from Lucene and link to MavenReference demo. Fixes #890. --- src/Lucene.Net.Analysis.OpenNLP/overview.md | 98 ++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/src/Lucene.Net.Analysis.OpenNLP/overview.md b/src/Lucene.Net.Analysis.OpenNLP/overview.md index ba4f9fc9b6..1ea9d10636 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/overview.md +++ b/src/Lucene.Net.Analysis.OpenNLP/overview.md @@ -21,4 +21,100 @@ summary: *content limitations under the License. --> -OpenNLP Library Integration \ No newline at end of file +OpenNLP Library Integration + +This module exposes functionality from Apache OpenNLP to Apache Lucene.NET. The Apache OpenNLP library is a machine learning based toolkit for the processing of natural language text. 
+ +For an introduction to Lucene's analysis API, see the [Lucene.Net.Analysis](../core/Lucene.Net.Analysis.html) namespace documentation. + +The OpenNLP Tokenizer behavior is similar to the `WhitespaceTokenizer` but is smart about inter-word punctuation. The term stream looks very much like the way you parse words and punctuation while reading. The major difference between this tokenizer and most other tokenizers shipped with Lucene is that punctuation is tokenized. This is required for the following taggers to operate properly. + +The OpenNLP taggers annotate terms using the `TypeAttribute`. + +`OpenNLPTokenizer` segments text into sentences or words. This Tokenizer uses the OpenNLP Sentence Detector and/or Tokenizer classes. When used together, the Tokenizer receives sentences and can do a better job. +`OpenNLPFilter` tags words using one or more technologies: Part-of-Speech, Chunking, and Named Entity Recognition. These tags are assigned as token types. Note that only one of these operations will tag +Since the `TypeAttribute` is not stored in the index, it is recommended that one of these filters is used following OpenNLPFilter to enable search against the assigned tags: + +`TypeAsPayloadFilter` copies the `TypeAttribute` value to the `PayloadAttribute` +`TypeAsSynonymFilter` creates a cloned token at the same position as each tagged token, and copies the `TypeAttribute` value to the `CharTermAttribute`, optionally with a customized prefix (so that tags effectively occupy a different namespace from token text). + +## MavenReference Primer + +When a `<PackageReference>` is included for this NuGet package in your SDK-style MSBuild project, it will automatically include transitive dependencies to [`opennlp-tools` on maven.org](https://search.maven.org/artifact/org.apache.opennlp/opennlp-tools/1.9.4/bundle). The transitive dependency will automatically include a `<MavenReference>` in your MSBuild project. + +The `<MavenReference>` item group operates similar to a dependency in Maven. All transitive dependencies are collected and resolved, and then the final output is produced. However, unlike `PackageReference`s, `MavenReference`s are collected by the final output project, and reassessed. 
That is, each dependent Project within your .NET SDK-style solution contributes its `MavenReference`s to project(s) which include it, and each project makes its own dependency graph. Projects do not contribute their final built assemblies up. They only contribute their dependencies, allowing each project in a complicated solution to make its own local conflict resolution attempt. + +> **NOTE** `<MavenReference>` is only supported on SDK-style MSBuild projects. + +## MavenReference Example + +This means this package can be combined with other related packages on Maven in your project and they can be accessed using the same path as in Java like a namespace in .NET. For example, you can add a `<MavenReference>` to your project to include a reference to [`opennlp-uima`](https://search.maven.org/artifact/org.apache.opennlp/opennlp-uima/1.9.1/jar). The UIMA (Unstructured Information Management Architecture) integration module is designed to work with the Apache UIMA framework. UIMA is a framework for building applications that analyze unstructured information, and it's often used for processing natural language text. The opennlp-uima module allows you to integrate OpenNLP functionality into UIMA pipelines, leveraging the capabilities of both frameworks. + +Here's a basic outline of how you might extend an existing Lucene.NET analyzer to incorporate OpenNLP-UIMA annotators: + +```xml + + + net7.0 + + + + + + + + + + +``` + +```c# +using Lucene.Net.Analysis; +using Lucene.Net.Analysis.Core; +using Lucene.Net.Analysis.Util; +using Lucene.Net.Util; +using org.apache.uima.analysis_engine; +using System.IO; + +public class CustomOpenNLPAnalyzer : OpenNLPTokenizerFactory +{ + // ... constructor and other methods ... 
+ + public override Tokenizer Create(AttributeFactory factory, TextReader reader) + { + Tokenizer tokenizer = base.Create(factory, reader); + + // Wrap the tokenizer with UIMA annotators + AnalysisEngineDescription uimaSentenceAnnotator = CreateUIMASentenceAnnotator(); + AnalysisEngineDescription uimaTokenAnnotator = CreateUIMATokenAnnotator(); + + // Combine OpenNLP-UIMA annotators with the existing tokenizer + AnalysisEngine tokenizerAndUIMAAnnotators = CreateAggregate(uimaSentenceAnnotator, uimaTokenAnnotator); + + return new UIMATokenizer(tokenizer, tokenizerAndUIMAAnnotators); + } + + // ... other methods ... + + private AnalysisEngineDescription CreateUIMASentenceAnnotator() { + // Create and configure UIMA sentence annotator + // ... + + return /* UIMA sentence annotator description */; + } + + private AnalysisEngineDescription CreateUIMATokenAnnotator() { + // Create and configure UIMA token annotator + // ... + + return /* UIMA token annotator description */; + } +} +``` + +In the above example, `CustomOpenNLPAnalyzer` extends `OpenNLPTokenizerFactory` (assuming that's the tokenizer factory you're using), and it wraps the OpenNLP tokenizer with UIMA annotators. You'll need to replace the placeholder methods (`CreateUIMASentenceAnnotator` and `CreateUIMATokenAnnotator`) with the actual code to create and configure your UIMA annotators. Please note that configuring NLP can be complex. See the [OpenNLP 1.9.4 Manual](https://opennlp.apache.org/docs/1.9.4/manual/opennlp.html) and [OpenNLP UIMA 1.9.4 API Documentation](https://opennlp.apache.org/docs/1.9.4/apidocs/opennlp-uima/index.html) for details. + +> [!NOTE] +> IKVM (and `<MavenReference>`) does not support Java SE higher than version 8. So it will not be possible to add a `<MavenReference>` to OpenNLP 2.x until support is added for it in IKVM. + +For a more complete example, see the [lucenenet-opennlp-mavenreference-demo](https://github.com/NightOwl888/lucenenet-opennlp-mavenreference-demo). 
\ No newline at end of file From 79d4610b92868d09b3064c05c65102c3257ac0cf Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Thu, 1 Feb 2024 12:39:50 +0700 Subject: [PATCH 25/27] FEATURE: Lucene.Net.Analysis.Miscellaneous: Added TypeAsSynonymFilter from Lucene 8.2.0 because it is called out in the docs as part of the process of configuring Lucene.Net.Analysis.OpenNLP. Changed CannedTokenStream to set ITypeAttribute.Type because it is required by the tests for TypeAsSynonymFilter. --- .../Miscellaneous/TypeAsSynonymFilter.cs | 97 +++++++++++++++++++ .../TypeAsSynonymFilterFactory.cs | 62 ++++++++++++ .../Analysis/CannedTokenStream.cs | 10 +- .../TestTypeAsSynonymFilterFactory.cs | 54 +++++++++++ 4 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs create mode 100644 src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilterFactory.cs create mode 100644 src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs new file mode 100644 index 0000000000..b72ec02e0d --- /dev/null +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs @@ -0,0 +1,97 @@ +// Lucene version compatibility level 8.2.0 +// LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful. +using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Util; +#nullable enable + +namespace Lucene.Net.Analysis.Miscellaneous +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// + /// Adds the as a synonym, + /// i.e. another token at the same position, optionally with a specified prefix prepended. + /// + public sealed class TypeAsSynonymFilter : TokenFilter + { + private readonly ICharTermAttribute termAtt; + private readonly ITypeAttribute typeAtt; + private readonly IPositionIncrementAttribute posIncrAtt; + private readonly string? prefix; + + private State? savedToken = null; + + /// + /// Initializes a new instance of with + /// the specified token stream. + /// + /// Input token stream. + public TypeAsSynonymFilter(TokenStream input) + : this(input, null) + { + } + + /// + /// Initializes a new instance of with + /// the specified token stream and prefix. + /// + /// Input token stream. + /// Prepend this string to every token type emitted as token text. + /// If null, nothing will be prepended. + public TypeAsSynonymFilter(TokenStream input, string? 
prefix) + : base(input) + { + this.prefix = prefix; + termAtt = AddAttribute<ICharTermAttribute>(); + typeAtt = AddAttribute<ITypeAttribute>(); + posIncrAtt = AddAttribute<IPositionIncrementAttribute>(); + } + + + public override bool IncrementToken() + { + if (savedToken != null) + { + // Emit last token's type at the same position + RestoreState(savedToken); + savedToken = null; + termAtt.SetEmpty(); + if (prefix != null) + { + termAtt.Append(prefix); + } + termAtt.Append(typeAtt.Type); + posIncrAtt.PositionIncrement = 0; + return true; + } + else if (m_input.IncrementToken()) + { + // No pending token type to emit + savedToken = CaptureState(); + return true; + } + return false; + } + + public override void Reset() + { + base.Reset(); + savedToken = null; + } + } +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilterFactory.cs new file mode 100644 index 0000000000..462be60bd7 --- /dev/null +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilterFactory.cs @@ -0,0 +1,62 @@ +// Lucene version compatibility level 8.2.0 +// LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful. +using Lucene.Net.Analysis.Util; +using System; +using System.Collections.Generic; +#nullable enable + +namespace Lucene.Net.Analysis.Miscellaneous +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// + /// Factory for <see cref="TypeAsSynonymFilter"/>. + /// + /// <fieldType name="text_type_as_synonym" class="solr.TextField" positionIncrementGap="100"> + /// <analyzer> + /// <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/> + /// <filter class="solr.TypeAsSynonymFilterFactory" prefix="_type_" /> + /// </analyzer> + /// </fieldType> + /// + /// + /// + /// If the optional prefix parameter is used, the specified value will be prepended + /// to the type, e.g. with prefix = "_type_", for a token "example.com" with type "<URL>", + /// the emitted synonym will have text "_type_<URL>". + /// + public class TypeAsSynonymFilterFactory : TokenFilterFactory + { + private readonly string prefix; + + public TypeAsSynonymFilterFactory(IDictionary<string, string> args) + : base(args) + { + prefix = Get(args, "prefix"); // default value is null + if (args.Count > 0) + { + throw new ArgumentException(string.Format(J2N.Text.StringFormatter.CurrentCulture, "Unknown parameters: {0}", args)); + } + } + + public override TokenStream Create(TokenStream input) + { + return new TypeAsSynonymFilter(input, prefix); + } + } +} diff --git a/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs b/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs index e7d787d041..824eaec95d 100644 --- a/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs +++ b/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs @@ -1,4 +1,4 @@ -using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes; namespace Lucene.Net.Analysis { @@ -31,6 +31,7 @@ public sealed class
CannedTokenStream : TokenStream private readonly IPositionLengthAttribute posLengthAtt; private readonly IOffsetAttribute offsetAtt; private readonly IPayloadAttribute payloadAtt; + private readonly ITypeAttribute typeAtt; // LUCENENET specific - See IncrementToken() private readonly int finalOffset; private readonly int finalPosInc; @@ -49,6 +50,7 @@ public CannedTokenStream(int finalPosInc, int finalOffset, params Token[] tokens posLengthAtt = AddAttribute<IPositionLengthAttribute>(); offsetAtt = AddAttribute<IOffsetAttribute>(); payloadAtt = AddAttribute<IPayloadAttribute>(); + typeAtt = AddAttribute<ITypeAttribute>(); // LUCENENET specific - See IncrementToken() this.tokens = tokens; this.finalOffset = finalOffset; @@ -76,6 +78,12 @@ public override bool IncrementToken() posLengthAtt.PositionLength = token.PositionLength; offsetAtt.SetOffset(token.StartOffset, token.EndOffset); payloadAtt.Payload = token.Payload; + + // LUCENENET: This change is from https://github.com/apache/lucene/commit/72eaeab7151d421a28ecec1634b8c48599e524f5. + // We need it for the TestTypeAsSynonymFilterFactory tests to pass (from lucene 8.2.0). + // But we don't yet have all of the PackedTokenAttributeImpl plumbing it takes to do it the way they did, + // so setting it explicitly as a workaround. + typeAtt.Type = token.Type; return true; } else diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs new file mode 100644 index 0000000000..08f7e396ef --- /dev/null +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs @@ -0,0 +1,54 @@ +using Lucene.Net.Analysis.Util; +using NUnit.Framework; + +namespace Lucene.Net.Analysis.Miscellaneous +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + public class TestTypeAsSynonymFilterFactory : BaseTokenStreamFactoryTestCase + { + private static readonly Token[] TOKENS = { token("Visit", ""), token("example.com", "") }; + + [Test] + public void TestBasic() + { + TokenStream stream = new CannedTokenStream(TOKENS); + stream = TokenFilterFactory("TypeAsSynonym").Create(stream); + AssertTokenStreamContents(stream, new string[] { "Visit", "", "example.com", "" }, + null, null, new string[] { "", "", "", "" }, new int[] { 1, 0, 1, 0 }); + } + + [Test] + public void TestPrefix() + { + TokenStream stream = new CannedTokenStream(TOKENS); + stream = TokenFilterFactory("TypeAsSynonym", "prefix", "_type_").Create(stream); + AssertTokenStreamContents(stream, new string[] { "Visit", "_type_", "example.com", "_type_" }, + null, null, new string[] { "", "", "", "" }, new int[] { 1, 0, 1, 0 }); + } + + private static Token token(string term, string type) + { + Token token = new Token(); + token.SetEmpty(); + token.Append(term); + token.Type = type; + return token; + } + } +} From 4e73b61847b44a45847722ac9fbe7e8e3264d3eb Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Thu, 1 Feb 2024 15:40:19 +0700 Subject: [PATCH 26/27] Lucene.Net.Analysis.Miscellaneous.TestTypeAsSynonymFilterFactory: Added comment with lucene version compatibility level (to indicate we ported it from Lucene 8.2.0) --- 
.../Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs index 08f7e396ef..8f363f4f9b 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs @@ -1,4 +1,6 @@ -using Lucene.Net.Analysis.Util; +// Lucene version compatibility level 8.2.0 +// LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful. +using Lucene.Net.Analysis.Util; using NUnit.Framework; namespace Lucene.Net.Analysis.Miscellaneous From 32d5cc9376a9f159b0d21c127da4b171db9fed13 Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Thu, 1 Feb 2024 15:56:47 +0700 Subject: [PATCH 27/27] Lucene.Net.Analysis.OpenNLP.overview.md: Corrected information about which filters are included in the package (there is no NER filter in the box) --- src/Lucene.Net.Analysis.OpenNLP/overview.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Lucene.Net.Analysis.OpenNLP/overview.md b/src/Lucene.Net.Analysis.OpenNLP/overview.md index 1ea9d10636..882ddff08c 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/overview.md +++ b/src/Lucene.Net.Analysis.OpenNLP/overview.md @@ -31,12 +31,14 @@ The OpenNLP Tokenizer behavior is similar to the . - segments text into sentences or words. This Tokenizer uses the OpenNLP Sentence Detector and/or Tokenizer classes. When used together, the Tokenizer receives sentences and can do a better job. - tags words using one or more technologies: Part-of-Speech, Chunking, and Named Entity Recognition. These tags are assigned as token types. Note that only one of these operations will tag +- segments text into sentences or words. 
This Tokenizer uses the OpenNLP Sentence Detector and/or Tokenizer classes. When used together, the Tokenizer receives sentences and can do a better job. +- tags words for Part-of-Speech and tags words for Chunking. These tags are assigned as token types. Note that only one of these operations will tag Since the is not stored in the index, it is recommended that one of these filters is used following OpenNLPFilter to enable search against the assigned tags: - copies the value to the - creates a cloned token at the same position as each tagged token, and copies the value to the , optionally with a customized prefix (so that tags effectively occupy a different namespace from token text). +- copies the value to the +- creates a cloned token at the same position as each tagged token, and copies the value to the , optionally with a customized prefix (so that tags effectively occupy a different namespace from token text). + +Named Entity Recognition is also supported by OpenNLP, but there is no OpenNLPNERFilter included. For an implementation, see the [lucenenet-opennlp-mavenreference-demo](https://github.com/NightOwl888/lucenenet-opennlp-mavenreference-demo). ## MavenReference Primer