Skip to content

Commit

Permalink
Merge pull request #775 from tballison/NUTCH-2998
Browse files (browse the repository at this point in the history)
NUTCH-2998 -- Remove Any23 from Nutch
  • Loading branch information
tballison authored Sep 14, 2023
2 parents f078a88 + 8a5ef49 commit 0ad935f
Show file tree
Hide file tree
Showing 20 changed files with 3 additions and 4,808 deletions.
5 changes: 0 additions & 5 deletions LICENSE-binary
Original file line number Diff line number Diff line change
Expand Up @@ -327,11 +327,6 @@ net.sourceforge.owlapi:owlapi-impl
net.sourceforge.owlapi:owlapi-parsers
net.sourceforge.owlapi:owlapi-rio
net.sourceforge.owlapi:owlapi-tools
org.apache.any23:apache-any23-api
org.apache.any23:apache-any23-core
org.apache.any23:apache-any23-csvutils
org.apache.any23:apache-any23-encoding
org.apache.any23:apache-any23-mime
org.apache.avro:avro
org.apache.commons:commons-collections4
org.apache.commons:commons-compress
Expand Down
5 changes: 1 addition & 4 deletions NOTICE-binary
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ code and source code.

The following provides more details on the included cryptographic software:

The plugins parse-tika and any23 use Apache Tika and the Bouncy Castle
The parse-tika plugin uses Apache Tika and the Bouncy Castle
generic encryption libraries for extracting text content and metadata
from encrypted PDF files. See <https://www.bouncycastle.org/> for more
details on Bouncy Castle and <https://tika.apache.org/> for details
Expand All @@ -46,9 +46,6 @@ on Apache Tika.
Apache projects
---------------

Apache Any23 (https://any23.apache.org/)
see https://github.com/apache/any23/blob/master/NOTICE.txt

Apache Avro (https://avro.apache.org)
see https://github.com/apache/avro/blob/master/NOTICE.txt

Expand Down
2 changes: 1 addition & 1 deletion NOTICE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ code and source code.

The following provides more details on the included cryptographic software:

The plugins parse-tika and any23 use Apache Tika and the Bouncy Castle
The parse-tika plugin uses Apache Tika and the Bouncy Castle
generic encryption libraries for extracting text content and metadata
from encrypted PDF files. See <https://www.bouncycastle.org/> for more
details on Bouncy Castle and <https://tika.apache.org/> for details
Expand Down
5 changes: 0 additions & 5 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@
<arg value="--no-module-directories" if:set="using.jdk.11"/>

<packageset dir="${src.dir}"/>
<packageset dir="${plugins.dir}/any23/src/java/"/>
<packageset dir="${plugins.dir}/creativecommons/src/java"/>
<packageset dir="${plugins.dir}/feed/src/java"/>
<packageset dir="${plugins.dir}/headings/src/java"/>
Expand Down Expand Up @@ -687,7 +686,6 @@
<arg value="--no-module-directories" if:set="using.jdk.11"/>

<packageset dir="${src.dir}"/>
<packageset dir="${plugins.dir}/any23/src/java/" />
<packageset dir="${plugins.dir}/creativecommons/src/java"/>
<packageset dir="${plugins.dir}/feed/src/java"/>
<packageset dir="${plugins.dir}/headings/src/java"/>
Expand Down Expand Up @@ -772,7 +770,6 @@
<classpath>
<fileset dir="${build.plugins}" >
<include name="**/*.jar"/>
<exclude name="any23/javax.annotation-api*.jar"/>
</fileset>
</classpath>

Expand Down Expand Up @@ -1180,8 +1177,6 @@
<source path="${basedir}/src/java/" />
<source path="${basedir}/src/test/" output="build/test/classes" />

<source path="${plugins.dir}/any23/src/java/" />
<source path="${plugins.dir}/any23/src/test/" />
<source path="${plugins.dir}/creativecommons/src/java/" />
<source path="${plugins.dir}/creativecommons/src/test/" />
<source path="${plugins.dir}/feed/src/java/" />
Expand Down
16 changes: 0 additions & 16 deletions conf/nutch-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1353,22 +1353,6 @@
</description>
</property>



<!-- any23 plugin properties -->

<property>
<name>any23.extractors</name>
<value>html-microdata</value>
<description>Comma-separated list of Any23 extractors (a list of extractors is available here: http://any23.apache.org/getting-started.html)</description>
</property>

<property>
<name>any23.content_types</name>
<value>text/html,application/xhtml+xml</value>
<description>Comma-separated list of content-types onto which Any23 extractors should be applied (see http://www.iana.org/assignments/media-types/). If empty, all content-types are supported.</description>
</property>

<!-- moreindexingfilter plugin properties -->

<property>
Expand Down
4 changes: 1 addition & 3 deletions default.properties
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,4 @@ plugins.misc=\
org.apache.nutch.collection*:\
org.apache.nutch.analysis.lang*:\
org.creativecommons.nutch*:\
org.apache.nutch.microformats.reltag*:\
org.apache.nutch.any23*

org.apache.nutch.microformats.reltag*:
47 changes: 0 additions & 47 deletions src/plugin/any23/build-ivy.xml

This file was deleted.

36 changes: 0 additions & 36 deletions src/plugin/any23/build.xml

This file was deleted.

22 changes: 0 additions & 22 deletions src/plugin/any23/howto_upgrade_any23.txt

This file was deleted.

49 changes: 0 additions & 49 deletions src/plugin/any23/ivy.xml

This file was deleted.

Loading

0 comments on commit 0ad935f

Please sign in to comment.