Skip to content

Commit

Permalink
include opennlp lang model in tika-eval during assembly
Browse files Browse the repository at this point in the history
escape paths with ProcessUtils.escapeCommandLine so command lines built in unit tests are OS-independent
add -Djava.awt.headless=true to the java command lines launched by tests
  • Loading branch information
tballison committed Jul 29, 2019
1 parent c25b81d commit 357c163
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 16 deletions.
3 changes: 2 additions & 1 deletion assembly.xml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
<excludes>
<exclude>**/target/**</exclude>
<exclude>**/.*/**</exclude>
<exclude>**/opennlp/*.bin</exclude>
<exclude>**/opennlp/ner-*.bin</exclude>
<exclude>**/opennlp/en-*.bin</exclude>
<exclude>**/recognition/*.bin</exclude>
<exclude>**/*.releaseBackup</exclude>
</excludes>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.tika.utils.ProcessUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
Expand Down Expand Up @@ -176,9 +177,7 @@ public void testOneDirOneFileException() throws Exception {
boolean ex = false;
try {
String path = testFile.toAbsolutePath().toString();
if (path.contains(" ")) {
path = "\"" + path + "\"";
}
path = ProcessUtils.escapeCommandLine(path);
String[] params = {testInputPathForCommandLine, path};

String[] commandLine = BatchCommandLineBuilder.build(params);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ public ProcessBuilder getNewBatchRunnerProcess(String testConfig, String loggerP
private String[] commandLine(String testConfig, String loggerProps, String[] args) {
List<String> commandLine = new ArrayList<>();
commandLine.add("java");
commandLine.add("-Djava.awt.headless=true");
commandLine.add("-Dlog4j.configuration=file:"+
this.getClass().getResource(loggerProps).getFile());
commandLine.add("-Xmx128m");
Expand Down Expand Up @@ -200,6 +201,7 @@ public BatchProcessDriverCLI getNewDriver(String testConfig,
String[] args) throws Exception {
List<String> commandLine = new ArrayList<>();
commandLine.add("java");
commandLine.add("-Djava.awt.headless=true");
commandLine.add("-Xmx128m");
commandLine.add("-cp");
String cp = System.getProperty("java.class.path");
Expand Down
57 changes: 45 additions & 12 deletions tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

import org.apache.commons.io.FileUtils;
import org.apache.tika.TikaTest;
import org.apache.tika.utils.ProcessUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
Expand Down Expand Up @@ -123,9 +124,14 @@ private static void compare() throws IOException {
List<String> args = new ArrayList<>();
args.add("Compare");
args.add("-extractsA");
args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
args.add(
ProcessUtils.escapeCommandLine(
extractsDir.resolve("extractsA")
.toAbsolutePath().toString()));
args.add("-extractsB");
args.add(extractsDir.resolve("extractsB").toAbsolutePath().toString());
args.add(ProcessUtils.escapeCommandLine(
extractsDir.resolve("extractsB")
.toAbsolutePath().toString()));
//add these just to confirm this info doesn't cause problems w cli
args.add("-maxTokens");
args.add("10000000");
Expand All @@ -135,7 +141,10 @@ private static void compare() throws IOException {
args.add("100000");

args.add("-db");
args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
args.add(
ProcessUtils.escapeCommandLine(
compareDBDir
.toAbsolutePath().toString()+"/"+dbName));

execute(args, 60000);

Expand All @@ -145,7 +154,9 @@ private static void profile() throws IOException {
List<String> args = new ArrayList<>();
args.add("Profile");
args.add("-extracts");
args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
args.add(ProcessUtils.escapeCommandLine(
extractsDir.resolve("extractsA")
.toAbsolutePath().toString()));
//add these just to confirm this info doesn't cause problems w cli
args.add("-maxTokens");
args.add("10000000");
Expand All @@ -155,27 +166,39 @@ private static void profile() throws IOException {
args.add("100000");

args.add("-db");
args.add(profileDBDir.toAbsolutePath().toString()+"/"+dbName);
args.add(
ProcessUtils.escapeCommandLine(
profileDBDir
.toAbsolutePath().toString()+"/"+dbName));
execute(args, 60000);
}

/**
 * Builds the argument list for the "Report" command against the profile
 * database and passes it to {@code execute} with a limit of 60000
 * (the {@code maxMillis} argument).
 *
 * NOTE(review): this listing looks like a diff view with the +/- markers
 * stripped. Each plain {@code args.add(path)} is immediately followed by a
 * {@code ProcessUtils.escapeCommandLine(...)} variant of the same path;
 * presumably only the escaped variant exists in the committed file.
 * Confirm against the real source before relying on this text.
 *
 * @throws IOException declared by the signature; presumably raised by {@code execute}
 */
private static void reportProfile() throws IOException {
List<String> args = new ArrayList<>();
args.add("Report");
// "-db" value: absolute path of profileDBDir joined to dbName with "/"
args.add("-db");
args.add(profileDBDir.toAbsolutePath().toString()+"/"+dbName);
args.add(
ProcessUtils.escapeCommandLine(
profileDBDir.toAbsolutePath()
.toString()+"/"+dbName));
// "-rd" value: absolute path of profileReportsDir (presumably the report output directory)
args.add("-rd");
args.add(profileReportsDir.toAbsolutePath().toString());
args.add(
ProcessUtils.escapeCommandLine(
profileReportsDir.toAbsolutePath().toString()));
execute(args, 60000);
}

/**
 * Builds the argument list for the "Report" command against the compare
 * database and passes it to {@code execute} with a limit of 60000
 * (the {@code maxMillis} argument).
 *
 * NOTE(review): this listing looks like a diff view with the +/- markers
 * stripped. Each plain {@code args.add(path)} is immediately followed by a
 * {@code ProcessUtils.escapeCommandLine(...)} variant of the same path;
 * presumably only the escaped variant exists in the committed file.
 * Confirm against the real source before relying on this text.
 *
 * @throws IOException declared by the signature; presumably raised by {@code execute}
 */
private static void reportCompare() throws IOException {
List<String> args = new ArrayList<>();
args.add("Report");
// "-db" value: absolute path of compareDBDir joined to dbName with "/"
args.add("-db");
args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
args.add(
ProcessUtils.escapeCommandLine(
compareDBDir.toAbsolutePath().toString()+"/"+dbName));
// "-rd" value: absolute path of compareReportsDir (presumably the report output directory)
args.add("-rd");
args.add(compareReportsDir.toAbsolutePath().toString());
args.add(
ProcessUtils.escapeCommandLine(
compareReportsDir.toAbsolutePath().toString()));
execute(args, 60000);
}

Expand All @@ -186,11 +209,20 @@ public void testOneOff() throws Exception {
List<String> args = new ArrayList<>();
args.add("Compare");
args.add("-extractsA");
args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
args.add(
ProcessUtils.escapeCommandLine(
extractsDir.resolve("extractsA")
.toAbsolutePath().toString()));
args.add("-extractsB");
args.add(extractsDir.resolve("extractsB").toAbsolutePath().toString());
args.add(
ProcessUtils.escapeCommandLine(
extractsDir.resolve("extractsB")
.toAbsolutePath().toString()));
args.add("-db");
args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
args.add(
ProcessUtils.escapeCommandLine(
compareDBDir.toAbsolutePath()
.toString()+"/"+dbName));

execute(args, 60000);
// args.add("-drop");
Expand All @@ -202,6 +234,7 @@ private static void execute(List<String> incomingArgs, long maxMillis) throws IO
List<String> args = new ArrayList<>();
String cp = System.getProperty("java.class.path");
args.add("java");
args.add("-Djava.awt.headless=true");
args.add("-cp");
args.add(cp);
args.add("org.apache.tika.eval.TikaEvalCLI");
Expand Down

0 comments on commit 357c163

Please sign in to comment.