Skip to content
This repository has been archived by the owner on Sep 4, 2020. It is now read-only.

Commit

Permalink
Added Szeged NER
Browse files Browse the repository at this point in the history
  • Loading branch information
oroszgy committed Mar 4, 2017
1 parent 5205b3e commit 1cb4cb7
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 33 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Download the third-party jars that pom.xml references via system-scoped paths under lib/.
# wget -N enables timestamping, so a jar is fetched again only when the remote copy is newer.
dependencies:
mkdir -p lib
cd lib && wget -N http://rgai.inf.u-szeged.hu/project/nlp/research/magyarlanc/magyarlanc-3.0.jar
cd lib && wget -N http://rgai.inf.u-szeged.hu/project/nlp/research/NER/ner.jar

install:
mvn install
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

The aim of this project is to provide unified access to Hungarian NLP tools. As such, it provides wrapper classes and a REST API.
Currently the project integrates:
* `magyarlanc`

* [`magyarlanc` 3.0](http://www.inf.u-szeged.hu/rgai/magyarlanc)
* [Szeged NER](http://www.inf.u-szeged.hu/rgai/NER)
# Usage

## REST API
Expand Down Expand Up @@ -34,6 +34,6 @@ First release containing only magyarlanc.

# License

HuNLP is under LGPL3, but integrated libraries might use different (commercial) licenses.
HuNLP is under LGPL3; however, the integrated libraries might use different (commercial) licenses.


11 changes: 9 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

Expand Down Expand Up @@ -49,6 +49,13 @@
<version>3.0</version>
<systemPath>${basedir}/lib/magyarlanc-3.0.jar</systemPath>
</dependency>
<!-- Szeged NER: system-scoped, so it is resolved from the local lib/ner.jar instead of a Maven repository. -->
<dependency>
<groupId>hu.u_szeged</groupId>
<artifactId>ner</artifactId>
<scope>system</scope>
<version>1.0</version>
<systemPath>${basedir}/lib/ner.jar</systemPath>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/hu/nlp/api/Document.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package hu.nlp.api;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;

/**
* Created by gorosz on 2017. 03. 04..
*/
public class Document {
public class Document implements Iterable<Sentence> {
protected List<Sentence> sentences;

public Document(List<Sentence> sentences) {
Expand Down Expand Up @@ -40,4 +41,9 @@ public boolean equals(Object o) {
public int hashCode() {
    // A document without a sentence list hashes to 0; otherwise the list's own hash is used.
    if (sentences == null) {
        return 0;
    }
    return sentences.hashCode();
}

/**
 * Returns an iterator over the sentences of this document by delegating to the
 * backing sentence list.
 *
 * @return the iterator of the underlying {@code sentences} list
 */
@Override
public Iterator<Sentence> iterator() {
return sentences.iterator();
}
}
17 changes: 16 additions & 1 deletion src/main/java/hu/nlp/api/HuNlp.java
Original file line number Diff line number Diff line change
@@ -1,17 +1,32 @@
package hu.nlp.api;

import hu.u_szeged.magyarlanc.Magyarlanc;
import main.NamedEntityRecognizer;

/**
 * Entry point that combines magyarlanc dependency parsing with the Szeged
 * named entity recognizer.
 *
 * Created by gorosz on 2017. 03. 03..
 */
public class HuNlp {

    private final NamedEntityRecognizer ner;

    /**
     * Runs {@code Magyarlanc.fullInit()} and creates the named entity recognizer.
     */
    public HuNlp() {
        Magyarlanc.fullInit();
        ner = new NamedEntityRecognizer();
    }

    /**
     * Parses the text with {@code Magyarlanc.depParse} and then stores the
     * entity label predicted by the recognizer on every token.
     *
     * @param text the raw text to analyse
     * @return the parsed document whose tokens carry an entity type
     * @throws IllegalStateException if the recognizer returns fewer labels than
     *                               the sentence has tokens
     */
    public synchronized Document parse(String text) {
        String[][][] result = Magyarlanc.depParse(text);
        Document doc = Document.fromArray(result);

        for (Sentence sent : doc.sentences) {
            String[] labels = ner.predicateSentence(sent.tokenStrings());
            int tokenCount = sent.getTokens().size();

            // Fail before touching any token instead of running off the end of
            // the label array half-way through the sentence.
            if (labels.length < tokenCount) {
                throw new IllegalStateException(
                        "NER returned " + labels.length + " labels for " + tokenCount + " tokens");
            }

            for (int i = 0; i < tokenCount; i++) {
                sent.getTokens().get(i).setEntityType(labels[i]);
            }
        }
        return doc;
    }
}
6 changes: 5 additions & 1 deletion src/main/java/hu/nlp/api/Sentence.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public static Sentence fromArray(String[][] tokens) {

/**
 * Returns an iterator over the tokens of this sentence by delegating to the
 * backing token list, so the sentence can be used in a for-each loop.
 *
 * @return the iterator of the underlying {@code tokens} list, never {@code null}
 */
@Override
public Iterator<Token> iterator() {
    return tokens.iterator();
}

public List<Token> getTokens() {
Expand Down Expand Up @@ -52,4 +52,8 @@ public String toString() {
"tokens=" + tokens +
'}';
}

/**
 * Collects the word form of every token of this sentence into an array,
 * keeping the order of the token list.
 *
 * @return a new array holding one word form per token
 */
String[] tokenStrings() {
    return tokens.stream()
            .map(Token::getWordform)
            .toArray(String[]::new);
}
}
73 changes: 48 additions & 25 deletions src/main/java/hu/nlp/api/Token.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public class Token {
protected Map<String, String> tagProperties;
protected int headId;
protected String arcLabel;
protected String entityType;

public Token(int id, String wordform, String lemma, String pos, Map<String, String> tagProperties, int headId, String arcLabel) {
this.id = id;
Expand All @@ -26,6 +27,12 @@ public Token(int id, String wordform, String lemma, String pos, Map<String, Stri
this.arcLabel = arcLabel;
}

/**
 * Creates a token that additionally carries an entity type. All other
 * arguments are passed unchanged to the seven-argument constructor.
 *
 * @param entityType the entity label to store on this token
 */
public Token(int id, String wordform, String lemma, String pos, Map<String, String> tagProperties, int headId,
String arcLabel, String entityType) {
this(id, wordform, lemma, pos, tagProperties, headId, arcLabel);
this.entityType = entityType;
}

public static Token fromArray(String[] token) {
return new Token(
Integer.parseInt(token[0]), token[1], token[2], token[3],
Expand All @@ -40,31 +47,6 @@ protected static Map<String, String> parseProperties(String props) {
(str) -> Arrays.asList(str.split("="))).collect(Collectors.toMap(l -> l.get(0), l -> l.get(1)));
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;

Token token = (Token) o;

if (headId != token.headId) return false;
if (arcLabel != token.arcLabel) return false;
if (!wordform.equals(token.wordform)) return false;
if (!lemma.equals(token.lemma)) return false;
if (!pos.equals(token.pos)) return false;
return tagProperties != null ? tagProperties.equals(token.tagProperties) : token.tagProperties == null;
}

@Override
public int hashCode() {
int result = wordform.hashCode();
result = 31 * result + lemma.hashCode();
result = 31 * result + pos.hashCode();
result = 31 * result + (tagProperties != null ? tagProperties.hashCode() : 0);
result = 31 * result + headId;
result = 31 * result + arcLabel.hashCode();
return result;
}

public int getId() {

Expand Down Expand Up @@ -133,6 +115,47 @@ public String toString() {
", tagProperties=" + tagProperties +
", headId=" + headId +
", arcLabel='" + arcLabel + '\'' +
", entityType='" + entityType + '\'' +
'}';
}

/**
 * Two tokens are equal when they have the same runtime class and all of
 * their fields (id, head id, word form, lemma, POS, tag properties, arc
 * label and entity type) are equal; {@code null} fields only match
 * {@code null} fields.
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (o == null || getClass() != o.getClass()) {
        return false;
    }

    Token other = (Token) o;
    return id == other.id
            && headId == other.headId
            && java.util.Objects.equals(wordform, other.wordform)
            && java.util.Objects.equals(lemma, other.lemma)
            && java.util.Objects.equals(pos, other.pos)
            && java.util.Objects.equals(tagProperties, other.tagProperties)
            && java.util.Objects.equals(arcLabel, other.arcLabel)
            && java.util.Objects.equals(entityType, other.entityType);
}

/**
 * Combines every field used by {@code equals} with the usual 31-multiplier
 * scheme; a {@code null} field contributes 0.
 */
@Override
public int hashCode() {
    int hash = id;
    hash = 31 * hash + java.util.Objects.hashCode(wordform);
    hash = 31 * hash + java.util.Objects.hashCode(lemma);
    hash = 31 * hash + java.util.Objects.hashCode(pos);
    hash = 31 * hash + java.util.Objects.hashCode(tagProperties);
    hash = 31 * hash + headId;
    hash = 31 * hash + java.util.Objects.hashCode(arcLabel);
    hash = 31 * hash + java.util.Objects.hashCode(entityType);
    return hash;
}

/**
 * Returns the entity type stored on this token.
 *
 * @return the entity type, or {@code null} if none has been set
 */
public String getEntityType() {

return entityType;
}

/**
 * Stores the entity type of this token, replacing any previous value.
 *
 * @param entityType the entity label to store
 */
public void setEntityType(String entityType) {
this.entityType = entityType;
}
}

0 comments on commit 1cb4cb7

Please sign in to comment.