Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding transliteration models. #577

Merged
merged 11 commits into from
Oct 30, 2017
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public class ViewNames {
public static final String POS = "POS";

public static final String MENTION = "MENTION";
public static final String RELATION = "RELATION";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


/** the tree gazetteer produced by the SimpleGazetteerAnnotator. */
public static final String TREE_GAZETTEER = "TREE_GAZETTEER";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import edu.illinois.cs.cogcomp.annotation.*;
import edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator;
import edu.illinois.cs.cogcomp.comma.CommaLabeler;
import edu.illinois.cs.cogcomp.core.constants.Language;
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
import edu.illinois.cs.cogcomp.core.utilities.configuration.Configurator;
import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager;
Expand Down Expand Up @@ -351,8 +352,10 @@ private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefault
}

if (rm.getBoolean(PipelineConfigurator.USE_TRANSLITERATION)) {
TransliterationAnnotator transliterationAnnotator = new TransliterationAnnotator();
viewGenerators.put(ViewNames.TRANSLITERATION, transliterationAnnotator);
for(Language lang : TransliterationAnnotator.supportedLanguages) {
TransliterationAnnotator transliterationAnnotator = new TransliterationAnnotator(true, lang);
viewGenerators.put(ViewNames.TRANSLITERATION + "_" + lang.getCode(), transliterationAnnotator);
}
}

if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) {
Expand All @@ -374,8 +377,8 @@ private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefault
viewGenerators.put(ViewNames.MENTION, mentionAnnotator);
}
if (rm.getBoolean(PipelineConfigurator.USE_RELATION)){
RelationAnnotator relationAnnotator = new RelationAnnotator();
viewGenerators.put(ViewNames.MENTION, relationAnnotator);
viewGenerators.put(ViewNames.RELATION + "_ACE", new RelationAnnotator(true, "ACE"));
viewGenerators.put(ViewNames.RELATION + "_SEMEVAL", new RelationAnnotator(true, "SEMEVAL"));
}
if (rm.getBoolean(PipelineConfigurator.USE_TIMEX3)){
Properties rmProps = new TemporalChunkerConfigurator().getDefaultConfig().getProperties();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,28 +98,27 @@ public static void startServer(String[] args, Logger logger) {

// create a hashmap to keep track of client ip addresses and their number of calls
int rate = parseResults.getInt("rate");
if( rate > 0) {
if (rate > 0) {
clients = new HashMap<String, Integer>();
}

AnnotatorService finalPipeline = pipeline;
get("/annotate", "application/json", (request, response)->{
get("/annotate", "application/json", (request, response) -> {
logger.info("GET request . . . ");
boolean canServe = true;
if(rate > 0) {
if (rate > 0) {
resetServer();
String ip = request.ip();
int callsSofar = (Integer) clients.getOrDefault(ip, 0);
if( callsSofar > rate ) canServe = false;
if (callsSofar > rate) canServe = false;
clients.put(ip, callsSofar + 1);
}
if(canServe) {
if (canServe) {
logger.info("request.body(): " + request.body());
String text = request.queryParams("text");
String views = request.queryParams("views");
return annotateText(finalPipeline, text, views, logger);
}
else {
} else {
response.status(429);
return "You have reached your maximum daily query limit :-/ ";
}
Expand All @@ -129,22 +128,21 @@ public static void startServer(String[] args, Logger logger) {
{
logger.info("POST request . . . ");
boolean canServe = true;
if(rate > 0) {
if (rate > 0) {
resetServer();
String ip = request.ip();
int callsSofar = (Integer) clients.getOrDefault(ip, 0);
if( callsSofar > rate ) canServe = false;
if (callsSofar > rate) canServe = false;
clients.put(ip, callsSofar + 1);
}
if(canServe) {
logger.info( "request.body(): " + request.body());
if (canServe) {
logger.info("request.body(): " + request.body());
Map<String, String> map = splitQuery(request.body());
System.out.println("POST body parameters parsed: " + map);
String text = map.get("text");
String views = map.get("views");
return annotateText(finalPipeline, text, views, logger);
}
else {
} else {
response.status(429);
return "You have reached your maximum daily query limit :-/ ";
}
Expand All @@ -153,7 +151,7 @@ public static void startServer(String[] args, Logger logger) {

// api to get name of the available views
String viewsString = "";
for(String view : pipeline.getAvailableViews()) {
for (String view : pipeline.getAvailableViews()) {
viewsString += ", " + view;
}
String finalViewsString = viewsString;
Expand All @@ -175,7 +173,7 @@ public static void main(String[] args) {
}

private static String annotateText(AnnotatorService finalPipeline, String text, String views,
Logger logger) throws AnnotatorException {
Logger logger) throws AnnotatorException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious: why is Logger a method argument, not a static class member? (the latter is more standard in CCG code)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember why now, but it does seem odd. Fixed it.

if (views == null || text == null) {
return "The parameters 'text' and/or 'views' are not specified. Here is a sample input: \n ?text=\"This is a sample sentence. I'm happy.\"&views=POS,NER";
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,36 @@ public class RelationAnnotator extends Annotator {
private ACERelationConstrainedClassifier constrainedClassifier;
private Gazetteers gazetteers;
private WordNetManager wordNet;
private MentionAnnotator mentionAnnotator;
private String type = "ACE"; // default relation type

public RelationAnnotator() {
this(true);
}


public RelationAnnotator(boolean lazilyInitialize) {
super(ViewNames.MENTION, new String[]{ViewNames.POS, ViewNames.DEPENDENCY_STANFORD, ViewNames.SHALLOW_PARSE}, lazilyInitialize);
this(lazilyInitialize, "ACE");
}

public RelationAnnotator(boolean lazilyInitialize, String type) {
super(ViewNames.RELATION, new String[]{ViewNames.MENTION, ViewNames.POS, ViewNames.DEPENDENCY_STANFORD, ViewNames.SHALLOW_PARSE}, lazilyInitialize);
this.type = (type.contains("ACE")) ? "ACE" : "SEMEVAL";
}

@Override
public void initialize(ResourceManager rm) {
try {
mentionAnnotator = new MentionAnnotator("ACE_TYPE");
Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
File modelDir = ds.getDirectory("org.cogcomp.re", "ACE_GOLD_BI", 1.0, false);
String modelFile = modelDir.getPath() + File.separator + "ACE_GOLD_BI" + File.separator + "ACE_GOLD_BI.lc";
String lexFile = modelDir.getPath() + File.separator + "ACE_GOLD_BI" + File.separator + "ACE_GOLD_BI.lex";
String modelFile;
String lexFile;
if (type.equals("ACE")) {
File modelDir = ds.getDirectory("org.cogcomp.re", "ACE_GOLD_BI", 1.0, false);
modelFile = modelDir.getPath() + File.separator + "ACE_GOLD_BI" + File.separator + "ACE_GOLD_BI.lc";
lexFile = modelDir.getPath() + File.separator + "ACE_GOLD_BI" + File.separator + "ACE_GOLD_BI.lex";
} else {
File modelDir = ds.getDirectory("org.cogcomp.re", "SEMEVAL", 1.1, false);
modelFile = modelDir.getPath() + File.separator + "SEMEVAL" + File.separator + "SEMEVAL.lc";
lexFile = modelDir.getPath() + File.separator + "SEMEVAL" + File.separator + "SEMEVAL.lex";
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Slash0BZ is this ok? I think it's not working ...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you use the same classifiers or different classifiers?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad, I forgot it's a different classifier. The classifier for SemEval is semeval_relation_classifier. Also, you don't need the constrainedClassifier anymore; just use semeval_relation_classifier directly and then load the model and lexicon.

relationClassifier = new relation_classifier();
relationClassifier.readModel(modelFile);
relationClassifier.readLexicon(lexFile);
Expand All @@ -61,61 +72,64 @@ public void initialize(ResourceManager rm) {
WordNetManager.loadConfigAsClasspathResource(true);
wordNet = WordNetManager.getInstance();
gazetteers = GazetteersFactory.get();
}
catch (Exception e){
} catch (Exception e) {
e.printStackTrace();
}
}

@Override
public void addView(TextAnnotation record) throws AnnotatorException {
if (!isInitialized()){
if (!isInitialized()) {
doInitialize();
}
if (!record.hasView(ViewNames.POS) ){
if (!record.hasView(ViewNames.POS)) {
throw new AnnotatorException("Missing required view POS");
}
if (!record.hasView(ViewNames.DEPENDENCY_STANFORD)){
if (!record.hasView(ViewNames.DEPENDENCY_STANFORD)) {
throw new AnnotatorException("Missing required view DEPENDENCY_STANFORD");
}
if (!record.hasView(ViewNames.SHALLOW_PARSE)){
if (!record.hasView(ViewNames.SHALLOW_PARSE)) {
throw new AnnotatorException("Missing required view SHALLOW_PARSE");
}
mentionAnnotator.addView(record);
if (!record.hasView(ViewNames.MENTION)) {
// TODO: show error messages if the mentions are not typed.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What you can do here (outside this 'if' of course) is something like

else{
    if (record.getView(ViewNames.MENTION).getConstituents().get(0).getAttribute("EntityType").equals("MENTION")){
      //The model does not have type
    }
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

throw new AnnotatorException("Missing required view MENTION");
}

View mentionView = record.getView(ViewNames.MENTION);
View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", record);
for (Constituent co : record.getView(ViewNames.TOKENS).getConstituents()){
for (Constituent co : record.getView(ViewNames.TOKENS).getConstituents()) {
Constituent c = co.cloneForNewView("RE_ANNOTATED");
for (String s : co.getAttributeKeys()){
for (String s : co.getAttributeKeys()) {
c.addAttribute(s, co.getAttribute(s));
}
c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordNet, c));
c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordNet, c));
annotatedTokenView.addConstituent(c);
}
record.addView("RE_ANNOTATED", annotatedTokenView);
for (int i = 0; i < record.getNumberOfSentences(); i++){
for (int i = 0; i < record.getNumberOfSentences(); i++) {
Sentence curSentence = record.getSentence(i);
List<Constituent> cins = mentionView.getConstituentsCoveringSpan(curSentence.getStartSpan(), curSentence.getEndSpan());
for (int j = 0; j < cins.size(); j++){
for (int k = j + 1; k < cins.size(); k++){
for (int j = 0; j < cins.size(); j++) {
for (int k = j + 1; k < cins.size(); k++) {
if (k == j) continue;
Constituent source = cins.get(j);
Constituent target = cins.get(k);
Constituent sourceHead = MentionAnnotator.getHeadConstituent(source, "");
Constituent targetHead = MentionAnnotator.getHeadConstituent(target, "");
source.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(sourceHead));
target.addAttribute("GAZ", ((FlatGazetteers)gazetteers).annotatePhrase(targetHead));
target.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(targetHead));
Relation for_test_forward = new Relation("PredictedRE", source, target, 1.0f);
Relation for_test_backward = new Relation("PredictedRE", target, source, 1.0f);
String tag_forward = constrainedClassifier.discreteValue(for_test_forward);
String tag_backward = constrainedClassifier.discreteValue(for_test_backward);

if (tag_forward.equals(ACEMentionReader.getOppoName(tag_backward)) && !tag_forward.equals("NOT_RELATED")){
if (tag_forward.equals(ACEMentionReader.getOppoName(tag_backward)) && !tag_forward.equals("NOT_RELATED")) {
String tag = tag_forward;
Constituent first = source;
Constituent second = target;
if (tag_forward.length() > tag_backward.length()){
if (tag_forward.length() > tag_backward.length()) {
tag = tag_backward;
first = target;
second = source;
Expand All @@ -127,32 +141,32 @@ public void addView(TextAnnotation record) throws AnnotatorException {
mentionView.addRelation(r);
}
if (!tag_forward.equals(ACEMentionReader.getOppoName(tag_backward)) &&
(!tag_forward.equals("NOT_RELATED") || !tag_backward.equals("NOT_RELATED"))){
(!tag_forward.equals("NOT_RELATED") || !tag_backward.equals("NOT_RELATED"))) {
double forward_score = 0.0;
double backward_score = 0.0;
ScoreSet scores = relationClassifier.scores(for_test_forward);
Score[] scoresArray = scores.toArray();
for (Score s : scoresArray){
if (s.value.equals(tag_forward)){
for (Score s : scoresArray) {
if (s.value.equals(tag_forward)) {
forward_score = s.score;
}
}
scores = relationClassifier.scores(for_test_backward);
scoresArray = scores.toArray();
for (Score s : scoresArray){
if (s.value.equals(tag_forward)){
for (Score s : scoresArray) {
if (s.value.equals(tag_forward)) {
backward_score = s.score;
}
}
String tag = tag_forward;
Constituent first = source;
Constituent second = target;
if (forward_score < backward_score && backward_score - forward_score > 0.005){
if (forward_score < backward_score && backward_score - forward_score > 0.005) {
tag = tag_backward;
first = target;
second = source;
}
if (!tag.equals("NOT_RELATED")){
if (!tag.equals("NOT_RELATED")) {
String coarseType = ACERelationTester.getCoarseType(tag);
Relation r = new Relation(coarseType, first, second, 1.0f);
r.addAttribute("RelationType", coarseType);
Expand All @@ -163,7 +177,7 @@ public void addView(TextAnnotation record) throws AnnotatorException {
}
}
}
record.addView(ViewNames.MENTION, mentionView);
record.addView(ViewNames.RELATION, mentionView);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Slash0BZ how does it look now?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here mentionView was initialized with ViewNames.MENTION, and now you are adding it under ViewNames.RELATION.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. I don't think it will cause any big problems. I'll keep it in mind ...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok

}

}
20 changes: 5 additions & 15 deletions transliteration/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,16 @@ http://www.w3.org/2001/XMLSchema-instance ">
<version>3.8.1</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-core-utilities</artifactId>
<version>3.1.34</version>
</dependency>

<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-edison</artifactId>
<version>3.1.34</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
Expand All @@ -49,25 +52,21 @@ http://www.w3.org/2001/XMLSchema-instance ">
<version>4.12</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.0</version>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.13</version>
</dependency>

<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>56.1</version>
</dependency>

<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>illinois-abstract-server</artifactId>
Expand All @@ -78,7 +77,6 @@ http://www.w3.org/2001/XMLSchema-instance ">
<artifactId>curator-interfaces</artifactId>
<version>0.7</version>
</dependency>

<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
Expand All @@ -89,7 +87,6 @@ http://www.w3.org/2001/XMLSchema-instance ">
<artifactId>curator-utils</artifactId>
<version>0.0.4-SNAPSHOT</version>
</dependency>

</dependencies>

<build>
Expand Down Expand Up @@ -122,13 +119,6 @@ http://www.w3.org/2001/XMLSchema-instance ">
<directory>src/main/resources</directory>
</resource>
</resources>
<extensions>
<extension>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-ssh</artifactId>
<version>2.4</version>
</extension>
</extensions>
</build>

</project>
Loading