Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes in BratWriter #1170

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@

import static org.apache.uima.fit.util.JCasUtil.selectAll;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
Expand All @@ -28,6 +31,7 @@
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
Expand All @@ -46,7 +50,9 @@
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.fit.util.FSUtil;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

Expand Down Expand Up @@ -102,6 +108,14 @@ public class BratWriter extends JCasFileWriter_ImplBase
@ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".ann")
private String filenameSuffix;

/**
* Path of the HTML template file that is read when the output file suffix is
* <code>.html</code>. Default value <code>template.html</code>.
*/
public static final String PARAM_HTML_TEMPLANTE = "htmlTemplate";
@ConfigurationParameter(name = PARAM_HTML_TEMPLANTE , mandatory = false, defaultValue = "template.html")
private String htmlTemplate;

/**
* Types that will not be written to the exported file.
*/
Expand All @@ -111,12 +125,13 @@ public class BratWriter extends JCasFileWriter_ImplBase
private Set<String> excludeTypes;

/**
* Types that are text annotations (aka entities or spans).
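* Types that are text annotations (aka entities or spans). Each entry is a type name (or
* parent type name), optionally followed by ":" and the name of the feature whose value is
* written instead of the type name; separate feature names with "|" to follow a chain of
* features, e.g. <code>Token:Lemma|Value</code>.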
*/
public static final String PARAM_TEXT_ANNOTATION_TYPES = "spanTypes";
public static final String PARAM_TEXT_ANNOTATION_TYPES = "spanTypesVals";
@ConfigurationParameter(name = PARAM_TEXT_ANNOTATION_TYPES, mandatory = true, defaultValue = {
// "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence",
// "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
// "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token:Lemma|Value",
// "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS",
// "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma",
// "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem",
Expand All @@ -125,7 +140,7 @@ public class BratWriter extends JCasFileWriter_ImplBase
// "de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg",
// "de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred"
})
private Set<String> spanTypes;
private String[] spanTypesVals;

/**
* Types that are relations. It is mandatory to provide the type name followed by two feature
Expand Down Expand Up @@ -205,7 +220,7 @@ public class BratWriter extends JCasFileWriter_ImplBase
private int nextAttributeId;
private int nextPaletteIndex;
private Map<FeatureStructure, String> spanIdMap;

private Map<String,LinkedList<String>> spanTypes;
private BratConfiguration conf;

private Set<String> warnings;
Expand All @@ -218,7 +233,22 @@ public void initialize(UimaContext aContext)
conf = new BratConfiguration();

warnings = new LinkedHashSet<String>();

spanTypes= new HashMap<>();
for (String s: spanTypesVals){
String[] parts=s.split(":");
LinkedList<String>list;
if (spanTypes.containsKey(parts[0])){
list=spanTypes.get(parts[0]);
} else {
list=new LinkedList<>();
}
if (parts.length>1){
list.add(parts[1]);
} else {
list.add("");
}
spanTypes.put(parts[0], list);
}
parsedRelationTypes = new HashMap<>();
for (String rel : relationTypes) {
RelationParam p = RelationParam.parse(rel);
Expand Down Expand Up @@ -306,36 +336,41 @@ private void writeAnnotations(JCas aJCas)

// Go through all the annotations but only handle the ones that have no references to
// other annotations.
for (String currentType: spanTypes.keySet()){
for (FeatureStructure fs : selectAll(aJCas)) {
String typeName=fs.getType().getName();
String superType = fs.getCAS().getTypeSystem().getParent(fs.getType()).getName();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code seems to take into account only the type and the direct supertype of the FS. When taking into account the direct supertype, it seems sensible to actually check the entire inheritance hierarchy, no? org.apache.uima.cas.TypeSystem.subsumes(Type, Type) should be useful here.

Comment applies also to other changes further down.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just used the same approach that is used elsewhere in the module. I understood it as a way to specify the parent type instead of all the subsumed types.

// Skip excluded types
if (excludeTypes.contains(typeName) || excludeTypes.contains(superType) ) {
getLogger().debug("Excluding [" + fs.getType().getName() + "]");
continue;
}
if (currentType.equalsIgnoreCase(typeName) || currentType.equalsIgnoreCase(superType)) {
writeTextAnnotation(doc, (AnnotationFS) fs);
}
}
}
// second loop, only for relationships once spans have been set.
for (FeatureStructure fs : selectAll(aJCas)) {
// Skip document annotation
if (fs == aJCas.getDocumentAnnotationFs()) {
continue;
}

String typeName=fs.getType().getName();
String superType = fs.getCAS().getTypeSystem().getParent(fs.getType()).getName();
// Skip excluded types
if (excludeTypes.contains(fs.getType().getName())) {
getLogger().debug("Excluding [" + fs.getType().getName() + "]");
if (excludeTypes.contains(typeName) || excludeTypes.contains(superType) ) {
// it would be logged too many times
// getLogger().debug("Excluding [" + fs.getType().getName() + "]");
continue;
}

if (spanTypes.contains(fs.getType().getName())) {
writeTextAnnotation(doc, (AnnotationFS) fs);
if (spanTypes.containsKey(typeName) || spanTypes.containsKey(superType)) {
// writeTextAnnotation(doc, (AnnotationFS) fs);
}
else if (parsedRelationTypes.containsKey(fs.getType().getName())) {
else if (parsedRelationTypes.containsKey(typeName)|| parsedRelationTypes.containsKey(superType)) {
relationFS.add(fs);
}
else if (hasNonPrimitiveFeatures(fs) && (fs instanceof AnnotationFS)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@joancf why did you entirely drop the support for annotations with multiple non-primitive features?

// else if (parsedEventTypes.containsKey(fs.getType().getName())) {
BratEventAnnotation event = writeEventAnnotation(doc, (AnnotationFS) fs);
eventFS.put(event, fs);
}
else if (fs instanceof AnnotationFS) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you drop the fall-back handling for other types of annotations and the warning if an annotation could not be handled?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, in the same way as before: why should the component export elements the user hasn't asked for?

warnings.add("Assuming annotation type ["+fs.getType().getName()+"] is span");
writeTextAnnotation(doc, (AnnotationFS) fs);
}
else {
warnings.add("Skipping annotation with type ["+fs.getType().getName()+"]");
}
}

// Handle relations now since now we can resolve their targets to IDs.
Expand All @@ -358,7 +393,8 @@ else if (fs instanceof AnnotationFS) {
case ".json":
String template ;
if (filenameSuffix.equals(".html")) {
template = IOUtils.toString(getClass().getResource("html/template.html"));
InputStream it=new FileInputStream(htmlTemplate) ;
template = IOUtils.toString(it,"UTF-8");
}
else {
template = "{ \"collData\" : ##COLL-DATA## , \"docData\" : ##DOC-DATA## }";
Expand Down Expand Up @@ -562,7 +598,9 @@ private boolean isSlotFeature(FeatureStructure aFS, Feature aFeature)
private void writeRelationAnnotation(BratAnnotationDocument aDoc, FeatureStructure aFS)
{
RelationParam rel = parsedRelationTypes.get(aFS.getType().getName());

if (rel== null ) {// then is the parent type
rel=parsedRelationTypes.get(aFS.getCAS().getTypeSystem().getParent(aFS.getType()).getName());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it the parent type?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we reach this function, either the type or its parent is in parsedRelationTypes;
if the lookup for the type fails, then it is the parent.
There are so many relation types that using the parent simplifies the definition.

}
FeatureStructure arg1 = aFS.getFeatureValue(aFS.getType().getFeatureByBaseName(
rel.getArg1()));
FeatureStructure arg2 = aFS.getFeatureValue(aFS.getType().getFeatureByBaseName(
Expand All @@ -581,15 +619,25 @@ private void writeRelationAnnotation(BratAnnotationDocument aDoc, FeatureStructu

String superType = getBratType(aFS.getCAS().getTypeSystem().getParent(aFS.getType()));
String type = getBratType(aFS.getType());

BratRelationAnnotation anno = new BratRelationAnnotation(nextRelationAnnotationId,
type, rel.getArg1(), arg1Id, rel.getArg2(), arg2Id);
String value=type;
if (rel.getSubcat()!=""){
value=aFS.getFeatureValueAsString(aFS.getType().getFeatureByBaseName(rel.getSubcat()));
}

BratRelationAnnotation anno = new BratRelationAnnotation(nextRelationAnnotationId,
value, rel.getArg1(), arg1Id, rel.getArg2(), arg2Id);
nextRelationAnnotationId++;

conf.addRelationDecl(superType, type, rel.getArg1(), rel.getArg2());

conf.addRelationDecl(superType, value, rel.getArg1(), rel.getArg2());

if (enableTypeMappings){
// conf.addLabelDecl(type,type,type.substring(0, 2),type.substring(0, 1));
conf.addLabelDecl(value,value);
}else {
conf.addLabelDecl(anno.getType(), aFS.getType().getShortName(), aFS.getType()
.getShortName().substring(0, 1));
}


aDoc.addAnnotation(anno);

Expand All @@ -604,27 +652,57 @@ private void writeTextAnnotation(BratAnnotationDocument aDoc, AnnotationFS aFS)
{
String superType = getBratType(aFS.getCAS().getTypeSystem().getParent(aFS.getType()));
String type = getBratType(aFS.getType());

BratTextAnnotation anno = new BratTextAnnotation(nextTextAnnotationId, type,
aFS.getBegin(), aFS.getEnd(), aFS.getCoveredText());
nextTextAnnotationId++;

conf.addEntityDecl(superType, type);

conf.addLabelDecl(anno.getType(), aFS.getType().getShortName(), aFS.getType()
.getShortName().substring(0, 1));

if (!conf.hasDrawingDecl(anno.getType())) {
conf.addDrawingDecl(new BratTextAnnotationDrawingDecl(anno.getType(), "black",
palette[nextPaletteIndex % palette.length]));
nextPaletteIndex++;
// check if the type has a value to display; if so, replace the type by the value
// do it in a similar way as with the declarations that look for the value
LinkedList<String> spanDatas=null;
String value=type;
if (spanTypes.containsKey(aFS.getType().getName())) {
spanDatas=spanTypes.get(aFS.getType().getName());
} else if (spanTypes.containsKey(aFS.getCAS().getTypeSystem().getParent(aFS.getType()).getName() )) {
spanDatas=spanTypes.get(aFS.getCAS().getTypeSystem().getParent(aFS.getType()).getName() );
}
for (String spanData : spanDatas){
try {
if (!spanData.equalsIgnoreCase("")){
String [] splits=spanData.split("\\|");
if (splits.length>1){
FeatureStructure currentAnnot = aFS.getFeatureValue(aFS.getType().getFeatureByBaseName(splits[0]));
for (int f=1;f<splits.length-1;f++){
currentAnnot = currentAnnot.getFeatureValue(currentAnnot.getType().getFeatureByBaseName(splits[f]));
}
value= currentAnnot.getFeatureValueAsString(currentAnnot.getType().getFeatureByBaseName(splits[splits.length-1]));
} else {
value=aFS.getFeatureValueAsString(aFS.getType().getFeatureByBaseName(splits[0]));
}
}
} catch (NullPointerException E){ // if the feature is not there, then a null pointer exception raises.
getLogger().error("Type "+ type + "does not have the fature: "+ spanData + "as defined in PARAM_TEXT_ANNOTATION_TYPES");
}

BratTextAnnotation anno = new BratTextAnnotation(nextTextAnnotationId, value,
aFS.getBegin(), aFS.getEnd(), aFS.getCoveredText());
nextTextAnnotationId++;

conf.addEntityDecl(superType, value);
if (enableTypeMappings){
// conf.addLabelDecl(type,type,type.substring(0, 2),type.substring(0, 1));
conf.addLabelDecl(value,value);
}else {
conf.addLabelDecl(anno.getType(), aFS.getType().getShortName(), aFS.getType()
.getShortName().substring(0, 1));
}
if (!conf.hasDrawingDecl(anno.getType())) {
conf.addDrawingDecl(new BratTextAnnotationDrawingDecl(anno.getType(), "black",
palette[nextPaletteIndex % palette.length]));
nextPaletteIndex++;
}

aDoc.addAnnotation(anno);

// writeAttributes(anno, aFS);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you disable the ability to write attributes?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are not displayed, and I could not see any advantage of having them


spanIdMap.put(aFS, anno.getId());
}

aDoc.addAnnotation(anno);

writeAttributes(anno, aFS);

spanIdMap.put(aFS, anno.getId());
}

private boolean isInternalFeature(Feature aFeature)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ public class BratTextAnnotation
private final String text;

public BratTextAnnotation(int aId, String aType, int aBegin, int aEnd, String aText)
{
this("T" + aId, aType, aBegin, aEnd, aText);
{
this("T" + String.format("%04d", aId), aType, aBegin, aEnd, aText);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@joancf You mentioned in the previous PR this change is done to permit sorting in the JS (btw. the change to the sorting in the JS code is not included in the present PR anymore). But using the padded numbers is kind of brittle as one needs to pre-suppose the number of annotations. How about instead changing the sorting code so that it strips the annotation type letter and then compares the rest of the ID numerically?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, we could have an ID of the form "layer-id" for sorting, so it will sort by layer (the layer can be a two-digit padded number; more than 100 layers would be crazy),
for example: 01-1 01-2 01-3 02-4 02-5 ...

}

public BratTextAnnotation(String aId, String aType, int aBegin, int aEnd, String aText)
private BratTextAnnotation(String aId, String aType, int aBegin, int aEnd, String aText)
{
super(aId, aType);
begin = aBegin;
Expand Down