Skip to content

Commit

Permalink
Lots of fixes and improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
ammasjk committed Mar 25, 2024
1 parent 374d138 commit 5223724
Show file tree
Hide file tree
Showing 90 changed files with 5,291 additions and 1,284 deletions.
11 changes: 8 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.release>11</maven.compiler.release>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.release>17</maven.compiler.release>
</properties>

<dependencies>
Expand Down Expand Up @@ -49,12 +49,17 @@
<artifactId>jaxen</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
<version>6.5.0</version>
</dependency>

<!-- Used to clean and parse HTML -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.3</version>
<version>1.17.1</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
Expand Down
106 changes: 98 additions & 8 deletions src/main/java/io/datanapis/xbrl/DiscoverableTaxonomySet.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import io.datanapis.xbrl.model.*;
import io.datanapis.xbrl.model.arc.FromToArc;
import io.datanapis.xbrl.model.arc.LabelArc;
import io.datanapis.xbrl.model.arc.ReferenceArc;
import io.datanapis.xbrl.model.link.*;
import org.dom4j.*;
import org.slf4j.Logger;
Expand All @@ -38,12 +39,22 @@ public class DiscoverableTaxonomySet {
}
private static final Logger log = LoggerFactory.getLogger(DiscoverableTaxonomySet.class);

private final Map<String,Namespace> namespaces = new HashMap<>();
private final UriRoleTypeMap roleTypes = new UriRoleTypeMap();
private final UriArcroleTypeMap arcroleTypes = new UriArcroleTypeMap();
private final NameConceptMap nameConceptMap = new NameConceptMap();
private final KeyConceptMap keyConceptMap = new KeyConceptMap();
/*
* Some schemas, such as cef-2022.xsd, reference labels defined in other schemas such as dei-2022_lab.xsd
* (e.g., label_EntityFileNumber). The implication is that labels can be cross-referenced and therefore need
* to be global. This is the first instance of such a use case, and it is not clear whether it will become
* common in the future. If it does, then we will need to make the mapping between the label href and the
* LabelLink global. When resolving, we can try to resolve labels locally first before falling back to this
* global map.
*/
private final LabelLinkMap labelLinkMap = new LabelLinkMap();

public void clear() {
namespaces.clear();
roleTypes.clear();
arcroleTypes.clear();

Expand All @@ -54,6 +65,13 @@ public void clear() {
keyConceptMap.clear();
}

/**
 * Registers a namespace under its prefix.
 *
 * <p>A namespace whose prefix is {@code null} is ignored. If the prefix is already
 * mapped, the existing mapping is kept and the given namespace is not stored.
 *
 * @param namespace the namespace to register; must not be {@code null}
 */
public void addNamespace(Namespace namespace) {
    final String prefix = namespace.getPrefix();
    if (prefix != null) {
        namespaces.putIfAbsent(prefix, namespace);
    }
}

/**
 * Looks up the role type registered for a role URI, creating one on demand.
 *
 * <p>When no role type is mapped to {@code roleURI}, a new one is obtained from
 * {@code RoleType.createDynamic(roleURI)}, stored in the role type map, and returned.
 *
 * @param roleURI the role URI to look up
 * @return the existing or newly created role type for {@code roleURI}
 */
public RoleType getRoleType(String roleURI) {
    return roleTypes.computeIfAbsent(roleURI, uri -> RoleType.createDynamic(uri));
}
Expand Down Expand Up @@ -115,6 +133,36 @@ public Collection<Concept> getAllConcepts() {
return nameConceptMap.values();
}

/**
 * Resolves prefixed concept names ({@code prefix:localName}) to known concepts.
 *
 * <p>A name is silently skipped when it contains no {@code ':'}, when its prefix has not
 * been registered through {@code addNamespace}, or when no concept is mapped to the
 * resulting qualified name. The name is split at the first {@code ':'}.
 *
 * @param names the prefixed concept names to resolve
 * @return the distinct concepts that could be resolved; empty when none match
 */
public Collection<Concept> asConcepts(List<String> names) {
    final Set<Concept> resolved = new HashSet<>();
    for (String prefixedName : names) {
        final int colon = prefixedName.indexOf(':');
        if (colon >= 0) {
            final Namespace namespace = namespaces.get(prefixedName.substring(0, colon));
            if (namespace != null) {
                final String localName = prefixedName.substring(colon + 1);
                final Concept match = nameConceptMap.get(new QName(localName, namespace));
                if (match != null) {
                    resolved.add(match);
                }
            }
        }
    }

    return resolved;
}

/**
 * Records a label link in the taxonomy-wide label link map under the given href.
 *
 * @param href      the href used as the lookup key
 * @param labelLink the label link to associate with {@code href}
 */
public void addLabelLink(String href, LabelLink labelLink) {
    this.labelLinkMap.put(href, labelLink);
}

/**
 * Looks up the label link recorded for an href in the taxonomy-wide label link map.
 *
 * @param href the href used as the lookup key
 * @return the label link mapped to {@code href}; presumably {@code null} when none was
 *         recorded (depends on {@code LabelLinkMap.get} - confirm)
 */
public LabelLink getLabelLink(String href) {
    final LabelLink found = this.labelLinkMap.get(href);
    return found;
}

public static class Statistics {
public final int nOfRoleTypes;
public final int nOfReportableRoleTypes;
Expand Down Expand Up @@ -186,8 +234,10 @@ private String getFile() {
}
}

private static final String ECD_SUB_2023_XSD = "ecd-sub-2023.xsd";
private static final String ECD_2023_XSD_URL = "https://xbrl.sec.gov/ecd/2023/ecd-2023.xsd";
private static final Map<String,String> SCHEMA_DEPENDENCY =
Map.of( "exch-entire-2024.xsd", "https://xbrl.sec.gov/exch/2024/exch-2024.xsd",
"ecd-sub-2023.xsd", "https://xbrl.sec.gov/ecd/2023/ecd-2023.xsd",
"cef-2022.xsd", "https://xbrl.sec.gov/dei/2022/dei-2022_lab.xsd");

/**
* Collect all the urls that will need to be traversed in the order in which they need to be traversed.
Expand Down Expand Up @@ -221,9 +271,11 @@ private Collection<SchemaLocation> collect(XbrlReader.Resolver resolver, String
// Handle any schema imports
//
String schemaLocation = schemaLocationToUrl(child.attributeValue(TagNames.SCHEMA_LOCATION_TAG));
if (schemaLocation.contains(ECD_SUB_2023_XSD)) {
log.debug("Adding [{}] to queue", ECD_2023_XSD_URL);
todo.add(new SchemaLocation(resolver.getAbsolutePath(url.absolutePath, ECD_2023_XSD_URL)));
for (var pair : SCHEMA_DEPENDENCY.entrySet()) {
if (schemaLocation.contains(pair.getKey())) {
log.info("Adding [{}] to queue", pair.getValue());
todo.add(new SchemaLocation(resolver.getAbsolutePath(url.absolutePath, pair.getValue())));
}
}
log.debug("Adding [{}] to queue", schemaLocation);
todo.add(new SchemaLocation(resolver.getAbsolutePath(url.absolutePath, schemaLocation)));
Expand Down Expand Up @@ -308,17 +360,25 @@ private boolean isReportable(String url, String rootHref) {

private class LinkedTaxonomyProcessor {
private final List<LabelLink> labelLinks;
private final List<ReferenceLink> referenceLinks;
private final SchemaLocation url;
private final Element linkedElement;
private final String targetNamespace;
private final boolean reportable;
private final List<Element> linkBaseRoots = new ArrayList<>();

private LinkedTaxonomyProcessor(String rootSchema, List<LabelLink> labelLinks, SchemaLocation url, Element linkedElement) {
/**
 * Creates a processor for one linked taxonomy document.
 *
 * <p>Besides storing its arguments, the constructor reads the document's target namespace
 * attribute, registers the matching namespace declaration (if the root element has one)
 * with the enclosing {@code DiscoverableTaxonomySet}, and determines whether the document
 * is reportable relative to {@code rootSchema}.
 *
 * @param rootSchema     the root schema, used to decide whether this document is reportable
 * @param labelLinks     shared list that receives label links found while processing
 * @param referenceLinks shared list that receives reference links found while processing
 * @param url            location of the document being processed
 * @param linkedElement  root element of the document being processed
 */
private LinkedTaxonomyProcessor(String rootSchema, List<LabelLink> labelLinks, List<ReferenceLink> referenceLinks, SchemaLocation url, Element linkedElement) {
    this.labelLinks = labelLinks;
    this.referenceLinks = referenceLinks;
    this.url = url;
    this.linkedElement = linkedElement;
    this.targetNamespace = linkedElement.attributeValue(TagNames.TARGET_NAMESPACE_TAG);

    // Make the prefix -> namespace mapping of this document available to the enclosing set.
    // When no namespace declaration matches the target namespace there is nothing to register.
    Namespace namespace = linkedElement.getNamespaceForURI(targetNamespace);
    if (Objects.nonNull(namespace)) {
        DiscoverableTaxonomySet.this.addNamespace(namespace);
    }

    this.reportable = isReportable(url.absolutePath, rootSchema);
}

Expand Down Expand Up @@ -389,6 +449,9 @@ private void processElement(Element child) {
} else if (childName.equals(TagNames.LABEL_LINK_TAG)) {
LabelLink link = LabelLink.fromElement(url.absolutePath, DiscoverableTaxonomySet.this, child);
labelLinks.add(link);
} else if (childName.equals(TagNames.REFERENCE_LINK_TAG)) {
ReferenceLink link = ReferenceLink.fromElement(url.absolutePath, DiscoverableTaxonomySet.this, child);
referenceLinks.add(link);
} else {
log.info("Ignoring child [{}] of [{}]", childName, linkedElement.getQualifiedName());
}
Expand Down Expand Up @@ -441,6 +504,7 @@ private void read(XbrlReader.Resolver resolver, Element element) {
*/
private void read(XbrlReader.Resolver resolver, String rootSchema) {
final List<LabelLink> labelLinks = new ArrayList<>();
final List<ReferenceLink> referenceLinks = new ArrayList<>();
final Collection<SchemaLocation> todo = collect(resolver, rootSchema);

for (SchemaLocation url : todo) {
Expand All @@ -449,14 +513,15 @@ private void read(XbrlReader.Resolver resolver, String rootSchema) {
try {
Element linkedElement = resolver.getRootElement(url.absolutePath);
LinkedTaxonomyProcessor linkedTaxonomyProcessor =
new LinkedTaxonomyProcessor(rootSchema, labelLinks, url, linkedElement);
new LinkedTaxonomyProcessor(rootSchema, labelLinks, referenceLinks, url, linkedElement);
linkedTaxonomyProcessor.ingest();
} catch (Exception e) {
throw new RuntimeException(e);
}
}

this.connectConceptsToLabels(labelLinks);
this.connectConceptsToReferences(referenceLinks);
this.connectArcs();
}

Expand All @@ -466,7 +531,30 @@ private void connectConceptsToLabels(List<LabelLink> labelLinks) {
Location location = arc.getFrom();
Concept concept = keyConceptMap.get(location.getHref());
RoleLabelMap label = arc.getTo();
concept.addLabels(label);
if (label != null) {
if (concept != null) {
concept.addLabels(label);
} else {
throw new RuntimeException("Concept is null! location.href = [" + location.getHref() + "]");
}
} else if (Objects.nonNull(arc.getUse()) && !arc.getUse().equalsIgnoreCase(TagNames.PROHIBITED_USE)) {
log.info("arc.To is null but use is not [{}]", TagNames.PROHIBITED_USE);
}
}
}
}

/**
 * Attaches references to the concepts they describe.
 *
 * <p>For every arc of every reference link, the concept is looked up by the href of the
 * arc's source location and the arc's target reference is added to it. Arcs whose target
 * reference is {@code null}, or whose href does not map to a known concept, are skipped
 * without any diagnostics.
 *
 * @param referenceLinks the reference links collected while reading the taxonomy
 */
private void connectConceptsToReferences(List<ReferenceLink> referenceLinks) {
    for (ReferenceLink referenceLink : referenceLinks) {
        for (ReferenceArc referenceArc : referenceLink.getAllArcs()) {
            final Location source = referenceArc.getFrom();
            final Concept owner = keyConceptMap.get(source.getHref());
            final Reference target = referenceArc.getTo();
            if (target == null || owner == null) {
                continue;
            }
            owner.addReference(target);
        }
    }
}
Expand Down Expand Up @@ -541,7 +629,9 @@ private DiscoverableTaxonomySet() {
arcroleTypes.put(ArcroleType.DEPRECATED_AGGREGATE_CONCEPT.getArcroleURI(), ArcroleType.DEPRECATED_AGGREGATE_CONCEPT);
arcroleTypes.put(ArcroleType.EXPLANATORY_FACT.getArcroleURI(), ArcroleType.EXPLANATORY_FACT);

RoleType.clear();
roleTypes.put(RoleType.DEPRECATED.getRoleURI(), RoleType.DEPRECATED);
roleTypes.put(RoleType.DISCLOSURE.getRoleURI(), RoleType.DISCLOSURE);
roleTypes.put(RoleType.COMMON_PRACTICE_REF.getRoleURI(), RoleType.COMMON_PRACTICE_REF);
roleTypes.put(RoleType.NON_AUTHORITATIVE_LITERATURE_REF.getRoleURI(), RoleType.NON_AUTHORITATIVE_LITERATURE_REF);
roleTypes.put(RoleType.RECOGNITION_REF.getRoleURI(), RoleType.RECOGNITION_REF);
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/io/datanapis/xbrl/SubstitutionGroup.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package io.datanapis.xbrl;

import org.dom4j.QName;

/**
 * Qualified-name constants for the substitution groups this library refers to.
 *
 * <p>Each constant pairs a local element name with one of the namespaces declared in
 * {@code XbrlNamespaces}.
 */
public class SubstitutionGroup {
/** Local name {@code item} in {@code XbrlNamespaces.XBRLI_NAMESPACE}. */
public static final QName XBRLI_ITEM = new QName("item", XbrlNamespaces.XBRLI_NAMESPACE);
/** Local name {@code hypercubeItem} in {@code XbrlNamespaces.XBRLDT_NAMESPACE}. */
public static final QName XBRLDT_HYPERCUBE_ITEM = new QName("hypercubeItem", XbrlNamespaces.XBRLDT_NAMESPACE);
/** Local name {@code dimensionItem} in {@code XbrlNamespaces.XBRLDT_NAMESPACE}. */
public static final QName XBRLDT_DIMENSION_ITEM = new QName("dimensionItem", XbrlNamespaces.XBRLDT_NAMESPACE);
}
7 changes: 7 additions & 0 deletions src/main/java/io/datanapis/xbrl/TagNames.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,14 @@ public class TagNames {
public static final String DEFINITION_LINK_TAG = "definitionLink";
public static final String DEFINITION_TAG = "definition";
public static final String DEPRECATED_DATE_TAG = "deprecatedDate";
public static final String DEPRECATED_DATE_2_TAG = "DeprecatedDate";
public static final String DEPRECATION_REPLACEMENT_TAG = "DeprecationReplacement";
public static final String DIMENSION_TAG = "dimension";
public static final String DIVIDE_TAG = "divide";
public static final String DOCUMENTATION_TAG = "documentation";
public static final String DOMAIN_TAG = "domain";
public static final String ELEMENT_TAG = "element";
public static final String ELEMENT_DEPRECATED_TAG = "ElementDeprecated";
public static final String END_DATE_TAG = "endDate";
public static final String ENTITY_TAG = "entity";
public static final String EXPLICIT_MEMBER_TAG = "explicitMember";
Expand Down Expand Up @@ -78,6 +81,10 @@ public class TagNames {
public static final String PRESENTATION_ARC_TAG = "presentationArc";
public static final String PRESENTATION_LINK_TAG = "presentationLink";
public static final String PRIORITY_TAG = "priority";
public static final String PROHIBITED_USE = "prohibited";
public static final String REFERENCE_TAG = "reference";
public static final String REFERENCE_ARC_TAG = "referenceArc";
public static final String REFERENCE_LINK_TAG = "referenceLink";
public static final String ROLE_REF_TAG = "roleRef";
public static final String ROLE_TAG = "role";
public static final String ROLE_TYPE_TAG = "roleType";
Expand Down
Loading

0 comments on commit 5223724

Please sign in to comment.