Skip to content

Commit

Permalink
Init runWithBindings implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
NicoLaval committed Nov 5, 2024
1 parent 4356e48 commit 4a543b8
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 10 deletions.
24 changes: 23 additions & 1 deletion vtl-prov/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,16 @@
<artifactId>vtl-parser</artifactId>
<version>1.8.0-SNAPSHOT</version>
</dependency>

<dependency>
<groupId>fr.insee.trevas</groupId>
<artifactId>vtl-engine</artifactId>
<version>1.8.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.insee.trevas</groupId>
<artifactId>vtl-spark</artifactId>
<version>1.8.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.insee.trevas</groupId>
<artifactId>vtl-model</artifactId>
Expand All @@ -51,4 +60,17 @@

</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<argLine>--add-exports java.base/sun.nio.ch=ALL-UNNAMED</argLine>
</configuration>
</plugin>
</plugins>
</build>

</project>
45 changes: 39 additions & 6 deletions vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package fr.insee.vtl.prov;

import fr.insee.vtl.model.Dataset;
import fr.insee.vtl.parser.VtlBaseListener;
import fr.insee.vtl.parser.VtlLexer;
import fr.insee.vtl.parser.VtlParser;
Expand All @@ -12,10 +13,12 @@
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.tree.ParseTreeWalker;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import javax.script.Bindings;
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
import javax.script.ScriptException;
import java.util.*;
import java.util.stream.Collectors;

/**
* ANTLR Listener that create provenance objects.
Expand Down Expand Up @@ -175,8 +178,38 @@ public static Program run(String expr, String id, String programName) {
return provenanceListener.getProgram();
}

public static Program runWithBindings(String expr, String id, String programName) {
return run(expr, id, programName);
public static Program runWithBindings(ScriptEngine engine, String expr, String id, String programName) {
Program program = run(expr, id, programName);
// 0 check if input dataset are empty?
// 1 - Handle input dataset
ProgramStep initialStep = program.getProgramStepByIndex(1);
Set<DataframeInstance> consumedDataframe = initialStep.getConsumedDataframe();
consumedDataframe.forEach(d ->{
Dataset ds = (Dataset) engine.getContext().getAttribute(d.getLabel());
ds.getDataStructure().values().forEach(c -> {
VariableInstance variableInstance = new VariableInstance(c.getName());
variableInstance.setRole(c.getRole());
variableInstance.setType(c.getType());
d.getHasVariableInstances().add(variableInstance);
});
});

// 2 - Split script to loop over program steps and run them
List<String> scripts = Arrays.stream(expr.split(";"))
.map(e -> e + ";")
.collect(Collectors.toList());

scripts.forEach(s -> {
try {
engine.eval(s);
} catch (ScriptException e) {
throw new RuntimeException(e);
}
});

Bindings bindings = engine.getBindings(ScriptContext.ENGINE_SCOPE);

return program;
}

}
7 changes: 7 additions & 0 deletions vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,11 @@ public ProgramStep getProgramStepByLabel(String label) {
.findFirst()
.orElse(null);
}

public ProgramStep getProgramStepByIndex(int index) {
return programSteps.stream()
.filter(p -> p.getIndex() == index)
.findFirst()
.orElse(null);
}
}
19 changes: 19 additions & 0 deletions vtl-prov/src/main/java/fr/insee/vtl/prov/utils/RDFUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.jena.rdfconnection.RDFConnectionFactory;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDFS;
import org.apache.jena.vocabulary.XSD;

import java.io.IOException;
import java.io.StringWriter;
Expand Down Expand Up @@ -101,6 +102,12 @@ public static void handleDataframeInstance(Model model, DataframeInstance dfInst
dfURI.addProperty(RDF.type, SDTH_DATAFRAME);
String label = dfInstance.getLabel();
dfURI.addProperty(RDFS.label, label);
Property SDTH_USED_VARIABLE = model.createProperty(SDTH_BASE_URI + "hasVariableInstance");
dfInstance.getHasVariableInstances().forEach(v -> {
Resource varAssignedURI = model.createResource(TREVAS_BASE_URI + "variable/" + v.getId());
dfURI.addProperty(SDTH_USED_VARIABLE, varAssignedURI);
handleVariableInstance(model, v);
});
}

public static void handleVariableInstance(Model model, VariableInstance varInstance) {
Expand All @@ -111,6 +118,18 @@ public static void handleVariableInstance(Model model, VariableInstance varInsta
varURI.addProperty(RDF.type, SDTH_VARIABLE);
String label = varInstance.getLabel();
varURI.addProperty(RDFS.label, label);
if (null != varInstance.getRole()) {
String role = varInstance.getRole().toString();
// TO EXTRACT
Property hasRole = model.createProperty("http://id.making-sense.info/vtl/component/hasRole");
varURI.addProperty(hasRole, role);
}
if (null != varInstance.getType()) {
Class<?> type = varInstance.getType();
// TO EXTRACT
Property hasType = model.createProperty("http://id.making-sense.info/vtl/component/hasType");
varURI.addProperty(hasType, VtlTypes.getVtlType(type));
}
}

public static Model initModel(String baseFilePath) {
Expand Down
43 changes: 40 additions & 3 deletions vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
package fr.insee.vtl.prov;

import fr.insee.vtl.engine.VtlScriptEngine;
import fr.insee.vtl.model.Dataset;
import fr.insee.vtl.model.InMemoryDataset;
import fr.insee.vtl.model.utils.Java8Helpers;
import fr.insee.vtl.prov.prov.Program;
import fr.insee.vtl.prov.utils.PropertiesLoader;
import fr.insee.vtl.prov.utils.RDFUtils;
import org.apache.jena.rdf.model.Model;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import java.io.File;
import java.io.IOException;
import java.util.Properties;
Expand Down Expand Up @@ -73,14 +81,43 @@ public void simpleTest() throws IOException {

@Test
public void simpleTestWithBindings() throws IOException {
SparkSession spark = SparkSession.builder()
.appName("test")
.master("local")
.getOrCreate();

ScriptEngineManager mgr = new ScriptEngineManager();
ScriptEngine engine = mgr.getEngineByExtension("vtl");
engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark");

InMemoryDataset ds1 = new InMemoryDataset(
Java8Helpers.listOf(
Java8Helpers.mapOf("id", "A", "var1", 0L, "var2", 0.1D),
Java8Helpers.mapOf("id", "B", "var1", 1L, "var2", 0.2D),
Java8Helpers.mapOf("id", "C", "var1", 2L, "var2", 0.3D)
),
Java8Helpers.mapOf("id", String.class, "var1", Long.class, "var2", Double.class),
Java8Helpers.mapOf("id", Dataset.Role.IDENTIFIER, "var1", Dataset.Role.MEASURE, "var2", Dataset.Role.MEASURE)
);
InMemoryDataset ds2 = new InMemoryDataset(
Java8Helpers.listOf(
Java8Helpers.mapOf("id", "A", "var1", 10L, "var2", 1.1D),
Java8Helpers.mapOf("id", "B", "var1", 11L, "var2", 1.2D),
Java8Helpers.mapOf("id", "D", "var1", 12L, "var2", 1.3D)
),
Java8Helpers.mapOf("id", String.class, "var1", Long.class, "var2", Double.class),
Java8Helpers.mapOf("id", Dataset.Role.IDENTIFIER, "var1", Dataset.Role.MEASURE, "var2", Dataset.Role.MEASURE)
);

engine.put("ds1", ds1);
engine.put("ds2", ds2);


String script = "ds1 := ds1 + ds2;\n" +
String script = "ds_sum := ds1 + ds2;\n" +
"ds_mul := ds_sum * 3; \n" +
"ds_res <- ds_mul [filter mod(var1, 2) = 0]" +
" [calc var_sum := var1 + var2];";

Program program = ProvenanceListener.runWithBindings(script, "trevas-simple-test", "Simple test from Trevas tests");
Program program = ProvenanceListener.runWithBindings(engine, script, "trevas-simple-test", "Simple test from Trevas tests");
Model model = RDFUtils.buildModel(program);
String content = RDFUtils.serialize(model, "JSON-LD");
assertThat(content).isNotEmpty();
Expand Down

0 comments on commit 4a543b8

Please sign in to comment.