diff --git "a/dsls/README.md\n" "b/dsls/README.md\n" new file mode 100644 index 0000000000000..24f052f18cf4a --- /dev/null +++ "b/dsls/README.md\n" @@ -0,0 +1,22 @@ + + +# Beam DSLs + +*It's working in progress...* diff --git a/dsls/pom.xml b/dsls/pom.xml index 6e0017115a41b..a1bb0ee8b7666 100644 --- a/dsls/pom.xml +++ b/dsls/pom.xml @@ -27,10 +27,11 @@ beam-dsls-parent + pom Apache Beam :: DSLs - + sql @@ -53,4 +54,4 @@ - \ No newline at end of file + diff --git "a/dsls/sql/README.md\n" "b/dsls/sql/README.md\n" new file mode 100644 index 0000000000000..ae9e0f3f6fd57 --- /dev/null +++ "b/dsls/sql/README.md\n" @@ -0,0 +1,24 @@ + + +# Beam SQL + +Beam SQL provides a new interface, to execute a SQL query as a Beam pipeline. + +*It's working in progress...* diff --git a/dsls/sql/pom.xml b/dsls/sql/pom.xml new file mode 100644 index 0000000000000..6819fce23982f --- /dev/null +++ b/dsls/sql/pom.xml @@ -0,0 +1,150 @@ + + + 4.0.0 + + org.apache.beam + beam-dsls-parent + 0.7.0-SNAPSHOT + + + beam-dsls-sql + Apache Beam :: DSLs :: SQL + Beam SQL provides a new interface to generate a Beam pipeline from SQL statement + + jar + + + ${maven.build.timestamp} + yyyy-MM-dd HH:mm + 1.11.0 + + + + + + src/main/resources + true + + + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + ${project.basedir}/src/test/ + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.apache.maven.plugins + maven-surefire-plugin + + -da + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.maven.plugins + maven-shade-plugin + + + bundle-and-repackage + package + + shade + + + true + + + com.google.guava:guava + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + + org.jacoco + jacoco-maven-plugin + + + + + + + junit + junit + + + org.apache.calcite + calcite-core + ${calcite-version} + + + org.apache.beam + beam-sdks-java-core + + + org.apache.beam + beam-runners-direct-java + provided + + + org.apache.beam + beam-sdks-java-io-kafka + provided + + + org.springframework + spring-expression + + + com.google.guava + guava + + + org.slf4j + slf4j-jdk14 + + + org.slf4j + slf4j-api + + + org.apache.calcite + calcite-linq4j + ${calcite-version} + + + diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/example/BeamSqlExample.java b/dsls/sql/src/main/java/org/beam/dsls/sql/example/BeamSqlExample.java new file mode 100644 index 0000000000000..81ac6eec99cca --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/example/BeamSqlExample.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.beam.dsls.sql.example; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.beam.dsls.sql.planner.BeamSqlRunner; +import org.beam.dsls.sql.schema.BaseBeamTable; +import org.beam.dsls.sql.schema.kafka.BeamKafkaCSVTable; + +/** + * This is one quick example.
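+ * It reads CSV records from the Kafka topic {@code orders}, applies the filter and
+ * projection from the query in {@code main()}, and writes the matching records to the
+ * topic {@code sub_orders}.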
+ * Before starting, follow https://kafka.apache.org/quickstart to set up a Kafka
+ * cluster locally, and run the commands below to create the required Kafka topics:
+ *
+ * bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 \
+ *   --partitions 1 --topic orders
+ * bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 \
+ *   --partitions 1 --topic sub_orders
+ *
+ * After running the application, produce several test records:
+ *
+ * bin/kafka-console-producer.sh --broker-list localhost:9092 --topic orders
+ * invalid,record
+ * 123445,0,100,3413423
+ * 234123,3,232,3451231234
+ * 234234,0,5,1234123
+ * 345234,0,345234.345,3423
+ *
+ * Meanwhile, open another console to see the output:
+ *
+ * bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic sub_orders
+ *
+ * Expected output:
+ * 123445,0,100.0
+ * 345234,0,345234.345
+ *
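+ * Only the records that satisfy {@code SITE_ID = 0 AND price > 20} in the query below are
+ * expected in the output; the malformed record and the non-matching rows do not appear.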
+ */ +public class BeamSqlExample implements Serializable { + + /** + * + */ + private static final long serialVersionUID = 3673487843555563904L; + + public static void main(String[] args) throws Exception { + BeamSqlRunner runner = new BeamSqlRunner(); + runner.addTable("ORDER_DETAILS", getTable("127.0.0.1:9092", "orders")); + runner.addTable("SUB_ORDER", getTable("127.0.0.1:9092", "sub_orders")); + + // case 2: insert into () select STREAM from + //
from + String sql = "INSERT INTO SUB_ORDER(order_id, site_id, price) " + "SELECT " + + " order_id, site_id, price " + "FROM ORDER_DETAILS " + "WHERE SITE_ID = 0 and price > 20"; + + runner.explainQuery(sql); + runner.submitQuery(sql); + } + + public static BaseBeamTable getTable(String bootstrapServer, String topic) { + final RelProtoDataType protoRowType = new RelProtoDataType() { + @Override + public RelDataType apply(RelDataTypeFactory a0) { + return a0.builder().add("order_id", SqlTypeName.BIGINT).add("site_id", SqlTypeName.INTEGER) + .add("price", SqlTypeName.DOUBLE).add("order_time", SqlTypeName.TIMESTAMP).build(); + } + }; + + Map consumerPara = new HashMap(); + consumerPara.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest"); + + return new BeamKafkaCSVTable(protoRowType, bootstrapServer, Arrays.asList(topic)) + .updateConsumerProperties(consumerPara); + } +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/example/package-info.java b/dsls/sql/src/main/java/org/beam/dsls/sql/example/package-info.java new file mode 100644 index 0000000000000..ae678e4297fd1 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/example/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * examples on how to use BeamSQL. + * + */ +package org.beam.dsls.sql.example; diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/BeamSQLExpressionExecutor.java b/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/BeamSQLExpressionExecutor.java new file mode 100644 index 0000000000000..56e483ace0e0e --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/BeamSQLExpressionExecutor.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.interpreter; + +import java.io.Serializable; +import java.util.List; +import org.beam.dsls.sql.schema.BeamSQLRow; + +/** + * {@code BeamSQLExpressionExecutor} fills the gap between relational + * expressions in Calcite SQL and executable code. 
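+ * An implementation translates the expressions once in {@link #prepare()} and then evaluates
+ * them against each input {@link BeamSQLRow} in {@link #execute(BeamSQLRow)}, as
+ * {@link BeamSQLSpELExecutor} does with SpEL.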
+ * + */ +public interface BeamSQLExpressionExecutor extends Serializable { + + /** + * invoked before data processing. + */ + void prepare(); + + /** + * apply transformation to input record {@link BeamSQLRow}. + * + * @param inputRecord + * @return + */ + List execute(BeamSQLRow inputRecord); + + void close(); +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/BeamSQLSpELExecutor.java b/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/BeamSQLSpELExecutor.java new file mode 100644 index 0000000000000..48306da0dc910 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/BeamSQLSpELExecutor.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.interpreter; + +import static com.google.common.base.Preconditions.checkArgument; +import java.util.ArrayList; +import java.util.List; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.beam.dsls.sql.planner.BeamSqlUnsupportedException; +import org.beam.dsls.sql.rel.BeamFilterRel; +import org.beam.dsls.sql.rel.BeamProjectRel; +import org.beam.dsls.sql.rel.BeamRelNode; +import org.beam.dsls.sql.schema.BeamSQLRow; +import org.springframework.expression.Expression; +import org.springframework.expression.ExpressionParser; +import org.springframework.expression.spel.SpelParserConfiguration; +import org.springframework.expression.spel.standard.SpelExpressionParser; +import org.springframework.expression.spel.support.StandardEvaluationContext; + +/** + * {@code BeamSQLSpELExecutor} is one implementation, to convert Calcite SQL + * relational expression to SpEL expression. 
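+ * For example, with the schema used in {@code BeamSqlExample}, a condition such as
+ * {@code SITE_ID = 0 AND price > 20} is rendered, roughly, as the SpEL string
+ * {@code ( (#in.getFieldValue(1) == 0) && (#in.getFieldValue(2) > 20) )}, where {@code #in}
+ * is bound to the current {@link BeamSQLRow} during evaluation.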
+ * + */ +public class BeamSQLSpELExecutor implements BeamSQLExpressionExecutor { + /** + * + */ + private static final long serialVersionUID = 6777232573390074408L; + + private List spelString; + private List spelExpressions; + + public BeamSQLSpELExecutor(BeamRelNode relNode) { + this.spelString = new ArrayList<>(); + if (relNode instanceof BeamFilterRel) { + String filterSpEL = CalciteToSpEL + .rexcall2SpEL((RexCall) ((BeamFilterRel) relNode).getCondition()); + spelString.add(filterSpEL); + } else if (relNode instanceof BeamProjectRel) { + spelString.addAll(createProjectExps((BeamProjectRel) relNode)); + // List projectRules = + // for (int idx = 0; idx < projectRules.size(); ++idx) { + // spelString.add(projectRules.get(idx).getProjectExp()); + // } + } else { + throw new BeamSqlUnsupportedException( + String.format("%s is not supported yet", relNode.getClass().toString())); + } + } + + @Override + public void prepare() { + this.spelExpressions = new ArrayList<>(); + + SpelParserConfiguration config = new SpelParserConfiguration(true, true); + ExpressionParser parser = new SpelExpressionParser(config); + for (String el : spelString) { + spelExpressions.add(parser.parseExpression(el)); + } + } + + @Override + public List execute(BeamSQLRow inputRecord) { + StandardEvaluationContext inContext = new StandardEvaluationContext(); + inContext.setVariable("in", inputRecord); + + List results = new ArrayList<>(); + for (Expression ep : spelExpressions) { + results.add(ep.getValue(inContext)); + } + return results; + } + + @Override + public void close() { + + } + + private List createProjectExps(BeamProjectRel projectRel) { + List rules = new ArrayList<>(); + + List exps = projectRel.getProjects(); + + for (int idx = 0; idx < exps.size(); ++idx) { + RexNode node = exps.get(idx); + if (node == null) { + rules.add("null"); + } + + if (node instanceof RexLiteral) { + rules.add(((RexLiteral) node).getValue() + ""); + } else { + if (node instanceof RexInputRef) { + rules.add("#in.getFieldValue(" + ((RexInputRef) node).getIndex() + ")"); + } + if (node instanceof RexCall) { + rules.add(CalciteToSpEL.rexcall2SpEL((RexCall) node)); + } + } + } + + checkArgument(rules.size() == exps.size(), "missing projects rules after conversion."); + + return rules; + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/CalciteToSpEL.java b/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/CalciteToSpEL.java new file mode 100644 index 0000000000000..c7cbace4e3602 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/CalciteToSpEL.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.beam.dsls.sql.interpreter; + +import com.google.common.base.Joiner; +import java.util.ArrayList; +import java.util.List; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.beam.dsls.sql.planner.BeamSqlUnsupportedException; + +/** + * {@code CalciteToSpEL} is used in {@link BeamSQLSpELExecutor}, to convert a + * relational expression {@link RexCall} to SpEL expression. + * + */ +public class CalciteToSpEL { + + public static String rexcall2SpEL(RexCall cdn) { + List parts = new ArrayList<>(); + for (RexNode subcdn : cdn.operands) { + if (subcdn instanceof RexCall) { + parts.add(rexcall2SpEL((RexCall) subcdn)); + } else { + parts.add(subcdn instanceof RexInputRef + ? "#in.getFieldValue(" + ((RexInputRef) subcdn).getIndex() + ")" : subcdn.toString()); + } + } + + String opName = cdn.op.getName(); + switch (cdn.op.getClass().getSimpleName()) { + case "SqlMonotonicBinaryOperator": // +-* + case "SqlBinaryOperator": // > < = >= <= <> OR AND || / . + switch (cdn.op.getName().toUpperCase()) { + case "AND": + return String.format(" ( %s ) ", Joiner.on("&&").join(parts)); + case "OR": + return String.format(" ( %s ) ", Joiner.on("||").join(parts)); + case "=": + return String.format(" ( %s ) ", Joiner.on("==").join(parts)); + case "<>": + return String.format(" ( %s ) ", Joiner.on("!=").join(parts)); + default: + return String.format(" ( %s ) ", Joiner.on(cdn.op.getName().toUpperCase()).join(parts)); + } + case "SqlCaseOperator": // CASE + return String.format(" (%s ? %s : %s)", parts.get(0), parts.get(1), parts.get(2)); + case "SqlCastFunction": // CAST + return parts.get(0); + case "SqlPostfixOperator": + switch (opName.toUpperCase()) { + case "IS NULL": + return String.format(" null == %s ", parts.get(0)); + case "IS NOT NULL": + return String.format(" null != %s ", parts.get(0)); + default: + throw new BeamSqlUnsupportedException(); + } + default: + throw new BeamSqlUnsupportedException(); + } + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/package-info.java b/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/package-info.java new file mode 100644 index 0000000000000..85235e2dcb0b6 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/interpreter/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * interpreter generate runnable 'code' to execute SQL relational expressions. 
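+ * {@link org.beam.dsls.sql.interpreter.BeamSQLSpELExecutor} is the implementation provided
+ * here, backed by Spring SpEL.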
+ */ +package org.beam.dsls.sql.interpreter; diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/package-info.java b/dsls/sql/src/main/java/org/beam/dsls/sql/package-info.java new file mode 100644 index 0000000000000..c6f5cf63ba8ec --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * BeamSQL provides a new interface to run a SQL statement with Beam. + */ +package org.beam.dsls.sql; diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamPipelineCreator.java b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamPipelineCreator.java new file mode 100644 index 0000000000000..5a0c73d2a335f --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamPipelineCreator.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.planner; + +import java.util.Map; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.coders.CoderRegistry; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.values.PCollection; +import org.beam.dsls.sql.rel.BeamRelNode; +import org.beam.dsls.sql.schema.BaseBeamTable; +import org.beam.dsls.sql.schema.BeamSQLRecordType; +import org.beam.dsls.sql.schema.BeamSQLRecordTypeCoder; +import org.beam.dsls.sql.schema.BeamSQLRow; +import org.beam.dsls.sql.schema.BeamSqlRowCoder; + +/** + * {@link BeamPipelineCreator} converts a {@link BeamRelNode} tree, into a Beam + * pipeline. 
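+ * As the tree is visited, each {@link BeamRelNode} applies its transform and records its
+ * output with {@link #setLatestStream}, so the next node can continue from
+ * {@link #getLatestStream()}.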
+ * + */ +public class BeamPipelineCreator { + private Map sourceTables; + private PCollection latestStream; + + private PipelineOptions options; + + private Pipeline pipeline; + + private boolean hasPersistent = false; + + public BeamPipelineCreator(Map sourceTables) { + this.sourceTables = sourceTables; + + options = PipelineOptionsFactory.fromArgs(new String[] {}).withValidation() + .as(PipelineOptions.class); // FlinkPipelineOptions.class + options.setJobName("BeamPlanCreator"); + + pipeline = Pipeline.create(options); + CoderRegistry cr = pipeline.getCoderRegistry(); + cr.registerCoder(BeamSQLRow.class, BeamSqlRowCoder.of()); + cr.registerCoder(BeamSQLRecordType.class, BeamSQLRecordTypeCoder.of()); + } + + public PCollection getLatestStream() { + return latestStream; + } + + public void setLatestStream(PCollection latestStream) { + this.latestStream = latestStream; + } + + public Map getSourceTables() { + return sourceTables; + } + + public Pipeline getPipeline() { + return pipeline; + } + + public boolean isHasPersistent() { + return hasPersistent; + } + + public void setHasPersistent(boolean hasPersistent) { + this.hasPersistent = hasPersistent; + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamQueryPlanner.java b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamQueryPlanner.java new file mode 100644 index 0000000000000..7a03ae946dcc6 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamQueryPlanner.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.beam.dsls.sql.planner; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.PipelineResult; +import org.apache.calcite.adapter.java.JavaTypeFactory; +import org.apache.calcite.config.Lex; +import org.apache.calcite.jdbc.CalciteSchema; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.plan.Contexts; +import org.apache.calcite.plan.ConventionTraitDef; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.prepare.CalciteCatalogReader; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataTypeSystem; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.tools.FrameworkConfig; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Planner; +import org.apache.calcite.tools.RelConversionException; +import org.apache.calcite.tools.ValidationException; +import org.beam.dsls.sql.rel.BeamLogicalConvention; +import org.beam.dsls.sql.rel.BeamRelNode; +import org.beam.dsls.sql.schema.BaseBeamTable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The core component to handle through a SQL statement, to submit a Beam + * pipeline. + * + */ +public class BeamQueryPlanner { + private static final Logger LOG = LoggerFactory.getLogger(BeamQueryPlanner.class); + + protected final Planner planner; + private Map sourceTables = new HashMap<>(); + + public static final JavaTypeFactory TYPE_FACTORY = new JavaTypeFactoryImpl( + RelDataTypeSystem.DEFAULT); + + /** + * + * @param schema + */ + public BeamQueryPlanner(SchemaPlus schema) { + final List traitDefs = new ArrayList(); + traitDefs.add(ConventionTraitDef.INSTANCE); + traitDefs.add(RelCollationTraitDef.INSTANCE); + + List sqlOperatorTables = new ArrayList<>(); + sqlOperatorTables.add(SqlStdOperatorTable.instance()); + sqlOperatorTables.add(new CalciteCatalogReader(CalciteSchema.from(schema), false, + Collections.emptyList(), TYPE_FACTORY)); + + FrameworkConfig config = Frameworks.newConfigBuilder() + .parserConfig(SqlParser.configBuilder().setLex(Lex.MYSQL).build()).defaultSchema(schema) + .traitDefs(traitDefs).context(Contexts.EMPTY_CONTEXT).ruleSets(BeamRuleSets.getRuleSets()) + .costFactory(null).typeSystem(BeamRelDataTypeSystem.BEAM_REL_DATATYPE_SYSTEM).build(); + this.planner = Frameworks.getPlanner(config); + + for (String t : schema.getTableNames()) { + sourceTables.put(t, (BaseBeamTable) schema.getTable(t)); + } + } + + /** + * With a Beam pipeline generated in {@link #compileBeamPipeline(String)}, + * submit it to run and wait until finish. + * + * @param sqlStatement + * @throws Exception + */ + public void submitToRun(String sqlStatement) throws Exception { + Pipeline pipeline = compileBeamPipeline(sqlStatement); + + PipelineResult result = pipeline.run(); + result.waitUntilFinish(); + } + + /** + * With the @{@link BeamRelNode} tree generated in + * {@link #convertToBeamRel(String)}, a Beam pipeline is generated. 
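+ * The pipeline is only constructed, not executed; {@link #submitToRun(String)} runs the
+ * compiled pipeline and waits until it finishes.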
+ * + * @param sqlStatement + * @return + * @throws Exception + */ + public Pipeline compileBeamPipeline(String sqlStatement) throws Exception { + BeamRelNode relNode = convertToBeamRel(sqlStatement); + + BeamPipelineCreator planCreator = new BeamPipelineCreator(sourceTables); + return relNode.buildBeamPipeline(planCreator); + } + + /** + * It parses and validate the input query, then convert into a + * {@link BeamRelNode} tree. + * + * @param query + * @return + * @throws ValidationException + * @throws RelConversionException + * @throws SqlParseException + */ + public BeamRelNode convertToBeamRel(String sqlStatement) + throws ValidationException, RelConversionException, SqlParseException { + return (BeamRelNode) validateAndConvert(planner.parse(sqlStatement)); + } + + private RelNode validateAndConvert(SqlNode sqlNode) + throws ValidationException, RelConversionException { + SqlNode validated = validateNode(sqlNode); + LOG.info("SQL:\n" + validated); + RelNode relNode = convertToRelNode(validated); + return convertToBeamRel(relNode); + } + + private RelNode convertToBeamRel(RelNode relNode) throws RelConversionException { + RelTraitSet traitSet = relNode.getTraitSet(); + + LOG.info("SQLPlan>\n" + RelOptUtil.toString(relNode)); + + // PlannerImpl.transform() optimizes RelNode with ruleset + return planner.transform(0, traitSet.plus(BeamLogicalConvention.INSTANCE), relNode); + } + + private RelNode convertToRelNode(SqlNode sqlNode) throws RelConversionException { + return planner.rel(sqlNode).rel; + } + + private SqlNode validateNode(SqlNode sqlNode) throws ValidationException { + SqlNode validatedSqlNode = planner.validate(sqlNode); + validatedSqlNode.accept(new UnsupportedOperatorsVisitor()); + return validatedSqlNode; + } + + public Map getSourceTables() { + return sourceTables; + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamRelDataTypeSystem.java b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamRelDataTypeSystem.java new file mode 100644 index 0000000000000..bf35296df6483 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamRelDataTypeSystem.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.planner; + +import org.apache.calcite.rel.type.RelDataTypeSystem; +import org.apache.calcite.rel.type.RelDataTypeSystemImpl; + +/** + * customized data type in Beam. 
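+ * It raises the maximum numeric precision and scale to 38 digits.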
+ * + */ +public class BeamRelDataTypeSystem extends RelDataTypeSystemImpl { + public static final RelDataTypeSystem BEAM_REL_DATATYPE_SYSTEM = new BeamRelDataTypeSystem(); + + @Override + public int getMaxNumericScale() { + return 38; + } + + @Override + public int getMaxNumericPrecision() { + return 38; + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamRuleSets.java b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamRuleSets.java new file mode 100644 index 0000000000000..3f40c271c486b --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamRuleSets.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.planner; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import java.util.Iterator; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.tools.RuleSet; +import org.beam.dsls.sql.rel.BeamRelNode; +import org.beam.dsls.sql.rule.BeamFilterRule; +import org.beam.dsls.sql.rule.BeamIOSinkRule; +import org.beam.dsls.sql.rule.BeamIOSourceRule; +import org.beam.dsls.sql.rule.BeamProjectRule; + +/** + * {@link RuleSet} used in {@link BeamQueryPlanner}. It translates a standard + * Calcite {@link RelNode} tree, to represent with {@link BeamRelNode} + * + */ +public class BeamRuleSets { + private static final ImmutableSet calciteToBeamConversionRules = ImmutableSet + .builder().add(BeamIOSourceRule.INSTANCE, BeamProjectRule.INSTANCE, + BeamFilterRule.INSTANCE, BeamIOSinkRule.INSTANCE) + .build(); + + public static RuleSet[] getRuleSets() { + return new RuleSet[] { new BeamRuleSet( + ImmutableSet.builder().addAll(calciteToBeamConversionRules).build()) }; + } + + private static class BeamRuleSet implements RuleSet { + final ImmutableSet rules; + + public BeamRuleSet(ImmutableSet rules) { + this.rules = rules; + } + + public BeamRuleSet(ImmutableList rules) { + this.rules = ImmutableSet.builder().addAll(rules).build(); + } + + @Override + public Iterator iterator() { + return rules.iterator(); + } + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamSQLRelUtils.java b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamSQLRelUtils.java new file mode 100644 index 0000000000000..94b341c82dba9 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamSQLRelUtils.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.planner; + +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.sql.SqlExplainLevel; +import org.beam.dsls.sql.rel.BeamRelNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utilities for {@code BeamRelNode}. + */ +public class BeamSQLRelUtils { + private static final Logger LOG = LoggerFactory.getLogger(BeamSQLRelUtils.class); + + private static final AtomicInteger sequence = new AtomicInteger(0); + private static final AtomicInteger classSequence = new AtomicInteger(0); + + public static String getStageName(BeamRelNode relNode) { + return relNode.getClass().getSimpleName().toUpperCase() + "_" + relNode.getId() + "_" + + sequence.getAndIncrement(); + } + + public static String getClassName(BeamRelNode relNode) { + return "Generated_" + relNode.getClass().getSimpleName().toUpperCase() + "_" + relNode.getId() + + "_" + classSequence.getAndIncrement(); + } + + public static BeamRelNode getBeamRelInput(RelNode input) { + if (input instanceof RelSubset) { + // go with known best input + input = ((RelSubset) input).getBest(); + } + return (BeamRelNode) input; + } + + public static String explain(final RelNode rel) { + return explain(rel, SqlExplainLevel.EXPPLAN_ATTRIBUTES); + } + + public static String explain(final RelNode rel, SqlExplainLevel detailLevel) { + String explain = ""; + try { + explain = RelOptUtil.toString(rel); + } catch (StackOverflowError e) { + LOG.error("StackOverflowError occurred while extracting plan. " + + "Please report it to the dev@ mailing list."); + LOG.error("RelNode " + rel + " ExplainLevel " + detailLevel, e); + LOG.error("Forcing plan to empty string and continue... " + + "SQL Runner may not working properly after."); + } + return explain; + } +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamSqlRunner.java b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamSqlRunner.java new file mode 100644 index 0000000000000..a10b6edfe8acf --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamSqlRunner.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.planner; + +import java.io.Serializable; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.RelConversionException; +import org.apache.calcite.tools.ValidationException; +import org.beam.dsls.sql.rel.BeamRelNode; +import org.beam.dsls.sql.schema.BaseBeamTable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Interface to explain, submit a SQL query. + * + */ +public class BeamSqlRunner implements Serializable { + /** + * + */ + private static final long serialVersionUID = -4708693435115005182L; + + private static final Logger LOG = LoggerFactory.getLogger(BeamSqlRunner.class); + + private SchemaPlus schema = Frameworks.createRootSchema(true); + + private BeamQueryPlanner planner = new BeamQueryPlanner(schema); + + /** + * Add a schema. + * + * @param schemaName + * @param scheme + */ + public void addSchema(String schemaName, Schema scheme) { + schema.add(schemaName, schema); + } + + /** + * add a {@link BaseBeamTable} to schema repository. + * + * @param tableName + * @param table + */ + public void addTable(String tableName, BaseBeamTable table) { + schema.add(tableName, table); + planner.getSourceTables().put(tableName, table); + } + + /** + * submit as a Beam pipeline. + * + * @param sqlString + * @throws Exception + */ + public void submitQuery(String sqlString) throws Exception { + planner.submitToRun(sqlString); + planner.planner.close(); + } + + /** + * explain and display the execution plan. + * + * @param sqlString + * @throws ValidationException + * @throws RelConversionException + * @throws SqlParseException + */ + public String explainQuery(String sqlString) + throws ValidationException, RelConversionException, SqlParseException { + BeamRelNode exeTree = planner.convertToBeamRel(sqlString); + String beamPlan = RelOptUtil.toString(exeTree); + System.out.println(String.format("beamPlan>\n%s", beamPlan)); + + planner.planner.close(); + return beamPlan; + } + + protected BeamQueryPlanner getPlanner() { + return planner; + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamSqlUnsupportedException.java b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamSqlUnsupportedException.java new file mode 100644 index 0000000000000..a3475bb1c2618 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/BeamSqlUnsupportedException.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.beam.dsls.sql.planner; + +/** + * Generic exception for un-supported operations. + * + */ +public class BeamSqlUnsupportedException extends RuntimeException { + /** + * + */ + private static final long serialVersionUID = 3445015747629217342L; + + public BeamSqlUnsupportedException(String string) { + super(string); + } + + public BeamSqlUnsupportedException() { + super(); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/planner/UnsupportedOperatorsVisitor.java b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/UnsupportedOperatorsVisitor.java new file mode 100644 index 0000000000000..702381df60be7 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/UnsupportedOperatorsVisitor.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.planner; + +import org.apache.calcite.sql.util.SqlShuttle; + +/** + * Unsupported operation to visit a RelNode. + * + */ +public class UnsupportedOperatorsVisitor extends SqlShuttle { + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/planner/package-info.java b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/package-info.java new file mode 100644 index 0000000000000..d98c584241216 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/planner/package-info.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * {@link org.beam.dsls.sql.planner.BeamQueryPlanner} is the main interface. + * It defines data sources, validate a SQL statement, and convert it as a Beam + * pipeline. + */ +package org.beam.dsls.sql.planner; diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamFilterRel.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamFilterRel.java new file mode 100644 index 0000000000000..64f2d1fa67006 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamFilterRel.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.rel; + +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.PCollection; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rex.RexNode; +import org.beam.dsls.sql.interpreter.BeamSQLExpressionExecutor; +import org.beam.dsls.sql.interpreter.BeamSQLSpELExecutor; +import org.beam.dsls.sql.planner.BeamPipelineCreator; +import org.beam.dsls.sql.planner.BeamSQLRelUtils; +import org.beam.dsls.sql.schema.BeamSQLRow; +import org.beam.dsls.sql.transform.BeamSQLFilterFn; + +/** + * BeamRelNode to replace a {@code Filter} node. + * + */ +public class BeamFilterRel extends Filter implements BeamRelNode { + + public BeamFilterRel(RelOptCluster cluster, RelTraitSet traits, RelNode child, + RexNode condition) { + super(cluster, traits, child, condition); + } + + @Override + public Filter copy(RelTraitSet traitSet, RelNode input, RexNode condition) { + return new BeamFilterRel(getCluster(), traitSet, input, condition); + } + + @Override + public Pipeline buildBeamPipeline(BeamPipelineCreator planCreator) throws Exception { + + RelNode input = getInput(); + BeamSQLRelUtils.getBeamRelInput(input).buildBeamPipeline(planCreator); + + String stageName = BeamSQLRelUtils.getStageName(this); + + PCollection upstream = planCreator.getLatestStream(); + + BeamSQLExpressionExecutor executor = new BeamSQLSpELExecutor(this); + + PCollection projectStream = upstream.apply(stageName, + ParDo.of(new BeamSQLFilterFn(getRelTypeName(), executor))); + + planCreator.setLatestStream(projectStream); + + return planCreator.getPipeline(); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamIOSinkRel.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamIOSinkRel.java new file mode 100644 index 0000000000000..46654e5e372a9 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamIOSinkRel.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.rel; + +import com.google.common.base.Joiner; +import java.util.List; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.values.PCollection; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.prepare.Prepare; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.TableModify; +import org.apache.calcite.rex.RexNode; +import org.beam.dsls.sql.planner.BeamPipelineCreator; +import org.beam.dsls.sql.planner.BeamSQLRelUtils; +import org.beam.dsls.sql.schema.BaseBeamTable; +import org.beam.dsls.sql.schema.BeamSQLRow; + +/** + * BeamRelNode to replace a {@code TableModify} node. + * + */ +public class BeamIOSinkRel extends TableModify implements BeamRelNode { + public BeamIOSinkRel(RelOptCluster cluster, RelTraitSet traits, RelOptTable table, + Prepare.CatalogReader catalogReader, RelNode child, Operation operation, + List updateColumnList, List sourceExpressionList, boolean flattened) { + super(cluster, traits, table, catalogReader, child, operation, updateColumnList, + sourceExpressionList, flattened); + } + + @Override + public RelNode copy(RelTraitSet traitSet, List inputs) { + return new BeamIOSinkRel(getCluster(), traitSet, getTable(), getCatalogReader(), sole(inputs), + getOperation(), getUpdateColumnList(), getSourceExpressionList(), isFlattened()); + } + + @Override + public Pipeline buildBeamPipeline(BeamPipelineCreator planCreator) throws Exception { + + RelNode input = getInput(); + BeamSQLRelUtils.getBeamRelInput(input).buildBeamPipeline(planCreator); + + String stageName = BeamSQLRelUtils.getStageName(this); + + PCollection upstream = planCreator.getLatestStream(); + + String sourceName = Joiner.on('.').join(getTable().getQualifiedName()); + + BaseBeamTable targetTable = planCreator.getSourceTables().get(sourceName); + + upstream.apply(stageName, targetTable.buildIOWriter()); + + planCreator.setHasPersistent(true); + + return planCreator.getPipeline(); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamIOSourceRel.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamIOSourceRel.java new file mode 100644 index 0000000000000..f14db922e63e1 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamIOSourceRel.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.beam.dsls.sql.rel; + +import com.google.common.base.Joiner; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.values.PCollection; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.core.TableScan; +import org.beam.dsls.sql.planner.BeamPipelineCreator; +import org.beam.dsls.sql.planner.BeamSQLRelUtils; +import org.beam.dsls.sql.schema.BaseBeamTable; +import org.beam.dsls.sql.schema.BeamSQLRow; + +/** + * BeamRelNode to replace a {@code TableScan} node. + * + */ +public class BeamIOSourceRel extends TableScan implements BeamRelNode { + + public BeamIOSourceRel(RelOptCluster cluster, RelTraitSet traitSet, RelOptTable table) { + super(cluster, traitSet, table); + } + + @Override + public Pipeline buildBeamPipeline(BeamPipelineCreator planCreator) throws Exception { + + String sourceName = Joiner.on('.').join(getTable().getQualifiedName()).replace(".(STREAM)", ""); + + BaseBeamTable sourceTable = planCreator.getSourceTables().get(sourceName); + + String stageName = BeamSQLRelUtils.getStageName(this); + + PCollection sourceStream = planCreator.getPipeline().apply(stageName, + sourceTable.buildIOReader()); + + planCreator.setLatestStream(sourceStream); + + return planCreator.getPipeline(); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamLogicalConvention.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamLogicalConvention.java new file mode 100644 index 0000000000000..50fe8e013af1f --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamLogicalConvention.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.rel; + +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.ConventionTraitDef; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTrait; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.plan.RelTraitSet; + +/** + * Convertion for Beam SQL. + * + */ +public enum BeamLogicalConvention implements Convention { + INSTANCE; + + @Override + public Class getInterface() { + return BeamRelNode.class; + } + + @Override + public String getName() { + return "BEAM_LOGICAL"; + } + + @Override + public RelTraitDef getTraitDef() { + return ConventionTraitDef.INSTANCE; + } + + @Override + public boolean satisfies(RelTrait trait) { + return this == trait; + } + + @Override + public void register(RelOptPlanner planner) { + } + + @Override + public String toString() { + return getName(); + } + + @Override + public boolean canConvertConvention(Convention toConvention) { + return false; + } + + @Override + public boolean useAbstractConvertersForConversion(RelTraitSet fromTraits, RelTraitSet toTraits) { + return false; + } +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamProjectRel.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamProjectRel.java new file mode 100644 index 0000000000000..f4fc2d866afdd --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamProjectRel.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.beam.dsls.sql.rel; + +import java.util.List; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.PCollection; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.beam.dsls.sql.interpreter.BeamSQLExpressionExecutor; +import org.beam.dsls.sql.interpreter.BeamSQLSpELExecutor; +import org.beam.dsls.sql.planner.BeamPipelineCreator; +import org.beam.dsls.sql.planner.BeamSQLRelUtils; +import org.beam.dsls.sql.schema.BeamSQLRecordType; +import org.beam.dsls.sql.schema.BeamSQLRow; +import org.beam.dsls.sql.transform.BeamSQLProjectFn; + +/** + * BeamRelNode to replace a {@code Project} node. + * + */ +public class BeamProjectRel extends Project implements BeamRelNode { + + /** + * projects: {@link RexLiteral}, {@link RexInputRef}, {@link RexCall}. + * + * @param cluster + * @param traits + * @param input + * @param projects + * @param rowType + */ + public BeamProjectRel(RelOptCluster cluster, RelTraitSet traits, RelNode input, + List projects, RelDataType rowType) { + super(cluster, traits, input, projects, rowType); + } + + @Override + public Project copy(RelTraitSet traitSet, RelNode input, List projects, + RelDataType rowType) { + return new BeamProjectRel(getCluster(), traitSet, input, projects, rowType); + } + + @Override + public Pipeline buildBeamPipeline(BeamPipelineCreator planCreator) throws Exception { + RelNode input = getInput(); + BeamSQLRelUtils.getBeamRelInput(input).buildBeamPipeline(planCreator); + + String stageName = BeamSQLRelUtils.getStageName(this); + + PCollection upstream = planCreator.getLatestStream(); + + BeamSQLExpressionExecutor executor = new BeamSQLSpELExecutor(this); + + PCollection projectStream = upstream.apply(stageName, ParDo + .of(new BeamSQLProjectFn(getRelTypeName(), executor, BeamSQLRecordType.from(rowType)))); + + planCreator.setLatestStream(projectStream); + + return planCreator.getPipeline(); + + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamRelNode.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamRelNode.java new file mode 100644 index 0000000000000..4b53943d891ff --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/BeamRelNode.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.beam.dsls.sql.rel; + +import org.apache.beam.sdk.Pipeline; +import org.apache.calcite.rel.RelNode; +import org.beam.dsls.sql.planner.BeamPipelineCreator; + +/** + * A new method {@link #buildBeamPipeline(BeamPipelineCreator)} is added, it's + * called by {@link BeamPipelineCreator}. + * + */ +public interface BeamRelNode extends RelNode { + + /** + * A {@link BeamRelNode} is a recursive structure, the + * {@link BeamPipelineCreator} visits it with a DFS(Depth-First-Search) + * algorithm. + * + * @param planCreator + * @throws Exception + */ + Pipeline buildBeamPipeline(BeamPipelineCreator planCreator) throws Exception; +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rel/package-info.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/package-info.java new file mode 100644 index 0000000000000..13dc96285942e --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rel/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * BeamSQL specified nodes, to replace {@link org.apache.calcite.rel.RelNode}. + * + */ +package org.beam.dsls.sql.rel; diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamFilterRule.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamFilterRule.java new file mode 100644 index 0000000000000..2ad7c074dbdca --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamFilterRule.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.rule; + +import org.apache.calcite.plan.Convention; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.convert.ConverterRule; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.beam.dsls.sql.rel.BeamFilterRel; +import org.beam.dsls.sql.rel.BeamLogicalConvention; + +/** + * A {@code ConverterRule} to replace {@link Filter} with {@link BeamFilterRel}. 
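// The DFS contract described above can be read off the plans asserted in BeamPlannerExplainTest
// later in this patch. For "SELECT order_id, site_id, price FROM ORDER_DETAILS WHERE SITE_ID = 0 and price > 20"
// the converted tree is:
//
//   BeamProjectRel(order_id=[$0], site_id=[$1], price=[$2])
//     BeamFilterRel(condition=[AND(=($1, 0), >($2, 20))])
//       BeamIOSourceRel(table=[[ORDER_DETAILS]])
//
// buildBeamPipeline() is invoked on the root node, and each node first recurses into its input
// (see BeamProjectRel#buildBeamPipeline above), so PTransforms are chained onto the shared
// BeamPipelineCreator in source -> filter -> project order. A hypothetical invocation, assuming a
// converted root node and a prepared BeamPipelineCreator are in scope:
//
//   Pipeline pipeline = rootRel.buildBeamPipeline(planCreator);
//   pipeline.run().waitUntilFinish();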
+ * + */ +public class BeamFilterRule extends ConverterRule { + public static final BeamFilterRule INSTANCE = new BeamFilterRule(); + + private BeamFilterRule() { + super(LogicalFilter.class, Convention.NONE, BeamLogicalConvention.INSTANCE, "BeamFilterRule"); + } + + @Override + public RelNode convert(RelNode rel) { + final Filter filter = (Filter) rel; + final RelNode input = filter.getInput(); + + return new BeamFilterRel(filter.getCluster(), + filter.getTraitSet().replace(BeamLogicalConvention.INSTANCE), + convert(input, input.getTraitSet().replace(BeamLogicalConvention.INSTANCE)), + filter.getCondition()); + } +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamIOSinkRule.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamIOSinkRule.java new file mode 100644 index 0000000000000..a44c002f05c00 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamIOSinkRule.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.rule; + +import java.util.List; +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.prepare.Prepare; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.convert.ConverterRule; +import org.apache.calcite.rel.core.TableModify; +import org.apache.calcite.rel.logical.LogicalTableModify; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.schema.Table; +import org.beam.dsls.sql.rel.BeamIOSinkRel; +import org.beam.dsls.sql.rel.BeamLogicalConvention; + +/** + * A {@code ConverterRule} to replace {@link TableModify} with + * {@link BeamIOSinkRel}. 
+ * + */ +public class BeamIOSinkRule extends ConverterRule { + public static final BeamIOSinkRule INSTANCE = new BeamIOSinkRule(); + + private BeamIOSinkRule() { + super(LogicalTableModify.class, Convention.NONE, BeamLogicalConvention.INSTANCE, + "BeamIOSinkRule"); + } + + @Override + public RelNode convert(RelNode rel) { + final TableModify tableModify = (TableModify) rel; + final RelNode input = tableModify.getInput(); + + final RelOptCluster cluster = tableModify.getCluster(); + final RelTraitSet traitSet = tableModify.getTraitSet().replace(BeamLogicalConvention.INSTANCE); + final RelOptTable relOptTable = tableModify.getTable(); + final Prepare.CatalogReader catalogReader = tableModify.getCatalogReader(); + final RelNode convertedInput = convert(input, + input.getTraitSet().replace(BeamLogicalConvention.INSTANCE)); + final TableModify.Operation operation = tableModify.getOperation(); + final List updateColumnList = tableModify.getUpdateColumnList(); + final List sourceExpressionList = tableModify.getSourceExpressionList(); + final boolean flattened = tableModify.isFlattened(); + + final Table table = tableModify.getTable().unwrap(Table.class); + + switch (table.getJdbcTableType()) { + case TABLE: + case STREAM: + if (operation != TableModify.Operation.INSERT) { + throw new UnsupportedOperationException( + String.format("Streams doesn't support %s modify operation", operation)); + } + return new BeamIOSinkRel(cluster, traitSet, + relOptTable, catalogReader, convertedInput, operation, updateColumnList, + sourceExpressionList, flattened); + default: + throw new IllegalArgumentException( + String.format("Unsupported table type: %s", table.getJdbcTableType())); + } + } +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamIOSourceRule.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamIOSourceRule.java new file mode 100644 index 0000000000000..9e4778b5ff045 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamIOSourceRule.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.rule; + +import org.apache.calcite.plan.Convention; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.convert.ConverterRule; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.logical.LogicalTableScan; +import org.beam.dsls.sql.rel.BeamIOSourceRel; +import org.beam.dsls.sql.rel.BeamLogicalConvention; + +/** + * A {@code ConverterRule} to replace {@link TableScan} with + * {@link BeamIOSourceRel}. 
+ * + */ +public class BeamIOSourceRule extends ConverterRule { + public static final BeamIOSourceRule INSTANCE = new BeamIOSourceRule(); + + private BeamIOSourceRule() { + super(LogicalTableScan.class, Convention.NONE, BeamLogicalConvention.INSTANCE, + "BeamIOSourceRule"); + } + + @Override + public RelNode convert(RelNode rel) { + final TableScan scan = (TableScan) rel; + + return new BeamIOSourceRel(scan.getCluster(), + scan.getTraitSet().replace(BeamLogicalConvention.INSTANCE), scan.getTable()); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamProjectRule.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamProjectRule.java new file mode 100644 index 0000000000000..117a056fa0ff7 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/BeamProjectRule.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.rule; + +import org.apache.calcite.plan.Convention; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.convert.ConverterRule; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.logical.LogicalProject; +import org.beam.dsls.sql.rel.BeamLogicalConvention; +import org.beam.dsls.sql.rel.BeamProjectRel; + +/** + * A {@code ConverterRule} to replace {@link Project} with + * {@link BeamProjectRel}. + * + */ +public class BeamProjectRule extends ConverterRule { + public static final BeamProjectRule INSTANCE = new BeamProjectRule(); + + private BeamProjectRule() { + super(LogicalProject.class, Convention.NONE, BeamLogicalConvention.INSTANCE, "BeamProjectRule"); + } + + @Override + public RelNode convert(RelNode rel) { + final Project project = (Project) rel; + final RelNode input = project.getInput(); + + return new BeamProjectRel(project.getCluster(), + project.getTraitSet().replace(BeamLogicalConvention.INSTANCE), + convert(input, input.getTraitSet().replace(BeamLogicalConvention.INSTANCE)), + project.getProjects(), project.getRowType()); + } +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/rule/package-info.java b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/package-info.java new file mode 100644 index 0000000000000..634f6a8b37ecb --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/rule/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * {@link RelOptRule} to generate {@link BeamRelNode}. + */ +package org.beam.dsls.sql.rule; diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BaseBeamTable.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BaseBeamTable.java new file mode 100644 index 0000000000000..8d31c6def18a6 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BaseBeamTable.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
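// The four converter rules in this package only take effect once they are registered with the
// Calcite planner; that wiring lives in BeamQueryPlanner, which is not part of this hunk. The
// fragment below is a minimal sketch of how such a rule set is typically assembled with Calcite's
// RuleSets helper (the variable name beamRuleSet is illustrative).
org.apache.calcite.tools.RuleSet beamRuleSet = org.apache.calcite.tools.RuleSets.ofList(
    BeamIOSourceRule.INSTANCE,
    BeamFilterRule.INSTANCE,
    BeamProjectRule.INSTANCE,
    BeamIOSinkRule.INSTANCE);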
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.schema; + +import java.io.Serializable; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PDone; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema.TableType; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.beam.dsls.sql.planner.BeamQueryPlanner; + +/** + * Each IO in Beam has one table schema, by extending {@link BaseBeamTable}. + */ +public abstract class BaseBeamTable implements ScannableTable, Serializable { + + /** + * + */ + private static final long serialVersionUID = -1262988061830914193L; + private RelDataType relDataType; + + protected BeamSQLRecordType beamSqlRecordType; + + public BaseBeamTable(RelProtoDataType protoRowType) { + this.relDataType = protoRowType.apply(BeamQueryPlanner.TYPE_FACTORY); + this.beamSqlRecordType = BeamSQLRecordType.from(relDataType); + } + + /** + * In Beam SQL, there's no difference between a batch query and a streaming + * query. {@link BeamIOType} is used to validate the sources. + */ + public abstract BeamIOType getSourceType(); + + /** + * create a {@code IO.read()} instance to read from source. + * + * @return + */ + public abstract PTransform> buildIOReader(); + + /** + * create a {@code IO.write()} instance to write to target. + * + * @return + */ + public abstract PTransform, PDone> buildIOWriter(); + + @Override + public Enumerable scan(DataContext root) { + // not used as Beam SQL uses its own execution engine + return null; + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return relDataType; + } + + /** + * Not used {@link Statistic} to optimize the plan. + */ + @Override + public Statistic getStatistic() { + return Statistics.UNKNOWN; + } + + /** + * all sources are treated as TABLE in Beam SQL. + */ + @Override + public TableType getJdbcTableType() { + return TableType.TABLE; + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamIOType.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamIOType.java new file mode 100644 index 0000000000000..5e55b0fc6e482 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamIOType.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.schema; + +import java.io.Serializable; + +/** + * Type as a source IO, determined whether it's a STREAMING process, or batch + * process. + */ +public enum BeamIOType implements Serializable { + BOUNDED, UNBOUNDED; +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSQLRecordType.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSQLRecordType.java new file mode 100644 index 0000000000000..dc8e38103cc69 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSQLRecordType.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.schema; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * Field type information in {@link BeamSQLRow}. 
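// BaseBeamTable and BeamIOType above define the whole contract a storage backend has to satisfy.
// The class below is a minimal, hypothetical bounded table backed by an in-memory list; it is not
// part of this patch, and the name InMemoryBeamTable and its fields are illustrative. It only uses
// types that already appear in this patch: Create/ParDo/PDone from the Beam SDK and the schema
// classes defined here.
import java.util.List;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PDone;
import org.apache.calcite.rel.type.RelProtoDataType;

public class InMemoryBeamTable extends BaseBeamTable {
  private final List<BeamSQLRow> rows;

  public InMemoryBeamTable(RelProtoDataType protoRowType, List<BeamSQLRow> rows) {
    super(protoRowType);
    this.rows = rows;
  }

  @Override
  public BeamIOType getSourceType() {
    return BeamIOType.BOUNDED;
  }

  @Override
  public PTransform<PBegin, PCollection<BeamSQLRow>> buildIOReader() {
    return new PTransform<PBegin, PCollection<BeamSQLRow>>() {
      @Override
      public PCollection<BeamSQLRow> expand(PBegin input) {
        // materialize the in-memory rows as a bounded PCollection
        return input.apply("readInMemory", Create.of(rows).withCoder(BeamSqlRowCoder.of()));
      }
    };
  }

  @Override
  public PTransform<PCollection<BeamSQLRow>, PDone> buildIOWriter() {
    return new PTransform<PCollection<BeamSQLRow>, PDone>() {
      @Override
      public PDone expand(PCollection<BeamSQLRow> input) {
        input.apply("writeToConsole", ParDo.of(new PrintRowFn()));
        return PDone.in(input.getPipeline());
      }
    };
  }

  private static class PrintRowFn extends DoFn<BeamSQLRow, Void> {
    @ProcessElement
    public void processElement(ProcessContext c) {
      System.out.println(c.element().valueInString());
    }
  }
}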
+ * + */ +//@DefaultCoder(BeamSQLRecordTypeCoder.class) +public class BeamSQLRecordType implements Serializable { + /** + * + */ + private static final long serialVersionUID = -5318734648766104712L; + private List fieldsName = new ArrayList<>(); + private List fieldsType = new ArrayList<>(); + + public static BeamSQLRecordType from(RelDataType tableInfo) { + BeamSQLRecordType record = new BeamSQLRecordType(); + for (RelDataTypeField f : tableInfo.getFieldList()) { + record.fieldsName.add(f.getName()); + record.fieldsType.add(f.getType().getSqlTypeName()); + } + return record; + } + + public int size() { + return fieldsName.size(); + } + + public List getFieldsName() { + return fieldsName; + } + + public void setFieldsName(List fieldsName) { + this.fieldsName = fieldsName; + } + + public List getFieldsType() { + return fieldsType; + } + + public void setFieldsType(List fieldsType) { + this.fieldsType = fieldsType; + } + + @Override + public String toString() { + return "RecordType [fieldsName=" + fieldsName + ", fieldsType=" + fieldsType + "]"; + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSQLRecordTypeCoder.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSQLRecordTypeCoder.java new file mode 100644 index 0000000000000..c708c4e50b1ce --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSQLRecordTypeCoder.java @@ -0,0 +1,71 @@ +package org.beam.dsls.sql.schema; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.List; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderException; +import org.apache.beam.sdk.coders.StandardCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * A {@link Coder} for {@link BeamSQLRecordType}. 
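// A short sketch of the usual way a BeamSQLRecordType is obtained: build a Calcite row type with a
// RelDataTypeFactory (BeamQueryPlanner.TYPE_FACTORY is the factory BaseBeamTable uses) and convert
// it with from(). The columns are the ORDER_DETAILS columns used by the tests later in this patch;
// the variable names are illustrative.
RelDataTypeFactory typeFactory = BeamQueryPlanner.TYPE_FACTORY;
RelDataType orderRowType = typeFactory.builder()
    .add("order_id", SqlTypeName.BIGINT)
    .add("site_id", SqlTypeName.INTEGER)
    .add("price", SqlTypeName.DOUBLE)
    .add("order_time", SqlTypeName.TIMESTAMP)
    .build();
BeamSQLRecordType orderRecordType = BeamSQLRecordType.from(orderRowType);
// orderRecordType.size() == 4, and orderRecordType.getFieldsType().get(0) == SqlTypeName.BIGINT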
+ * + */ +public class BeamSQLRecordTypeCoder extends StandardCoder { + private static final StringUtf8Coder stringCoder = StringUtf8Coder.of(); + private static final VarIntCoder intCoder = VarIntCoder.of(); + + private static final BeamSQLRecordTypeCoder INSTANCE = new BeamSQLRecordTypeCoder(); + private BeamSQLRecordTypeCoder(){} + + public static BeamSQLRecordTypeCoder of() { + return INSTANCE; + } + + @Override + public void encode(BeamSQLRecordType value, OutputStream outStream, + org.apache.beam.sdk.coders.Coder.Context context) throws CoderException, IOException { + Context nested = context.nested(); + intCoder.encode(value.size(), outStream, nested); + for(String fieldName : value.getFieldsName()){ + stringCoder.encode(fieldName, outStream, nested); + } + for(SqlTypeName fieldType : value.getFieldsType()){ + stringCoder.encode(fieldType.name(), outStream, nested); + } + outStream.flush(); + } + + @Override + public BeamSQLRecordType decode(InputStream inStream, + org.apache.beam.sdk.coders.Coder.Context context) throws CoderException, IOException { + BeamSQLRecordType typeRecord = new BeamSQLRecordType(); + Context nested = context.nested(); + int size = intCoder.decode(inStream, nested); + for(int idx=0; idx> getCoderArguments() { + // TODO Auto-generated method stub + return null; + } + + @Override + public void verifyDeterministic() + throws org.apache.beam.sdk.coders.Coder.NonDeterministicException { + // TODO Auto-generated method stub + + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSQLRow.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSQLRow.java new file mode 100644 index 0000000000000..3ec170e85a713 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSQLRow.java @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.schema; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * Repersent a generic ROW record in Beam SQL. 
+ * + */ +//@DefaultCoder(BeamSqlRowCoder.class) +public class BeamSQLRow implements Serializable { + /** + * + */ + private static final long serialVersionUID = 4569220242480160895L; + + private List nullFields = new ArrayList<>(); + private List dataValues; + private BeamSQLRecordType dataType; + + public BeamSQLRow(BeamSQLRecordType dataType) { + this.dataType = dataType; + this.dataValues = new ArrayList<>(); + for(int idx=0; idx dataValues) { + this.dataValues = dataValues; + this.dataType = dataType; + } + + public void addField(String fieldName, Object fieldValue) { + addField(dataType.getFieldsName().indexOf(fieldName), fieldValue); + } + + public void addField(int index, Object fieldValue) { + if(fieldValue == null){ + dataValues.set(index, fieldValue); + if(!nullFields.contains(index)){nullFields.add(index);} + return; + } + + SqlTypeName fieldType = dataType.getFieldsType().get(index); + switch (fieldType) { + case INTEGER: + case SMALLINT: + case TINYINT: + if(!(fieldValue instanceof Integer)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + } + break; + case DOUBLE: + if(!(fieldValue instanceof Double)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + } + break; + case BIGINT: + if(!(fieldValue instanceof Long)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + } + break; + case FLOAT: + if(!(fieldValue instanceof Float)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + } + break; + case VARCHAR: + if(!(fieldValue instanceof String)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + } + break; + case TIME: + case TIMESTAMP: + if(!(fieldValue instanceof Date)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + } + break; + default: + throw new UnsupportedDataTypeException(fieldType); + } + dataValues.set(index, fieldValue); + } + + + public int getInteger(int idx) { + return (Integer) getFieldValue(idx); + } + + public double getDouble(int idx) { + return (Double) getFieldValue(idx); + } + + public long getLong(int idx) { + return (Long) getFieldValue(idx); + } + + public String getString(int idx) { + return (String) getFieldValue(idx); + } + + public Date getDate(int idx) { + return (Date) getFieldValue(idx); + } + + public Object getFieldValue(String fieldName) { + return getFieldValue(dataType.getFieldsName().indexOf(fieldName)); + } + + public Object getFieldValue(int fieldIdx) { + if(nullFields.contains(fieldIdx)){ + return null; + } + + Object fieldValue = dataValues.get(fieldIdx); + SqlTypeName fieldType = dataType.getFieldsType().get(fieldIdx); + + switch (fieldType) { + case INTEGER: + case SMALLINT: + case TINYINT: + if(!(fieldValue instanceof Integer)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + }else{ + return Integer.valueOf(fieldValue.toString()); + } + case DOUBLE: + if(!(fieldValue instanceof Double)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + }else{ + return Double.valueOf(fieldValue.toString()); + } + case BIGINT: + if(!(fieldValue instanceof Long)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + }else{ + return 
Long.valueOf(fieldValue.toString()); + } + case FLOAT: + if(!(fieldValue instanceof Float)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + }else{ + return Float.valueOf(fieldValue.toString()); + } + case VARCHAR: + if(!(fieldValue instanceof String)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + }else{ + return fieldValue.toString(); + } + case TIME: + case TIMESTAMP: + if(!(fieldValue instanceof Date)){ + throw new InvalidFieldException(String.format("[%s] doesn't match type [%s]", fieldValue, fieldType)); + }else{ + return fieldValue; + } + default: + throw new UnsupportedDataTypeException(fieldType); + } + } + + public int size() { + return dataValues.size(); + } + + public List getDataValues() { + return dataValues; + } + + public void setDataValues(List dataValues) { + this.dataValues = dataValues; + } + + public BeamSQLRecordType getDataType() { + return dataType; + } + + public void setDataType(BeamSQLRecordType dataType) { + this.dataType = dataType; + } + + public void setNullFields(List nullFields) { + this.nullFields = nullFields; + } + + public List getNullFields() { + return nullFields; + } + + @Override + public String toString() { + return "BeamSQLRow [dataValues=" + dataValues + ", dataType=" + dataType + "]"; + } + + /** + * Return data fields as key=value. + * @return + */ + public String valueInString() { + StringBuffer sb = new StringBuffer(); + for (int idx = 0; idx < size(); ++idx) { + sb.append(String.format(",%s=%s", dataType.getFieldsName().get(idx), getFieldValue(idx))); + } + return sb.substring(1); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + BeamSQLRow other = (BeamSQLRow) obj; + return toString().equals(other.toString()); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSqlRowCoder.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSqlRowCoder.java new file mode 100644 index 0000000000000..de80dd5a16446 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/BeamSqlRowCoder.java @@ -0,0 +1,132 @@ +package org.beam.dsls.sql.schema; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Date; +import java.util.List; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.BigEndianLongCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderException; +import org.apache.beam.sdk.coders.DoubleCoder; +import org.apache.beam.sdk.coders.ListCoder; +import org.apache.beam.sdk.coders.StandardCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; + +/** + * A {@link Coder} encodes {@link BeamSQLRow}. 
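// A minimal sketch of populating a BeamSQLRow for the ORDER_DETAILS record type sketched earlier
// (orderRecordType is the illustrative variable from that fragment). addField(...) checks each
// value against the declared SqlTypeName and throws InvalidFieldException on a mismatch; null
// values are tracked in nullFields instead.
BeamSQLRow orderRow = new BeamSQLRow(orderRecordType);
orderRow.addField("order_id", 12345L);                  // BIGINT    -> Long
orderRow.addField("site_id", 0);                        // INTEGER   -> Integer
orderRow.addField("price", 20.5);                       // DOUBLE    -> Double
orderRow.addField("order_time", new java.util.Date());  // TIMESTAMP -> java.util.Date
// orderRow.valueInString() -> "order_id=12345,site_id=0,price=20.5,order_time=<Date#toString>"
// orderRow.addField("site_id", "0") would throw InvalidFieldException (a String is not an Integer)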
+ * + */ +public class BeamSqlRowCoder extends StandardCoder{ + private static final BeamSQLRecordTypeCoder recordTypeCoder = BeamSQLRecordTypeCoder.of(); + + private static final ListCoder listCoder = ListCoder.of(BigEndianIntegerCoder.of()); + + private static final StringUtf8Coder stringCoder = StringUtf8Coder.of(); + private static final BigEndianIntegerCoder intCoder = BigEndianIntegerCoder.of(); + private static final BigEndianLongCoder longCoder = BigEndianLongCoder.of(); + private static final DoubleCoder doubleCoder = DoubleCoder.of(); + + private static final BeamSqlRowCoder INSTANCE = new BeamSqlRowCoder(); + private BeamSqlRowCoder(){} + + public static BeamSqlRowCoder of() { + return INSTANCE; + } + + @Override + public void encode(BeamSQLRow value, OutputStream outStream, + org.apache.beam.sdk.coders.Coder.Context context) throws CoderException, IOException { + recordTypeCoder.encode(value.getDataType(), outStream, context); + listCoder.encode(value.getNullFields(), outStream, context); + + Context nested = context.nested(); + + for (int idx = 0; idx < value.size(); ++idx) { + if(value.getNullFields().contains(idx)){ + continue; + } + + switch (value.getDataType().getFieldsType().get(idx)) { + case INTEGER: + case SMALLINT: + case TINYINT: + intCoder.encode(value.getInteger(idx), outStream, nested); + break; + case DOUBLE: + case FLOAT: + doubleCoder.encode(value.getDouble(idx), outStream, nested); + break; + case BIGINT: + longCoder.encode(value.getLong(idx), outStream, nested); + break; + case VARCHAR: + stringCoder.encode(value.getString(idx), outStream, nested); + break; + case TIME: + case TIMESTAMP: + longCoder.encode(value.getDate(idx).getTime(), outStream, nested); + break; + + default: + throw new UnsupportedDataTypeException(value.getDataType().getFieldsType().get(idx)); + } + } + } + + @Override + public BeamSQLRow decode(InputStream inStream, org.apache.beam.sdk.coders.Coder.Context context) + throws CoderException, IOException { + BeamSQLRecordType type = recordTypeCoder.decode(inStream, context); + List nullFields = listCoder.decode(inStream, context); + + BeamSQLRow record = new BeamSQLRow(type); + record.setNullFields(nullFields); + + for (int idx = 0; idx < type.size(); ++idx) { + if(nullFields.contains(idx)){ + continue; + } + + switch (type.getFieldsType().get(idx)) { + case INTEGER: + case SMALLINT: + case TINYINT: + record.addField(idx, intCoder.decode(inStream, context)); + break; + case DOUBLE: + case FLOAT: + record.addField(idx, doubleCoder.decode(inStream, context)); + break; + case BIGINT: + record.addField(idx, longCoder.decode(inStream, context)); + break; + case VARCHAR: + record.addField(idx, stringCoder.decode(inStream, context)); + break; + case TIME: + case TIMESTAMP: + record.addField(idx, new Date(longCoder.decode(inStream, context))); + break; + + default: + throw new UnsupportedDataTypeException(type.getFieldsType().get(idx)); + } + } + + return record; + } + + @Override + public List> getCoderArguments() { + return null; + } + + @Override + public void verifyDeterministic() + throws org.apache.beam.sdk.coders.Coder.NonDeterministicException { + + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/InvalidFieldException.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/InvalidFieldException.java new file mode 100644 index 0000000000000..3ab86c52e1e13 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/InvalidFieldException.java @@ -0,0 +1,13 @@ +package org.beam.dsls.sql.schema; + +public class 
InvalidFieldException extends RuntimeException { + + public InvalidFieldException() { + super(); + } + + public InvalidFieldException(String message) { + super(message); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/UnsupportedDataTypeException.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/UnsupportedDataTypeException.java new file mode 100644 index 0000000000000..7f7afb23991ea --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/UnsupportedDataTypeException.java @@ -0,0 +1,11 @@ +package org.beam.dsls.sql.schema; + +import org.apache.calcite.sql.type.SqlTypeName; + +public class UnsupportedDataTypeException extends RuntimeException { + + public UnsupportedDataTypeException(SqlTypeName unsupportedType){ + super(String.format("Not support data type [%s]", unsupportedType)); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/kafka/BeamKafkaCSVTable.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/kafka/BeamKafkaCSVTable.java new file mode 100644 index 0000000000000..2570763c3e7b3 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/kafka/BeamKafkaCSVTable.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.schema.kafka; + +import java.util.List; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.beam.dsls.sql.schema.BeamSQLRecordType; +import org.beam.dsls.sql.schema.BeamSQLRow; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A Kafka topic that saves records as CSV format. + * + */ +public class BeamKafkaCSVTable extends BeamKafkaTable { + + /** + * + */ + private static final long serialVersionUID = 4754022536543333984L; + + public static final String DELIMITER = ","; + private static final Logger LOG = LoggerFactory.getLogger(BeamKafkaCSVTable.class); + + public BeamKafkaCSVTable(RelProtoDataType protoRowType, String bootstrapServers, + List topics) { + super(protoRowType, bootstrapServers, topics); + } + + @Override + public PTransform>, PCollection> + getPTransformForInput() { + return new CsvRecorderDecoder(beamSqlRecordType); + } + + @Override + public PTransform, PCollection>> + getPTransformForOutput() { + return new CsvRecorderEncoder(beamSqlRecordType); + } + + /** + * A PTransform to convert {@code KV} to {@link BeamSQLRow}. 
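// A sketch of round-tripping a row through BeamSqlRowCoder outside a pipeline, using the explicit
// Coder.Context API of the pre-2.0 Beam SDK this patch targets. orderRow is the illustrative row
// from the earlier fragment; encode/decode throw CoderException and IOException, so a real caller
// would handle or declare them.
java.io.ByteArrayOutputStream bytesOut = new java.io.ByteArrayOutputStream();
BeamSqlRowCoder.of().encode(orderRow, bytesOut, org.apache.beam.sdk.coders.Coder.Context.OUTER);
BeamSQLRow decodedRow = BeamSqlRowCoder.of().decode(
    new java.io.ByteArrayInputStream(bytesOut.toByteArray()),
    org.apache.beam.sdk.coders.Coder.Context.OUTER);
// decodedRow.equals(orderRow) should hold, since BeamSQLRow#equals compares toString() output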
+ * + */ + public static class CsvRecorderDecoder + extends PTransform>, PCollection> { + private BeamSQLRecordType recordType; + + public CsvRecorderDecoder(BeamSQLRecordType recordType) { + this.recordType = recordType; + } + + @Override + public PCollection expand(PCollection> input) { + return input.apply("decodeRecord", ParDo.of(new DoFn, BeamSQLRow>() { + @ProcessElement + public void processElement(ProcessContext c) { + String rowInString = new String(c.element().getValue()); + String[] parts = rowInString.split(BeamKafkaCSVTable.DELIMITER); + if (parts.length != recordType.size()) { + LOG.error(String.format("invalid record: ", rowInString)); + } else { + BeamSQLRow sourceRecord = new BeamSQLRow(recordType); + for (int idx = 0; idx < parts.length; ++idx) { + sourceRecord.addField(idx, parts[idx]); + } + c.output(sourceRecord); + } + } + })); + } + } + + /** + * A PTransform to convert {@link BeamSQLRow} to {@code KV}. + * + */ + public static class CsvRecorderEncoder + extends PTransform, PCollection>> { + private BeamSQLRecordType recordType; + + public CsvRecorderEncoder(BeamSQLRecordType recordType) { + this.recordType = recordType; + } + + @Override + public PCollection> expand(PCollection input) { + return input.apply("encodeRecord", ParDo.of(new DoFn>() { + @ProcessElement + public void processElement(ProcessContext c) { + BeamSQLRow in = c.element(); + StringBuffer sb = new StringBuffer(); + for (int idx = 0; idx < in.size(); ++idx) { + sb.append(DELIMITER); + sb.append(in.getFieldValue(idx).toString()); + } + c.output(KV.of(new byte[] {}, sb.substring(1).getBytes())); + } + })); + + } + + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/kafka/BeamKafkaTable.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/kafka/BeamKafkaTable.java new file mode 100644 index 0000000000000..29f3f927b4297 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/kafka/BeamKafkaTable.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.beam.dsls.sql.schema.kafka; + +import static com.google.common.base.Preconditions.checkArgument; +import java.io.Serializable; +import java.util.List; +import java.util.Map; +import org.apache.beam.sdk.coders.ByteArrayCoder; +import org.apache.beam.sdk.io.kafka.KafkaIO; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PDone; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.beam.dsls.sql.schema.BaseBeamTable; +import org.beam.dsls.sql.schema.BeamIOType; +import org.beam.dsls.sql.schema.BeamSQLRow; + +/** + * {@code BeamKafkaTable} represent a Kafka topic, as source or target. Need to + * extend to convert between {@code BeamSQLRow} and {@code KV}. + * + */ +public abstract class BeamKafkaTable extends BaseBeamTable implements Serializable { + + /** + * + */ + private static final long serialVersionUID = -634715473399906527L; + + private String bootstrapServers; + private List topics; + private Map configUpdates; + + protected BeamKafkaTable(RelProtoDataType protoRowType) { + super(protoRowType); + } + + public BeamKafkaTable(RelProtoDataType protoRowType, String bootstrapServers, + List topics) { + super(protoRowType); + this.bootstrapServers = bootstrapServers; + this.topics = topics; + } + + public BeamKafkaTable updateConsumerProperties(Map configUpdates) { + this.configUpdates = configUpdates; + return this; + } + + @Override + public BeamIOType getSourceType() { + return BeamIOType.UNBOUNDED; + } + + /** + * + * @return + */ + public abstract PTransform>, PCollection> + getPTransformForInput(); + + /** + * + * @return + */ + public abstract PTransform, PCollection>> + getPTransformForOutput(); + + @Override + public PTransform> buildIOReader() { + return new PTransform>() { + /** + * + */ + private static final long serialVersionUID = 9167792271351182771L; + + @Override + public PCollection expand(PBegin input) { + return input.apply("read", + KafkaIO.read().withBootstrapServers(bootstrapServers).withTopics(topics) + .updateConsumerProperties(configUpdates).withKeyCoder(ByteArrayCoder.of()) + .withValueCoder(ByteArrayCoder.of()).withoutMetadata()) + .apply("in_format", getPTransformForInput()); + + } + }; + } + + @Override + public PTransform, PDone> buildIOWriter() { + checkArgument(topics != null && topics.size() == 1, + "Only one topic can be acceptable as output."); + + return new PTransform, PDone>() { + @Override + public PDone expand(PCollection input) { + return input.apply("out_reformat", getPTransformForOutput()).apply("persistent", + KafkaIO.write().withBootstrapServers(bootstrapServers) + .withTopic(topics.get(0)).withKeyCoder(ByteArrayCoder.of()) + .withValueCoder(ByteArrayCoder.of())); + } + }; + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/kafka/package-info.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/kafka/package-info.java new file mode 100644 index 0000000000000..822fce703da10 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/kafka/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
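// A sketch of declaring a Kafka-backed CSV table, mirroring BasePlanner in the tests below. The
// broker address, topic name and the protoRowType variable are placeholders; consumer settings are
// forwarded to KafkaIO.read() through updateConsumerProperties(...).
java.util.Map<String, Object> consumerProps = new java.util.HashMap<String, Object>();
consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");

BeamKafkaTable ordersTopic =
    new BeamKafkaCSVTable(protoRowType, "127.0.0.1:9092", java.util.Arrays.asList("orders"))
        .updateConsumerProperties(consumerProps);
// ordersTopic.getSourceType() == BeamIOType.UNBOUNDED
// ordersTopic.buildIOReader() yields a PTransform<PBegin, PCollection<BeamSQLRow>> backed by KafkaIO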
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * table schema for KafkaIO. + */ +package org.beam.dsls.sql.schema.kafka; diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/schema/package-info.java b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/package-info.java new file mode 100644 index 0000000000000..ef9cc7d38b6a0 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/schema/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * define table schema, to map with Beam IO components. + * + */ +package org.beam.dsls.sql.schema; diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/transform/BeamSQLFilterFn.java b/dsls/sql/src/main/java/org/beam/dsls/sql/transform/BeamSQLFilterFn.java new file mode 100644 index 0000000000000..06db2802c393c --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/transform/BeamSQLFilterFn.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.transform; + +import java.util.List; +import org.apache.beam.sdk.transforms.DoFn; +import org.beam.dsls.sql.interpreter.BeamSQLExpressionExecutor; +import org.beam.dsls.sql.rel.BeamFilterRel; +import org.beam.dsls.sql.schema.BeamSQLRow; + +/** + * {@code BeamSQLFilterFn} is the executor for a {@link BeamFilterRel} step. 
+ * + */ +public class BeamSQLFilterFn extends DoFn { + /** + * + */ + private static final long serialVersionUID = -1256111753670606705L; + + private String stepName; + private BeamSQLExpressionExecutor executor; + + public BeamSQLFilterFn(String stepName, BeamSQLExpressionExecutor executor) { + super(); + this.stepName = stepName; + this.executor = executor; + } + + @Setup + public void setup() { + executor.prepare(); + } + + @ProcessElement + public void processElement(ProcessContext c) { + BeamSQLRow in = c.element(); + + List result = executor.execute(in); + + if ((Boolean) result.get(0)) { + c.output(in); + } + } + + @Teardown + public void close() { + executor.close(); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/transform/BeamSQLOutputToConsoleFn.java b/dsls/sql/src/main/java/org/beam/dsls/sql/transform/BeamSQLOutputToConsoleFn.java new file mode 100644 index 0000000000000..1014c0d3f7b14 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/transform/BeamSQLOutputToConsoleFn.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.transform; + +import org.apache.beam.sdk.transforms.DoFn; +import org.beam.dsls.sql.schema.BeamSQLRow; + +/** + * A test PTransform to display output in console. + * + */ +public class BeamSQLOutputToConsoleFn extends DoFn { + /** + * + */ + private static final long serialVersionUID = -1256111753670606705L; + + private String stepName; + + public BeamSQLOutputToConsoleFn(String stepName) { + super(); + this.stepName = stepName; + } + + @ProcessElement + public void processElement(ProcessContext c) { + System.out.println("Output: " + c.element().getDataValues()); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/transform/BeamSQLProjectFn.java b/dsls/sql/src/main/java/org/beam/dsls/sql/transform/BeamSQLProjectFn.java new file mode 100644 index 0000000000000..12061d2f094c2 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/transform/BeamSQLProjectFn.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
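// A sketch of how BeamSQLFilterFn is expected to be wired in, following the pattern of
// BeamProjectRel#buildBeamPipeline earlier in this patch. BeamFilterRel itself is not included in
// this hunk, so the exact call site is assumed; filterRel and upstream are illustrative variables.
BeamSQLExpressionExecutor filterExecutor = new BeamSQLSpELExecutor(filterRel);
PCollection<BeamSQLRow> filtered = upstream.apply("filter",
    ParDo.of(new BeamSQLFilterFn(filterRel.getRelTypeName(), filterExecutor)));
// during development, BeamSQLOutputToConsoleFn can be chained on to print what passes the filter:
filtered.apply("console", ParDo.of(new BeamSQLOutputToConsoleFn("console")));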
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.transform; + +import java.util.List; +import org.apache.beam.sdk.transforms.DoFn; +import org.beam.dsls.sql.interpreter.BeamSQLExpressionExecutor; +import org.beam.dsls.sql.rel.BeamProjectRel; +import org.beam.dsls.sql.schema.BeamSQLRecordType; +import org.beam.dsls.sql.schema.BeamSQLRow; + +/** + * + * {@code BeamSQLProjectFn} is the executor for a {@link BeamProjectRel} step. + * + */ +public class BeamSQLProjectFn extends DoFn { + + /** + * + */ + private static final long serialVersionUID = -1046605249999014608L; + private String stepName; + private BeamSQLExpressionExecutor executor; + private BeamSQLRecordType outputRecordType; + + public BeamSQLProjectFn(String stepName, BeamSQLExpressionExecutor executor, + BeamSQLRecordType outputRecordType) { + super(); + this.stepName = stepName; + this.executor = executor; + this.outputRecordType = outputRecordType; + } + + @Setup + public void setup() { + executor.prepare(); + } + + @ProcessElement + public void processElement(ProcessContext c) { + List results = executor.execute(c.element()); + + BeamSQLRow outRow = new BeamSQLRow(outputRecordType); + for (int idx = 0; idx < results.size(); ++idx) { + outRow.addField(idx, results.get(idx)); + } + + c.output(outRow); + } + + @Teardown + public void close() { + executor.close(); + } + +} diff --git a/dsls/sql/src/main/java/org/beam/dsls/sql/transform/package-info.java b/dsls/sql/src/main/java/org/beam/dsls/sql/transform/package-info.java new file mode 100644 index 0000000000000..91b5639c62254 --- /dev/null +++ b/dsls/sql/src/main/java/org/beam/dsls/sql/transform/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * {@link PTransform} used in a BeamSQL pipeline. + */ +package org.beam.dsls.sql.transform; diff --git a/dsls/sql/src/main/resources/log4j.properties b/dsls/sql/src/main/resources/log4j.properties new file mode 100644 index 0000000000000..709484b4951bb --- /dev/null +++ b/dsls/sql/src/main/resources/log4j.properties @@ -0,0 +1,23 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +log4j.rootLogger=ERROR,console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n \ No newline at end of file diff --git a/dsls/sql/src/test/java/org/beam/dsls/sql/planner/BasePlanner.java b/dsls/sql/src/test/java/org/beam/dsls/sql/planner/BasePlanner.java new file mode 100644 index 0000000000000..56e45c4732d21 --- /dev/null +++ b/dsls/sql/src/test/java/org/beam/dsls/sql/planner/BasePlanner.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.planner; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.beam.dsls.sql.schema.BaseBeamTable; +import org.beam.dsls.sql.schema.kafka.BeamKafkaCSVTable; +import org.junit.BeforeClass; + +/** + * prepare {@code BeamSqlRunner} for test. 
+ * + */ +public class BasePlanner { + public static BeamSqlRunner runner = new BeamSqlRunner(); + + @BeforeClass + public static void prepare() { + runner.addTable("ORDER_DETAILS", getTable()); + runner.addTable("SUB_ORDER", getTable("127.0.0.1:9092", "sub_orders")); + runner.addTable("SUB_ORDER_RAM", getTable()); + } + + private static BaseBeamTable getTable() { + final RelProtoDataType protoRowType = new RelProtoDataType() { + @Override + public RelDataType apply(RelDataTypeFactory a0) { + return a0.builder().add("order_id", SqlTypeName.BIGINT).add("site_id", SqlTypeName.INTEGER) + .add("price", SqlTypeName.DOUBLE).add("order_time", SqlTypeName.TIMESTAMP).build(); + } + }; + + return new MockedBeamSQLTable(protoRowType); + } + + public static BaseBeamTable getTable(String bootstrapServer, String topic) { + final RelProtoDataType protoRowType = new RelProtoDataType() { + @Override + public RelDataType apply(RelDataTypeFactory a0) { + return a0.builder().add("order_id", SqlTypeName.BIGINT).add("site_id", SqlTypeName.INTEGER) + .add("price", SqlTypeName.DOUBLE).add("order_time", SqlTypeName.TIMESTAMP).build(); + } + }; + + Map consumerPara = new HashMap(); + consumerPara.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest"); + + return new BeamKafkaCSVTable(protoRowType, bootstrapServer, Arrays.asList(topic)) + .updateConsumerProperties(consumerPara); + } +} diff --git a/dsls/sql/src/test/java/org/beam/dsls/sql/planner/BeamPlannerExplainTest.java b/dsls/sql/src/test/java/org/beam/dsls/sql/planner/BeamPlannerExplainTest.java new file mode 100644 index 0000000000000..a77878fc150ea --- /dev/null +++ b/dsls/sql/src/test/java/org/beam/dsls/sql/planner/BeamPlannerExplainTest.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.beam.dsls.sql.planner; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests to explain queries. 
+ *
+ */
+public class BeamPlannerExplainTest extends BasePlanner {
+
+  @Test
+  public void selectAll() throws Exception {
+    String sql = "SELECT * FROM ORDER_DETAILS";
+    String plan = runner.explainQuery(sql);
+
+    String expectedPlan =
+        "BeamProjectRel(order_id=[$0], site_id=[$1], price=[$2], order_time=[$3])\n"
+            + "  BeamIOSourceRel(table=[[ORDER_DETAILS]])\n";
+    Assert.assertEquals("explain doesn't match", expectedPlan, plan);
+  }
+
+  @Test
+  public void selectWithFilter() throws Exception {
+    String sql = "SELECT " + " order_id, site_id, price " + "FROM ORDER_DETAILS "
+        + "WHERE SITE_ID = 0 and price > 20";
+    String plan = runner.explainQuery(sql);
+
+    String expectedPlan = "BeamProjectRel(order_id=[$0], site_id=[$1], price=[$2])\n"
+        + "  BeamFilterRel(condition=[AND(=($1, 0), >($2, 20))])\n"
+        + "    BeamIOSourceRel(table=[[ORDER_DETAILS]])\n";
+    Assert.assertEquals("explain doesn't match", expectedPlan, plan);
+  }
+
+  @Test
+  public void insertSelectFilter() throws Exception {
+    String sql = "INSERT INTO SUB_ORDER(order_id, site_id, price) " + "SELECT "
+        + " order_id, site_id, price " + "FROM ORDER_DETAILS "
+        + "WHERE SITE_ID = 0 and price > 20";
+    String plan = runner.explainQuery(sql);
+
+    String expectedPlan =
+        "BeamIOSinkRel(table=[[SUB_ORDER]], operation=[INSERT], flattened=[true])\n"
+            + "  BeamProjectRel(order_id=[$0], site_id=[$1], price=[$2], order_time=[null])\n"
+            + "    BeamProjectRel(order_id=[$0], site_id=[$1], price=[$2])\n"
+            + "      BeamFilterRel(condition=[AND(=($1, 0), >($2, 20))])\n"
+            + "        BeamIOSourceRel(table=[[ORDER_DETAILS]])\n";
+    Assert.assertEquals("explain doesn't match", expectedPlan, plan);
+  }
+
+}
diff --git a/dsls/sql/src/test/java/org/beam/dsls/sql/planner/BeamPlannerSubmitTest.java b/dsls/sql/src/test/java/org/beam/dsls/sql/planner/BeamPlannerSubmitTest.java
new file mode 100644
index 0000000000000..eb097a9ed293e
--- /dev/null
+++ b/dsls/sql/src/test/java/org/beam/dsls/sql/planner/BeamPlannerSubmitTest.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.beam.dsls.sql.planner;
+
+import org.apache.beam.sdk.Pipeline;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests to execute a query.
+ *
+ */
+public class BeamPlannerSubmitTest extends BasePlanner {
+  @Test
+  public void insertSelectFilter() throws Exception {
+    String sql = "INSERT INTO SUB_ORDER_RAM(order_id, site_id, price) SELECT "
+        + " order_id, site_id, price " + "FROM ORDER_DETAILS "
+        + "WHERE SITE_ID = 0 and price > 20";
+    Pipeline pipeline = runner.getPlanner().compileBeamPipeline(sql);
+    runner.getPlanner().planner.close();
+
+    pipeline.run().waitUntilFinish();
+
+    Assert.assertTrue(MockedBeamSQLTable.CONTENT.size() == 1);
+    Assert.assertEquals("order_id=12345,site_id=0,price=20.5,order_time=null",
+        MockedBeamSQLTable.CONTENT.get(0));
+  }
+
+}
diff --git a/dsls/sql/src/test/java/org/beam/dsls/sql/planner/MockedBeamSQLTable.java b/dsls/sql/src/test/java/org/beam/dsls/sql/planner/MockedBeamSQLTable.java
new file mode 100644
index 0000000000000..31f55780c3956
--- /dev/null
+++ b/dsls/sql/src/test/java/org/beam/dsls/sql/planner/MockedBeamSQLTable.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.beam.dsls.sql.planner;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.PBegin;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PDone;
+import org.apache.calcite.rel.type.RelProtoDataType;
+import org.beam.dsls.sql.schema.BaseBeamTable;
+import org.beam.dsls.sql.schema.BeamIOType;
+import org.beam.dsls.sql.schema.BeamSQLRow;
+
+/**
+ * A mock table used to check input/output.
+ *
+ */
+public class MockedBeamSQLTable extends BaseBeamTable {
+
+  /**
+   *
+   */
+  private static final long serialVersionUID = 1373168368414036932L;
+
+  public static final List<String> CONTENT = new ArrayList<>();
+
+  public MockedBeamSQLTable(RelProtoDataType protoRowType) {
+    super(protoRowType);
+  }
+
+  @Override
+  public BeamIOType getSourceType() {
+    return BeamIOType.UNBOUNDED;
+  }
+
+  @Override
+  public PTransform<PBegin, PCollection<BeamSQLRow>> buildIOReader() {
+    BeamSQLRow row1 = new BeamSQLRow(beamSqlRecordType);
+    row1.addField(0, 12345L);
+    row1.addField(1, 0);
+    row1.addField(2, 10.5);
+    row1.addField(3, new Date());
+
+    BeamSQLRow row2 = new BeamSQLRow(beamSqlRecordType);
+    row2.addField(0, 12345L);
+    row2.addField(1, 1);
+    row2.addField(2, 20.5);
+    row2.addField(3, new Date());
+
+    BeamSQLRow row3 = new BeamSQLRow(beamSqlRecordType);
+    row3.addField(0, 12345L);
+    row3.addField(1, 0);
+    row3.addField(2, 20.5);
+    row3.addField(3, new Date());
+
+    BeamSQLRow row4 = new BeamSQLRow(beamSqlRecordType);
+    row4.addField(0, null);
+    row4.addField(1, null);
+    row4.addField(2, 20.5);
+    row4.addField(3, new Date());
+
+    // row4 is constructed but not included in the mocked source below
+    return Create.of(row1, row2, row3);
+  }
+
+  @Override
+  public PTransform<PCollection<BeamSQLRow>, PDone> buildIOWriter() {
+    return new OutputStore();
+  }
+
+  /**
+   * Keep output in {@code CONTENT} for validation.
+   *
+   */
+  public static class OutputStore extends PTransform<PCollection<BeamSQLRow>, PDone> {
+
+    @Override
+    public PDone expand(PCollection<BeamSQLRow> input) {
+      input.apply(ParDo.of(new DoFn<BeamSQLRow, Void>() {
+
+        @Setup
+        public void setup() {
+          CONTENT.clear();
+        }
+
+        @ProcessElement
+        public void processElement(ProcessContext c) {
+          CONTENT.add(c.element().valueInString());
+        }
+
+        @Teardown
+        public void close() {
+
+        }
+
+      }));
+      return PDone.in(input.getPipeline());
+    }
+
+  }
+
+}
diff --git a/pom.xml b/pom.xml
index ef312c19373f7..5749df1fc9bac 100644
--- a/pom.xml
+++ b/pom.xml
@@ -447,12 +447,12 @@ ${project.version} - + org.apache.beam beam-sdks-java-io-hadoop-input-format - ${project.version} + ${project.version} - + org.apache.beam beam-runners-core-construction-java