diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml
index a5e99a06e80b9..b924cb8e3ea88 100644
--- a/runners/spark/pom.xml
+++ b/runners/spark/pom.xml
@@ -330,7 +330,7 @@
- com.google.common
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
index 2ce1ff697e42a..fa85a2e25e262 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
@@ -50,7 +50,7 @@
 /**
  * The SparkRunner translate operations defined on a pipeline to a representation
  * executable by Spark, and then submitting the job to Spark to be executed. If we wanted to run
- * a dataflow pipeline with the default options of a single threaded spark instance in local mode,
+ * a Beam pipeline with the default options of a single threaded spark instance in local mode,
  * we would do the following:
  *
  * {@code
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java
index 50ed5f3f5ce8c..376b80ff953b2 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java
@@ -28,7 +28,7 @@
 /**
  * The SparkRunner translate operations defined on a pipeline to a representation executable
- * by Spark, and then submitting the job to Spark to be executed. If we wanted to run a dataflow
+ * by Spark, and then submitting the job to Spark to be executed. If we wanted to run a Beam
  * pipeline with the default options of a single threaded spark instance in local mode, we would do
  * the following:
  *
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java
index c15e276c43dbb..e2cd9632e37b7 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java
@@ -32,7 +32,7 @@
 /**
  * This class wraps a map of named aggregators. Spark expects that all accumulators be declared
- * before a job is launched. Dataflow allows aggregators to be used and incremented on the fly.
+ * before a job is launched. Beam allows aggregators to be used and incremented on the fly.
  * We create a map of named aggregators and instantiate in the the spark context before the job
  * is launched. We can then add aggregators on the fly in Spark.
  */
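Note: the {@code ...} example referenced by the SparkRunner javadoc falls outside the hunk context above. For orientation, a minimal sketch of running a Beam pipeline on a single-threaded local Spark instance; the SparkPipelineOptions accessor names are assumptions based on this runner's option conventions, not taken from this patch.

import org.apache.beam.runners.spark.SparkPipelineOptions;
import org.apache.beam.runners.spark.SparkRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class LocalSparkRunnerSketch {
  public static void main(String[] args) {
    // Assumed option names; only SparkRunner itself appears in this patch.
    SparkPipelineOptions options = PipelineOptionsFactory.as(SparkPipelineOptions.class);
    options.setRunner(SparkRunner.class);
    options.setSparkMaster("local"); // single-threaded local Spark instance
    Pipeline pipeline = Pipeline.create(options);
    // ... apply transforms here ...
    pipeline.run();
  }
}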
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/examples/WordCount.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/examples/WordCount.java
index 06770305f2c14..1af84add67ecb 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/examples/WordCount.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/examples/WordCount.java
@@ -110,7 +110,7 @@ public PCollection<KV<String, Long>> apply(PCollection<String> lines) {
    */
   public static interface WordCountOptions extends PipelineOptions {
     @Description("Path of the file to read from")
-    @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
+    @Default.String("gs://beam-samples/shakespeare/kinglear.txt")
     String getInputFile();
     void setInputFile(String value);
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java
index 7f8e2978c439d..4a7058bfd521d 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java
@@ -36,9 +36,9 @@ public final class ShardNameTemplateHelper {

   private static final Logger LOG = LoggerFactory.getLogger(ShardNameTemplateHelper.class);

-  public static final String OUTPUT_FILE_PREFIX = "spark.dataflow.fileoutputformat.prefix";
-  public static final String OUTPUT_FILE_TEMPLATE = "spark.dataflow.fileoutputformat.template";
-  public static final String OUTPUT_FILE_SUFFIX = "spark.dataflow.fileoutputformat.suffix";
+  public static final String OUTPUT_FILE_PREFIX = "spark.beam.fileoutputformat.prefix";
+  public static final String OUTPUT_FILE_TEMPLATE = "spark.beam.fileoutputformat.template";
+  public static final String OUTPUT_FILE_SUFFIX = "spark.beam.fileoutputformat.suffix";

   private ShardNameTemplateHelper() {
   }
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
index 800d614e5e964..454b7607e9df2 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/DoFnFunction.java
@@ -31,7 +31,7 @@
 import org.slf4j.LoggerFactory;

 /**
- * Dataflow's Do functions correspond to Spark's FlatMap functions.
+ * Beam's Do functions correspond to Spark's FlatMap functions.
  *
  * @param Input element type.
  * @param Output element type.
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkPipelineTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkPipelineTranslator.java
index 997940bb99de6..1f7ccf1e005b7 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkPipelineTranslator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkPipelineTranslator.java
@@ -20,7 +20,7 @@
 import org.apache.beam.sdk.transforms.PTransform;

 /**
- * Translator to support translation between Dataflow transformations and Spark transformations.
+ * Translator to support translation between Beam transformations and Spark transformations.
  */
 public interface SparkPipelineTranslator {
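Note: since the ShardNameTemplateHelper constants renamed above are plain configuration keys, callers that hard-coded the old "spark.dataflow.*" strings will silently fall back to defaults. A minimal sketch of setting the renamed keys on a Hadoop Configuration; only the constant names come from this patch, and how the configuration reaches an actual output format is not shown here.

import org.apache.beam.runners.spark.io.hadoop.ShardNameTemplateHelper;
import org.apache.hadoop.conf.Configuration;

public class ShardNameConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // The constants now resolve to the "spark.beam.fileoutputformat.*" keys.
    conf.set(ShardNameTemplateHelper.OUTPUT_FILE_PREFIX, "part");
    conf.set(ShardNameTemplateHelper.OUTPUT_FILE_TEMPLATE, "-SSSSS-of-NNNNN");
    conf.set(ShardNameTemplateHelper.OUTPUT_FILE_SUFFIX, ".txt");
  }
}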
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java
index 4bc0c00973f8b..2634c65b24484 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/SparkRuntimeContext.java
@@ -57,7 +57,7 @@ public class SparkRuntimeContext implements Serializable {
   private final String serializedPipelineOptions;

   /**
-   * Map fo names to dataflow aggregators.
+   * Map of names to Beam aggregators.
    */
   private final Map<String, Aggregator<?, ?>> aggregators = new HashMap<>();
   private transient CoderRegistry coderRegistry;
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
index 08e3fda7a36b5..eaceb852ce344 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
@@ -94,7 +94,7 @@
 import scala.Tuple2;

 /**
- * Supports translation between a DataFlow transform, and Spark's operations on RDDs.
+ * Supports translation between a Beam transform, and Spark's operations on RDDs.
  */
 public final class TransformTranslator {

@@ -895,7 +895,7 @@ private static Map<TupleTag<?>, BroadcastHelper<?>> getSideInputs(
   }

   /**
-   * Translator matches Dataflow transformation with the appropriate evaluator.
+   * Translator matches Beam transformation with the appropriate evaluator.
    */
   public static class Translator implements SparkPipelineTranslator {
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
index 5f35ebb201862..43dcef665fb2a 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
@@ -68,7 +68,7 @@

 /**
- * Supports translation between a DataFlow transform, and Spark's operations on DStreams.
+ * Supports translation between a Beam transform, and Spark's operations on DStreams.
  */
 public final class StreamingTransformTranslator {
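Note: the "Translator matches Beam transformation with the appropriate evaluator" comment above describes a class-to-evaluator registry. A simplified sketch of that pattern; the names TransformEvaluator and EVALUATORS mirror identifiers visible in the hunks, but the wiring here is illustrative, not the runner's actual code.

import java.util.HashMap;
import java.util.Map;
import org.apache.beam.sdk.transforms.PTransform;

public class EvaluatorRegistrySketch {
  /** An evaluator knows the Spark operation equivalent to one PTransform type. */
  interface TransformEvaluator<T extends PTransform<?, ?>> {
    void evaluate(T transform);
  }

  private static final Map<Class<? extends PTransform>, TransformEvaluator<?>> EVALUATORS =
      new HashMap<>();

  /** Register the evaluator for a concrete PTransform subclass. */
  static <T extends PTransform<?, ?>> void register(Class<T> clazz, TransformEvaluator<T> evaluator) {
    EVALUATORS.put(clazz, evaluator);
  }

  /** Look up the evaluator for a transform's class, as the translators do. */
  @SuppressWarnings("unchecked")
  static <T extends PTransform<?, ?>> TransformEvaluator<T> lookup(Class<T> clazz) {
    return (TransformEvaluator<T>) EVALUATORS.get(clazz);
  }
}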
@@ -349,13 +349,13 @@ public void evaluate(Window.Bound transform, EvaluationContext context) {
         (TransformEvaluator) EVALUATORS.get(clazz);
     if (transform == null) {
       if (UNSUPPORTED_EVALUATORS.contains(clazz)) {
-        throw new UnsupportedOperationException("Dataflow transformation " + clazz
+        throw new UnsupportedOperationException("Beam transformation " + clazz
             .getCanonicalName() + " is currently unsupported by the Spark streaming pipeline");
       }
       // DStream transformations will transform an RDD into another RDD
       // Actions will create output
-      // In Dataflow it depends on the PTransform's Input and Output class
+      // In Beam it depends on the PTransform's Input and Output class
       Class pTOutputClazz = getPTransformOutputClazz(clazz);
       if (PDone.class.equals(pTOutputClazz)) {
         return foreachRDD(rddTranslator);
@@ -373,7 +373,7 @@ public void evaluate(Window.Bound transform, EvaluationContext context) {
   }

   /**
-   * Translator matches Dataflow transformation with the appropriate Spark streaming evaluator.
+   * Translator matches Beam transformation with the appropriate Spark streaming evaluator.
    * rddTranslator uses Spark evaluators in transform/foreachRDD to evaluate the transformation
    */
   public static class Translator implements SparkPipelineTranslator {
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java
index 29c2dd995b6e6..5f0c79576e595 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java
@@ -34,12 +34,12 @@ public abstract class BroadcastHelper implements Serializable {

   /**
-   * If the property {@code dataflow.spark.directBroadcast} is set to
+   * If the property {@code beam.spark.directBroadcast} is set to
    * {@code true} then Spark serialization (Kryo) will be used to broadcast values
    * in View objects. By default this property is not set, and values are coded using
    * the appropriate {@link Coder}.
    */
-  public static final String DIRECT_BROADCAST = "dataflow.spark.directBroadcast";
+  public static final String DIRECT_BROADCAST = "beam.spark.directBroadcast";

   private static final Logger LOG = LoggerFactory.getLogger(BroadcastHelper.class);
diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/TransformTranslatorTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/TransformTranslatorTest.java
index f61ad1c9a9fda..f72eba7ea6cac 100644
--- a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/TransformTranslatorTest.java
+++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/TransformTranslatorTest.java
@@ -56,7 +56,7 @@ public class TransformTranslatorTest {

   /**
    * Builds a simple pipeline with TextIO.Read and TextIO.Write, runs the pipeline
-   * in DirectRunner and on SparkRunner, with the mapped dataflow-to-spark
+   * in DirectRunner and on SparkRunner, with the mapped beam-to-spark
    * transforms. Finally it makes sure that the results are the same for both runs.
    */
   @Test
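Note: the BroadcastHelper javadoc above says broadcast behavior changes "if the property ... is set". Assuming that means a JVM system property (the patch does not show how it is read), opting in would look roughly like this; DirectBroadcastExample is a hypothetical caller.

import org.apache.beam.runners.spark.util.BroadcastHelper;

public class DirectBroadcastExample {
  public static void main(String[] args) {
    // Now reads "beam.spark.directBroadcast" after this rename; set it before
    // the pipeline is built so broadcasts use Spark (Kryo) serialization.
    System.setProperty(BroadcastHelper.DIRECT_BROADCAST, "true");
    // ... build and run the pipeline as usual ...
  }
}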
diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/KafkaStreamingTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/KafkaStreamingTest.java
index 27d6f5ea60689..ac77922bc57c3 100644
--- a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/KafkaStreamingTest.java
+++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/KafkaStreamingTest.java
@@ -56,7 +56,7 @@ public class KafkaStreamingTest {
       new EmbeddedKafkaCluster.EmbeddedZookeeper();
   private static final EmbeddedKafkaCluster EMBEDDED_KAFKA_CLUSTER =
       new EmbeddedKafkaCluster(EMBEDDED_ZOOKEEPER.getConnection(), new Properties());
-  private static final String TOPIC = "kafka_dataflow_test_topic";
+  private static final String TOPIC = "kafka_beam_test_topic";
   private static final Map<String, String> KAFKA_MESSAGES = ImmutableMap.of(
       "k1", "v1", "k2", "v2", "k3", "v3", "k4", "v4"
   );
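Note: a test like the one above has to publish KAFKA_MESSAGES to the renamed topic before the streaming pipeline reads it. A sketch of that step, assuming the new-style kafka-clients producer; the actual test may use a different producer API, and the broker list would come from the embedded cluster.

import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class ProduceTestMessagesSketch {
  static void send(String brokerList, String topic, Map<String, String> messages) {
    Properties props = new Properties();
    props.put("bootstrap.servers", brokerList);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    // Publish each key/value pair to the test topic, e.g. "kafka_beam_test_topic".
    try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
      for (Map.Entry<String, String> e : messages.entrySet()) {
        producer.send(new ProducerRecord<>(topic, e.getKey(), e.getValue()));
      }
    }
  }
}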