
Commit

amarouni committed Aug 25, 2016
Merge commit cfacd85 (parents 4a63b30 + f7384e1)
Showing 875 changed files with 8,534 additions and 9,349 deletions.
@@ -17,6 +17,9 @@
*/
package org.apache.beam.examples;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.Default;
@@ -29,17 +32,12 @@
import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;


/**
* An example that verifies word counts in Shakespeare and includes Dataflow best practices.
* An example that verifies word counts in Shakespeare and includes Beam best practices.
*
* <p>This class, {@link DebuggingWordCount}, is the third in a series of four successively more
* detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount}
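For readers following along: the verification this example performs is, roughly, the PAssert pattern sketched below, written against the current Beam API. The class name, pipeline options, and expected values are illustrative only and are not taken from this commit.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

public class VerifyCountsSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Stand-in for the filtered word counts the real example derives from Shakespeare.
    PCollection<KV<String, Long>> counts = p.apply(
        Create.of(KV.of("Flourish", 3L), KV.of("stomach", 1L))
            .withCoder(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())));

    // PAssert registers a check that runs with the pipeline and fails the run
    // if the final PCollection does not contain exactly these elements.
    PAssert.that(counts).containsInAnyOrder(KV.of("Flourish", 3L), KV.of("stomach", 1L));

    p.run().waitUntilFinish();
  }
}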
@@ -17,6 +17,13 @@
*/
package org.apache.beam.examples;

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.beam.examples.common.ExampleBigQueryTableOptions;
import org.apache.beam.examples.common.ExampleOptions;
import org.apache.beam.examples.common.ExampleUtils;
@@ -33,19 +40,9 @@
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;

import org.joda.time.Duration;
import org.joda.time.Instant;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;


/**
* An example that counts words in text, and can run over either unbounded or bounded input
@@ -186,8 +183,7 @@ public static interface Options extends WordCount.WordCountOptions,
public static void main(String[] args) throws IOException {
Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
options.setBigQuerySchema(getSchema());
// DataflowExampleUtils creates the necessary input sources to simplify execution of this
// Pipeline.
// ExampleUtils creates the necessary input sources to simplify execution of this Pipeline.
ExampleUtils exampleUtils = new ExampleUtils(options);
exampleUtils.setup();

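The windowed counting the class description refers to boils down to applying a WindowFn before the aggregation. A minimal sketch, assuming `words` is a PCollection<String> whose elements already carry event timestamps (the real example attaches timestamps itself); the method name is hypothetical.

import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

/** Counts words per 10-minute fixed window; `words` must carry event timestamps. */
static PCollection<KV<String, Long>> countPerWindow(PCollection<String> words) {
  return words
      .apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(10))))
      .apply(Count.perElement());
}

With bounded input the same transforms run over historical timestamps; with unbounded (Pub/Sub) input they produce a result per window as data arrives, which is why the example can run over either.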
@@ -17,6 +17,9 @@
*/
package org.apache.beam.examples;

import com.google.common.base.Strings;
import com.google.common.io.Resources;
import java.io.IOException;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.Default;
@@ -37,11 +40,6 @@
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

import com.google.common.base.Strings;
import com.google.common.io.Resources;

import java.io.IOException;

/**
* An example that counts words in Shakespeare and includes Beam best practices.
*
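For comparison, the core of such a word-count pipeline written against the current Beam API (bucket paths are placeholders):

import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Filter;
import org.apache.beam.sdk.transforms.FlatMapElements;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.TypeDescriptors;

public class WordCountSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    p.apply(TextIO.read().from("gs://your-bucket/input/*.txt"))            // placeholder path
     .apply(FlatMapElements.into(TypeDescriptors.strings())
         .via((String line) -> Arrays.asList(line.split("[^\\p{L}]+"))))   // split into words
     .apply(Filter.by((String word) -> !word.isEmpty()))                   // drop empty tokens
     .apply(Count.perElement())                                            // KV<word, count>
     .apply(MapElements.into(TypeDescriptors.strings())
         .via((KV<String, Long> kv) -> kv.getKey() + ": " + kv.getValue()))
     .apply(TextIO.write().to("gs://your-bucket/output/wordcounts"));      // placeholder path

    p.run().waitUntilFinish();
  }
}

Count.perElement() is, roughly, the pairing-and-summing per key that the Sum and KV imports above are used for in the full example.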
@@ -17,14 +17,13 @@
*/
package org.apache.beam.examples.common;

import com.google.api.services.bigquery.model.TableSchema;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.DefaultValueFactory;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.GcpOptions;
import org.apache.beam.sdk.options.PipelineOptions;

import com.google.api.services.bigquery.model.TableSchema;

/**
* Options that can be used to configure BigQuery tables in Beam examples.
* The project defaults to the project being used to run the example.
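The "defaults to the project being used to run the example" behaviour comes from a DefaultValueFactory wired in with @Default.InstanceFactory. A hedged sketch of that options pattern follows; the interface, option names, and factory logic are hypothetical, not the actual class.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.DefaultValueFactory;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;

/** Hypothetical options interface showing the "compute the default from other options" pattern. */
public interface MyTableOptions extends PipelineOptions {
  @Description("BigQuery dataset name")
  @Default.String("beam_examples")
  String getDataset();
  void setDataset(String dataset);

  @Description("BigQuery table name; defaults to a name derived from the job name")
  @Default.InstanceFactory(TableFactory.class)
  String getTable();
  void setTable(String table);

  /** Invoked lazily when the option is first read and no value was supplied. */
  class TableFactory implements DefaultValueFactory<String> {
    @Override
    public String create(PipelineOptions options) {
      return options.getJobName().replace('-', '_');
    }
  }
}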
@@ -17,26 +17,23 @@
*/
package org.apache.beam.examples.common;

import com.google.common.base.MoreObjects;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.beam.sdk.options.ApplicationNameOptions;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.DefaultValueFactory;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;

import com.google.common.base.MoreObjects;

import org.joda.time.DateTimeUtils;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import java.util.concurrent.ThreadLocalRandom;

/**
* Options that can be used to configure the Beam examples.
*/
public interface ExampleOptions extends PipelineOptions {
@Description("Whether to keep jobs running on the Dataflow service after local process exit")
@Description("Whether to keep jobs running after local process exit")
@Default.Boolean(false)
boolean getKeepJobsRunning();
void setKeepJobsRunning(boolean keepJobsRunning);
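Consuming such an option follows the same pattern as the main() shown earlier in this commit: parse the arguments into the options interface, then read the typed getter. A short sketch, assuming `args` holds the program's command-line arguments:

import org.apache.beam.sdk.options.PipelineOptionsFactory;

// e.g. --keepJobsRunning=true on the command line; unset options fall back to their @Default.
ExampleOptions options =
    PipelineOptionsFactory.fromArgs(args).withValidation().as(ExampleOptions.class);
boolean keepRunning = options.getKeepJobsRunning();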
@@ -17,13 +17,6 @@
*/
package org.apache.beam.examples.common;

import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PubsubOptions;
import org.apache.beam.sdk.util.AttemptBoundedExponentialBackOff;
import org.apache.beam.sdk.util.Transport;

import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
import com.google.api.client.util.BackOff;
@@ -43,12 +36,17 @@
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.Uninterruptibles;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PubsubOptions;
import org.apache.beam.sdk.util.AttemptBoundedExponentialBackOff;
import org.apache.beam.sdk.util.Transport;

/**
* The utility class that sets up and tears down external resources,
@@ -17,6 +17,12 @@
*/
package org.apache.beam.examples.common;

import com.google.api.services.pubsub.Pubsub;
import com.google.api.services.pubsub.model.PublishRequest;
import com.google.api.services.pubsub.model.PubsubMessage;
import com.google.common.collect.ImmutableMap;
import java.io.IOException;
import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.Description;
@@ -28,14 +34,6 @@
import org.apache.beam.sdk.transforms.OldDoFn;
import org.apache.beam.sdk.util.Transport;

import com.google.api.services.pubsub.Pubsub;
import com.google.api.services.pubsub.model.PublishRequest;
import com.google.api.services.pubsub.model.PubsubMessage;
import com.google.common.collect.ImmutableMap;

import java.io.IOException;
import java.util.Arrays;

/**
* A batch Dataflow pipeline for injecting a set of GCS files into
* a PubSub topic line by line. Empty lines are skipped.
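A hedged sketch of the same read, filter, publish shape using the current Beam PubsubIO connector rather than the raw Pubsub client this injector drives from an OldDoFn; the file pattern and topic are placeholders.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Filter;

public class InjectorSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    p.apply(TextIO.read().from("gs://your-bucket/input/*.txt"))   // placeholder pattern
     .apply(Filter.by((String line) -> !line.isEmpty()))          // skip empty lines
     .apply(PubsubIO.writeStrings()
         .to("projects/your-project/topics/your-topic"));         // placeholder topic

    p.run().waitUntilFinish();
  }
}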
@@ -21,6 +21,21 @@
import static com.google.datastore.v1.client.DatastoreHelper.makeKey;
import static com.google.datastore.v1.client.DatastoreHelper.makeValue;

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.base.MoreObjects;
import com.google.datastore.v1.Entity;
import com.google.datastore.v1.Key;
import com.google.datastore.v1.Value;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.beam.examples.common.ExampleBigQueryTableOptions;
import org.apache.beam.examples.common.ExampleOptions;
import org.apache.beam.examples.common.ExampleUtils;
@@ -53,26 +68,8 @@
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionList;

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.base.MoreObjects;
import com.google.datastore.v1.Entity;
import com.google.datastore.v1.Key;
import com.google.datastore.v1.Value;

import org.joda.time.Duration;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* An example that computes the most popular hash tags
* for every prefix, which can be used for auto-completion.
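The core idea, counting hashtags and then indexing each (tag, count) pair under every prefix of the tag so the top candidates can later be picked per prefix, can be sketched as below. This uses the current DoFn API rather than the OldDoFn of this snapshot; the regex and method name are illustrative.

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.FlatMapElements;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;

/** From raw lines, counts hashtags and indexes each (tag, count) under every prefix of the tag. */
static PCollection<KV<String, KV<String, Long>>> candidatesPerPrefix(PCollection<String> lines) {
  PCollection<KV<String, Long>> tagCounts = lines
      .apply(FlatMapElements.into(TypeDescriptors.strings())
          .via((String line) -> {
            List<String> tags = new ArrayList<>();
            Matcher m = Pattern.compile("#(\\S+)").matcher(line);
            while (m.find()) {
              tags.add(m.group(1));
            }
            return tags;
          }))
      .apply(Count.perElement());

  // "flink" with count 7 becomes ("f", ("flink", 7)), ("fl", ("flink", 7)), ... so a later
  // Top-per-key step can keep the most popular completions for each prefix.
  return tagCounts.apply(ParDo.of(
      new DoFn<KV<String, Long>, KV<String, KV<String, Long>>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          String tag = c.element().getKey();
          for (int i = 1; i <= tag.length(); i++) {
            c.output(KV.of(tag.substring(0, i), c.element()));
          }
        }
      }));
}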
@@ -43,14 +43,14 @@ This directory contains end-to-end example pipelines that perform complex data p
<code>Windowing</code> to perform time-based aggregations of data.
</li>
<li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java">TrafficMaxLaneFlow</a>
&mdash; A streaming Cloud Dataflow example using BigQuery output in the
&mdash; A streaming Beam Example using BigQuery output in the
<code>traffic sensor</code> domain. Demonstrates the Cloud Dataflow streaming
runner, sliding windows, Cloud Pub/Sub topic ingestion, the use of the
<code>AvroCoder</code> to encode a custom class, and custom
<code>Combine</code> transforms.
</li>
<li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java">TrafficRoutes</a>
&mdash; A streaming Cloud Dataflow example using BigQuery output in the
&mdash; A streaming Beam Example using BigQuery output in the
<code>traffic sensor</code> domain. Demonstrates the Cloud Dataflow streaming
runner, <code>GroupByKey</code>, keyed state, sliding windows, and Cloud
Pub/Sub topic ingestion.
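The sliding windows mentioned for TrafficMaxLaneFlow and TrafficRoutes amount to a Window transform like the fragment below: a sketch only, assuming timestamped per-sensor readings keyed by sensor id, with hypothetical names.

import org.apache.beam.sdk.transforms.Mean;
import org.apache.beam.sdk.transforms.windowing.SlidingWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

/** Per-sensor mean flow over a 60-minute window, recomputed every 5 minutes. */
static PCollection<KV<String, Double>> meanFlow(PCollection<KV<String, Double>> readings) {
  return readings
      .apply(Window.<KV<String, Double>>into(
          SlidingWindows.of(Duration.standardMinutes(60)).every(Duration.standardMinutes(5))))
      .apply(Mean.perKey());
}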
@@ -17,6 +17,11 @@
*/
package org.apache.beam.examples.complete;

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.beam.examples.common.ExampleBigQueryTableOptions;
import org.apache.beam.examples.common.ExampleOptions;
import org.apache.beam.examples.common.ExampleUtils;
@@ -31,15 +36,8 @@
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;

import java.io.IOException;
import java.util.ArrayList;

/**
* A streaming Dataflow Example using BigQuery output.
* A streaming Beam Example using BigQuery output.
*
* <p>This pipeline example reads lines of the input text file, splits each line
* into individual words, capitalizes those words, and writes the output to
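Read lines, split into words, capitalize, and write to BigQuery: sketched here against the current Beam BigQueryIO API. The table, schema field, and paths are placeholders, and the actual example builds its TableRows in DoFns rather than with MapElements.

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.FlatMapElements;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.TypeDescriptor;
import org.apache.beam.sdk.values.TypeDescriptors;

public class WordExtractSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    TableSchema schema = new TableSchema().setFields(Arrays.asList(
        new TableFieldSchema().setName("string_field").setType("STRING")));

    p.apply(TextIO.read().from("gs://your-bucket/input.txt"))              // placeholder
     .apply(FlatMapElements.into(TypeDescriptors.strings())
         .via((String line) -> Arrays.asList(line.split("[^\\p{L}]+"))))   // split into words
     .apply(MapElements.into(TypeDescriptor.of(TableRow.class))
         .via((String word) -> new TableRow().set("string_field", word.toUpperCase())))
     .setCoder(TableRowJsonCoder.of())
     .apply(BigQueryIO.writeTableRows()
         .to("your-project:your_dataset.your_table")                       // placeholder
         .withSchema(schema));

    p.run().waitUntilFinish();
  }
}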
@@ -17,6 +17,12 @@
*/
package org.apache.beam.examples.complete;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Set;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.KvCoder;
@@ -51,17 +57,9 @@
import org.apache.beam.sdk.values.PDone;
import org.apache.beam.sdk.values.PInput;
import org.apache.beam.sdk.values.TupleTag;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Set;

/**
* An example that computes a basic TF-IDF search table for a directory or GCS prefix.
*
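The weighting the pipeline computes per (term, document) pair is standard TF-IDF. A small helper makes the arithmetic explicit; the pipeline itself derives these counts with joins over PCollections rather than a single function call.

/**
 * TF-IDF for one (term, document) pair: the term's frequency within the document,
 * scaled by how rare the term is across all documents.
 */
static double tfIdf(long termCountInDoc, long totalTermsInDoc, long totalDocs, long docsContainingTerm) {
  double tf = (double) termCountInDoc / totalTermsInDoc;           // term frequency
  double idf = Math.log((double) totalDocs / docsContainingTerm);  // inverse document frequency
  return tf * idf;
}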
@@ -17,6 +17,8 @@
*/
package org.apache.beam.examples.complete;

import com.google.api.services.bigquery.model.TableRow;
import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.TableRowJsonCoder;
import org.apache.beam.sdk.io.TextIO;
@@ -38,14 +40,9 @@
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

import com.google.api.services.bigquery.model.TableRow;

import org.joda.time.Duration;
import org.joda.time.Instant;

import java.util.List;

/**
* An example that reads Wikipedia edit data from Cloud Storage and computes the user with
* the longest string of edits separated by no more than an hour within each month.
@@ -184,7 +181,7 @@ public void processElement(ProcessContext c) {
/**
* Options supported by this class.
*
* <p>Inherits standard Dataflow configuration options.
* <p>Inherits standard Beam configuration options.
*/
private static interface Options extends PipelineOptions {
@Description(
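The "string of edits separated by no more than an hour" in the class description is a session window. A minimal fragment, assuming a PCollection of usernames whose elements are timestamped with the edit time; the real pipeline then re-windows by month to pick each user's longest session.

import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.windowing.Sessions;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

/** Edits per user per session, where a session ends after an hour of inactivity. */
static PCollection<KV<String, Long>> editsPerUserSession(PCollection<String> userNames) {
  return userNames
      .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardHours(1))))
      .apply(Count.perElement());
}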