From 35e03e757b5631492426dfa4d4d3209cb38e73f3 Mon Sep 17 00:00:00 2001 From: "Novotnik, Petr" Date: Mon, 20 Feb 2017 12:39:44 +0100 Subject: [PATCH] #10 Improve documentation around URI based data-sources/-sinks --- .../euphoria/core/client/dataset/Dataset.java | 11 ++++++++ .../euphoria/core/client/flow/Flow.java | 19 ++++++++++++- .../euphoria/core/client/io/IORegistry.java | 28 +++++++++++++++++-- .../core/client/io/SchemeBasedIORegistry.java | 22 ++++++++++++++- 4 files changed, 75 insertions(+), 5 deletions(-) diff --git a/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/dataset/Dataset.java b/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/dataset/Dataset.java index d03898ddccef1..fd3b74cd205ed 100644 --- a/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/dataset/Dataset.java +++ b/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/dataset/Dataset.java @@ -79,6 +79,17 @@ public interface Dataset extends Serializable { */ boolean isBounded(); + /** + * Declares this data set to be persisted to a data sink specified by its URI. 
+ * + * @param uri the URI representing the data sink to persist this data set to + * + * @throws Exception if setting up the actual data sink implementation fails + * for some reason + * + * @see #persist(DataSink) + * @see cz.seznam.euphoria.core.client.io.IORegistry + */ default void persist(URI uri) throws Exception { persist(getFlow().createOutput(uri)); } diff --git a/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/flow/Flow.java b/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/flow/Flow.java index 7f2addc0b8d2d..c3d8c499ef0a4 100644 --- a/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/flow/Flow.java +++ b/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/flow/Flow.java @@ -311,6 +311,8 @@ public Settings getSettings() { * @return a dataset representing the specified source * * @throws Exception if setting up the source fails for some reason + * + * @see IORegistry */ public Dataset createInput(URI uri) throws Exception { return createInput(getSourceFromURI(uri)); @@ -332,7 +334,22 @@ public Dataset createInput(DataSource source) { return ret; } - + /** + * Creates a new output/sink data set based on the specified URI. + * + * @param the type of elements being written to the sink; this is + * not type-safe. 
If the caller mixes up the sink and the expected + * type of such a sink, the result may be {@link ClassCastException}s + * at later points in time + * + * @param uri the URI describing the sink of the data set + * + * @return a data sink based on the specified URI + * + * @throws Exception if setting up the sink fails for some reason + * + * @see IORegistry + */ public DataSink createOutput(URI uri) throws Exception { return getSinkFromURI(uri); } diff --git a/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/io/IORegistry.java b/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/io/IORegistry.java index 2697578db260f..70b7525b39771 100644 --- a/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/io/IORegistry.java +++ b/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/io/IORegistry.java @@ -20,14 +20,36 @@ import java.net.URI; /** - * Factory of {@code DataSource} from URI and settings. + * Factory for {@link DataSource}s and {@link DataSink}s based on URIs. + *

+ * The registry instantiates {@link SchemeBasedIORegistry} by default to resolve + * a URI to a data source or sink. If {@link #REGISTRY_IMPL_CONF} is defined + * in the supplied configuration bundle, it is expected to name a subclass of + * {@link IORegistry} that has a public default constructor. A new instance of + * this sub-class will be created for every URI resolve request and is then responsible + * for creating the corresponding data source or sink. */ public abstract class IORegistry { + /** + * The configuration key specifying a sub-class of {@link IORegistry} to + * instantiate to handle URI to data source/sink translation requests. + */ + public static final String REGISTRY_IMPL_CONF = "euphoria.io.registry.impl"; - private static final String REGISTRY_IMPL_CONF = "euphoria.io.registry.impl"; - + /** + * Retrieves an {@link IORegistry} from the specified configuration. + * Falls back to {@link SchemeBasedIORegistry} if none is explicitly defined. + * + * @param settings the configuration settings + * + * @return an {@link IORegistry} + * + * @throws Exception if instantiating the configured {@link IORegistry} fails + * for some reason or if the configured registry is not a sub-class + * of {@link IORegistry} + */ public static IORegistry get(Settings settings) throws Exception { return getInstance(settings, REGISTRY_IMPL_CONF, IORegistry.class, new SchemeBasedIORegistry()); diff --git a/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/io/SchemeBasedIORegistry.java b/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/io/SchemeBasedIORegistry.java index 92436a8c6539e..1ee41afd12961 100644 --- a/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/io/SchemeBasedIORegistry.java +++ b/sdks/java/extensions/euphoria/euphoria-core/src/main/java/cz/seznam/euphoria/core/client/io/SchemeBasedIORegistry.java @@ -20,12 +20,32 @@ import
java.net.URI; /** - * {@code IORegistry} that creates {@code DataSource} based on scheme. + * An {@code IORegistry} creating {@code DataSource} based on a URI's scheme. + * + * Given a configuration bundle and a URI, this registry creates an associated + * data source as follows: + *

    + *
  1. Extract the scheme from the given URI
  2. + *
  3. Look up the value under the key "{@link #SCHEME_SOURCE_PREFIX} + scheme" where + * scheme represents the scheme value extracted from the given URI
  4. + *
  5. If no such value is defined, fail.
  6. + *
  7. Otherwise validate that the value names an existing class which implements + * {@link DataSourceFactory}.
  8. + *
  9. Instantiate the class using its default public constructor.
  10. + *
  11. Invoke {@link DataSourceFactory#get(URI, Settings)} on the new factory instance + * passing on the original URI and configuration values.
  12. + *
  13. Return the result of the factory's {@code get} method invocation.
  14. + *
+ * + * Similarly, the same applies to constructing sinks. The corresponding key prefix is + * {@link #SCHEME_SINK_PREFIX} and the factory interface is {@link DataSinkFactory}. */ public class SchemeBasedIORegistry extends IORegistry { + /** Key prefix specifying associations of schemes to particular data source factories. */ public static final String SCHEME_SOURCE_PREFIX = "euphoria.io.datasource.factory."; + /** Key prefix specifying associations of schemes to particular data sink factories. */ public static final String SCHEME_SINK_PREFIX = "euphoria.io.datasink.factory."; @Override