From 9019d3b4bb7c94ea4ac379b24edd1b6039ed8066 Mon Sep 17 00:00:00 2001
From: mandar2812
Date: Tue, 14 Aug 2018 00:21:23 +0200
Subject: [PATCH] [Release]: v1.5.3

Signed-off-by: mandar2812
---
 build.sbt                                 |   2 +-
 docs/core/core_dtfdata.md                 | 280 ++++++++++++++++++
 docs/releases/mydoc_release_notes_153.md  | 276 +++++++++--------
 docs/scaladoc/v1.5.3/core.md              |   1 +
 docs/scaladoc/v1.5.3/examples.md          |   1 +
 docs/scaladoc/v1.5.3/pipes.md             |   1 +
 docs/scaladoc/v1.5.3/repl.md              |   1 +
 .../dynaml/tensorflow/data/DataSet.scala  |  10 +
 mkdocs.yml                                |   7 +
 9 files changed, 447 insertions(+), 132 deletions(-)
 create mode 100644 docs/core/core_dtfdata.md
 create mode 100644 docs/scaladoc/v1.5.3/core.md
 create mode 100644 docs/scaladoc/v1.5.3/examples.md
 create mode 100644 docs/scaladoc/v1.5.3/pipes.md
 create mode 100644 docs/scaladoc/v1.5.3/repl.md

diff --git a/build.sbt b/build.sbt
index fbc195e3e..e1195053f 100644
--- a/build.sbt
+++ b/build.sbt
@@ -9,7 +9,7 @@ packageSummary := "Scala Library/REPL for Machine Learning Research"
 
 packageDescription := "DynaML is a Scala environment for conducting research and education in Machine Learning. DynaML comes packaged with a powerful library of classes for various predictive models and a Scala REPL where one can not only build custom models but also play around with data work-flows. It can also be used as an educational/research tool for data analysis."
 
-val mainVersion = "v1.5.3-beta.3"
+val mainVersion = "v1.5.3"
 
 val dataDirectory = settingKey[File]("The directory holding the data files for running example scripts")

diff --git a/docs/core/core_dtfdata.md b/docs/core/core_dtfdata.md
new file mode 100644
index 000000000..80eb68b30
--- /dev/null
+++ b/docs/core/core_dtfdata.md
@@ -0,0 +1,280 @@
+!!! summary
+    The `DataSet` API, added in v1.5.3, makes it easy to work with potentially large data sets,
+    perform complex pre-processing tasks and feed these data sets into TensorFlow models.
+
+
+## Data Set
+
+### Basics
+
+A `DataSet[X]` instance is simply a wrapper over an `Iterable[X]` object, and the user retains
+access to the underlying collection.
+
+!!! tip
+    The [`dtfdata`](https://transcendent-ai-labs.github.io/api_docs/DynaML/recent/dynaml-core/#io.github.mandar2812.dynaml.tensorflow.package)
+    object gives the user easy access to the `DataSet` API.
+
+    ```scala
+    import _root_.io.github.mandar2812.dynaml.probability._
+    import _root_.io.github.mandar2812.dynaml.pipes._
+    import io.github.mandar2812.dynaml.tensorflow._
+
+
+    val random_numbers = GaussianRV(0.0, 1.0) :* GaussianRV(1.0, 2.0)
+
+    //Create a data set.
+    val dataset1 = dtfdata.dataset(random_numbers.iid(10000).draw)
+
+    //Access the underlying data
+    dataset1.data
+    ```
+
+### Transformations
+
+DynaML data sets support several transformations in the _map-reduce_ style.
+
+#### Map
+
+Transform each element of type `X` into some other element of type `Y` (`Y` can possibly be the same as `X`).
+
+```scala
+import _root_.io.github.mandar2812.dynaml.probability._
+import _root_.io.github.mandar2812.dynaml.pipes._
+import io.github.mandar2812.dynaml.tensorflow._
+
+
+val random_numbers = GaussianRV(0.0, 1.0)
+//A data set of random Gaussian numbers.
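+//(10,000 i.i.d. draws from GaussianRV(0.0, 1.0))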
+val random_gaussian_dataset = dtfdata.dataset(
+  random_numbers.iid(10000).draw
+)
+
+//Transform the data set by applying a Scala function
+val random_chisq_dataset = random_gaussian_dataset.map((x: Double) => x*x)
+
+val exp_tr = DataPipe[Double, Double](math.exp _)
+//A DataPipe can be passed instead of a function
+val random_log_gaussian_dataset = random_gaussian_dataset.map(exp_tr)
+```
+
+#### Flat Map
+
+Process each element by applying a function which transforms it into an `Iterable`;
+this operation is followed by a flattening of the top-level `Iterable`.
+
+Schematically, this process is
+
+`Iterable[X] -> Iterable[Iterable[Y]] -> Iterable[Y]`
+
+```scala
+import _root_.io.github.mandar2812.dynaml.probability._
+import _root_.io.github.mandar2812.dynaml.pipes._
+import scala.util.Random
+import io.github.mandar2812.dynaml.tensorflow._
+
+val random_gaussian_dataset = dtfdata.dataset(
+  GaussianRV(0.0, 1.0).iid(10000).draw
+)
+
+//Transform the data set by applying a Scala function
+val gaussian_mixture = random_gaussian_dataset.flatMap(
+  (x: Double) => GaussianRV(0.0, x*x).iid(10).draw
+)
+```
+
+#### Filter
+
+Collect only the elements which satisfy some predicate, i.e. a function which returns `true` for the
+elements to be selected and `false` for the ones which should be discarded.
+
+```scala
+import _root_.io.github.mandar2812.dynaml.probability._
+import _root_.io.github.mandar2812.dynaml.pipes._
+import scala.util.Random
+import io.github.mandar2812.dynaml.tensorflow._
+
+val gaussian_dataset = dtfdata.dataset(
+  GaussianRV(0.0, 1.0).iid(10000).draw
+)
+
+val onlyPositive = DataPipe[Double, Boolean](_ > 0.0)
+
+val truncated_gaussian = gaussian_dataset.filter(onlyPositive)
+
+val zeroOrGreater = (x: Double) => x >= 0.0
+//filterNot works in the opposite manner to filter
+val neg_truncated_gaussian = gaussian_dataset.filterNot(zeroOrGreater)
+
+```
+
+#### Scan & Friends
+
+Sometimes, we need to perform operations on a data set which are sequential in nature. In such
+situations, the `scanLeft()` and `scanRight()` methods are useful.
+
+Let's simulate a random walk: we start with a number $x_0$ and add independent Gaussian
+increments to it.
+
+$$
+\begin{align*}
+x_t &= x_{t-1} + \epsilon_t \\
+\epsilon_t &\sim \mathcal{N}(0, 1)
+\end{align*}
+$$
+
+```scala
+import _root_.io.github.mandar2812.dynaml.probability._
+import _root_.io.github.mandar2812.dynaml.pipes._
+import scala.util.Random
+import io.github.mandar2812.dynaml.tensorflow._
+
+val gaussian_increments = dtfdata.dataset(
+  GaussianRV(0.0, 1.0).iid(10000).draw
+)
+
+val increment = DataPipe2[Double, Double, Double]((x, i) => x + i)
+
+//Start the random walk from zero, and keep adding increments.
+val random_walk = gaussian_increments.scanLeft(0.0)(increment)
+```
+
+The `scanRight()` method works just like `scanLeft()`, except that it begins from the last element
+of the collection.
+
+#### Reduce & Reduce Left
+
+The `reduce()` and `reduceLeft()` methods help in computing summary values from the entire data
+collection.
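+
+As a quick illustration, here is a minimal sketch which extracts the largest element of a data
+set via `reduceLeft()` (assuming it accepts a `DataPipe2`, in the same way as `reduce()` below):
+
+```scala
+val gaussian_sample = dtfdata.dataset(
+  GaussianRV(0.0, 1.0).iid(100).draw
+)
+
+//Keep the larger of the running summary and the current element.
+val sample_max = gaussian_sample.reduceLeft(
+  DataPipe2[Double, Double, Double]((acc, x) => math.max(acc, x))
+)
+```
+
+The example below computes the average position of the random walk from the previous section,
+using `reduce()` to sum the walk and dividing by the number of increments.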
+
+```scala
+import _root_.io.github.mandar2812.dynaml.probability._
+import _root_.io.github.mandar2812.dynaml.pipes._
+import scala.util.Random
+import io.github.mandar2812.dynaml.tensorflow._
+
+val gaussian_increments = dtfdata.dataset(
+  GaussianRV(0.0, 1.0).iid(10000).draw
+)
+
+val increment = DataPipe2[Double, Double, Double]((x, i) => x + i)
+
+val random_walk = gaussian_increments.scanLeft(0.0)(increment)
+
+//Sum the positions of the walk, then divide by the number of increments.
+val average = random_walk.reduce(
+  DataPipe2[Double, Double, Double]((x, y) => x + y)
+)/10000.0
+```
+
+#### Other Transformations
+
+Sometimes a transformation cannot be applied to each element individually, but instead
+requires the entire data collection; the `transform()` method handles this case.
+
+```scala
+import _root_.io.github.mandar2812.dynaml.probability._
+import _root_.io.github.mandar2812.dynaml.pipes._
+import scala.util.Random
+import io.github.mandar2812.dynaml.tensorflow._
+
+val gaussian_data = dtfdata.dataset(
+  GaussianRV(0.0, 1.0).iid(10000).draw
+)
+
+//Resample with replacement: draw random indices into the collection.
+val resample = DataPipe[Iterable[Double], Iterable[Double]](
+  coll => {
+    val v = coll.toVector
+    (0 until 10000).map(_ => v(Random.nextInt(v.length)))
+  }
+)
+
+val resampled_data = gaussian_data.transform(resample)
+
+```
+
+!!! note
+    **Conversion to TF-Scala `Dataset` class**
+
+    The TensorFlow Scala API also has a `Dataset` class; it is possible to obtain a
+    TensorFlow `Dataset` from a DynaML `DataSet` instance.
+
+    ```scala
+    import _root_.io.github.mandar2812.dynaml.probability._
+    import _root_.io.github.mandar2812.dynaml.pipes._
+    import io.github.mandar2812.dynaml.tensorflow._
+    import org.platanios.tensorflow.api._
+    import org.platanios.tensorflow.api.types._
+
+
+    val random_numbers = GaussianRV(0.0, 1.0)
+
+    //Create a data set.
+    val dataset1 = dtfdata.dataset(random_numbers.iid(10000).draw)
+
+    //Convert to a TensorFlow data set
+    dataset1.build[Tensor, Output, DataType.Aux[Double], DataType, Shape](
+      Left(DataPipe[Double, Tensor](x => dtf.tensor_f64(1)(x))),
+      FLOAT64, Shape(1)
+    )
+    ```
+
+
+
+## Tuple Data & Supervised Data
+
+The classes `ZipDataSet[X, Y]` and `SupervisedDataSet[X, Y]` both represent data collections which consist of
+`(X, Y)` tuples. They can be created in a number of ways.
+
+### Zip Data
+
+The `zip()` method can be used to create data sets consisting of tuples.
+
+```scala
+import _root_.io.github.mandar2812.dynaml.probability._
+import _root_.io.github.mandar2812.dynaml.pipes._
+import scala.util.Random
+import _root_.breeze.stats.distributions._
+import io.github.mandar2812.dynaml.tensorflow._
+
+val gaussian_data = dtfdata.dataset(
+  GaussianRV(0.0, 1.0).iid(10000).draw
+)
+
+val log_normal_data = gaussian_data.map((x: Double) => math.exp(x))
+
+val poisson_data = dtfdata.dataset(
+  RandomVariable(Poisson(2.5)).iid(10000).draw
+)
+
+val tuple_data1 = poisson_data.zip(gaussian_data)
+
+val tuple_data2 = poisson_data.zip(log_normal_data)
+
+//Join on the keys, in this case the
+//Poisson distributed integers
+
+tuple_data1.join(tuple_data2)
+```
+
+### Supervised Data
+
+For supervised learning operations, we can use the `SupervisedDataSet` class, which can be instantiated
+in the following ways.
+
+```scala
+
+import _root_.io.github.mandar2812.dynaml.probability._
+import _root_.io.github.mandar2812.dynaml.pipes._
+import scala.util.Random
+import _root_.breeze.stats.distributions._
+import io.github.mandar2812.dynaml.tensorflow._
+
+val gaussian_data = dtfdata.dataset(
+  GaussianRV(0.0, 1.0).iid(10000).draw
+)
+
+//Pair each sample with a target drawn conditionally on it.
+val sup_data1 = gaussian_data.to_supervised(
+  DataPipe[Double, (Double, Double)](x => (x, GaussianRV(0.0, x*x).draw))
+)
+
+//Or pair a features data set with a targets data set.
+val targets = gaussian_data.map((x: Double) => math.exp(x))
+
+val sup_data2 = dtfdata.supervised_dataset(gaussian_data, targets)
+
+```
+
diff --git a/docs/releases/mydoc_release_notes_153.md b/docs/releases/mydoc_release_notes_153.md
index 1f820fec6..3eac431f1 100644
--- a/docs/releases/mydoc_release_notes_153.md
+++ b/docs/releases/mydoc_release_notes_153.md
@@ -1,210 +1,222 @@
 !!! summary ""
-    Version 1.5.3 of DynaML, released August 13, 2017, .
+    Version 1.5.3 of DynaML, released August 14, 2018, introduces a new API for handling data sets. It also
+    features deeper TensorFlow integration, notably the Inception v2 cell.
 
 
 ## Additions
 
 ### Data Set API
 
-   The `DataSet` family of classes helps the user to create and transform potentially large number of data instances.
-   Users can create and perform complex transformations on data sets, using the `DataPipe` API or simple Scala functions.
+The `DataSet` family of classes helps the user create and transform potentially large numbers of data instances.
+Users can create and perform complex transformations on data sets, using the `DataPipe` API or simple Scala functions.
 
-   ```scala
-   import _root_.io.github.mandar2812.dynaml.probability._
-   import _root_.io.github.mandar2812.dynaml.pipes._
-   import io.github.mandar2812.dynaml.tensorflow._
+```scala
+import _root_.io.github.mandar2812.dynaml.probability._
+import _root_.io.github.mandar2812.dynaml.pipes._
+import io.github.mandar2812.dynaml.tensorflow._
 
 
-   val random_numbers = GaussianRV(0.0, 1.0) :* GaussianRV(1.0, 2.0)
+val random_numbers = GaussianRV(0.0, 1.0) :* GaussianRV(1.0, 2.0)
 
-   //Create a data set.
-   val dataset1 = dtfdata.dataset(random_numbers.iid(10000).draw)
+//Create a data set.
+val dataset1 = dtfdata.dataset(random_numbers.iid(10000).draw)
 
-   val filter_gr_zero = DataPipe[(Double, Double), Boolean](c => c._1 > 0d && c._2 > 0d)
+val filter_gr_zero = DataPipe[(Double, Double), Boolean](
+  c => c._1 > 0d && c._2 > 0d
+)
 
-   //Filter elements
-   val data_gr_zero = dataset1.filter(filter_gr_zero)
+//Filter elements
+val data_gr_zero = dataset1.filter(filter_gr_zero)
 
-   val abs_func: (Double, Double) => (Double, Double) = (c: (Double, Double)) => (math.abs(c._1), math.abs(c._2))
+val abs_func: ((Double, Double)) => (Double, Double) =
+  (c: (Double, Double)) => (math.abs(c._1), math.abs(c._2))
 
-   //Map elements
-   val data_abs = dataset1.map(abs_func)
+//Map elements
+val data_abs = dataset1.map(abs_func)
 
-   ```
+```
 
-   Find out more about the `DataSet` API and its capabilities in the scala [docs]().
+Find out more about the `DataSet` API and its capabilities in the [user guide](/core/core_dtfdata.md).
 
 ### Tensorflow Integration
 
-   **Package** `dynaml.tensorflow`
+**Package** `dynaml.tensorflow`
 
-   #### Batch Normalisation
+#### Batch Normalisation
 
-   [Batch normalisation](https://arxiv.org/abs/1502.03167) is used to standardize activations of convolutional layers and
-   to speed up training of deep neural nets. 
+[Batch normalisation](https://arxiv.org/abs/1502.03167) is used to standardize activations of convolutional layers and
+to speed up training of deep neural nets.
 
-   **Usage**
+**Usage**
 
-   ```scala
-   import io.github.mandar2812.dynaml.tensorflow._
+```scala
+import io.github.mandar2812.dynaml.tensorflow._
 
-   val bn = dtflearn.batch_norm("BatchNorm1")
+val bn = dtflearn.batch_norm("BatchNorm1")
 
-   ```
+```
 
-   #### Inception v2
+#### Inception v2
 
-   The [_Inception_](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf) architecture, proposed by Google is an important
-   building block of _convolutional neural network_ architectures used in vision applications.
+The [_Inception_](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf) architecture, proposed by Google, is an important
+building block of _convolutional neural network_ architectures used in vision applications.
 
-   ![inception](https://github.com/transcendent-ai-labs/DynaML/blob/master/docs/images/inception.png)
+![inception](/images/inception.png)
 
-   DynaML now offers the Inception cell as a computational layer.
+In a subsequent [paper](https://arxiv.org/pdf/1512.00567.pdf), the authors introduced optimizations in the Inception
+architecture, known colloquially as _Inception v2_.
 
-   **Usage**
+In _Inception v2_, larger convolutions (i.e. `3 x 3` and `5 x 5`) are implemented in a factorized manner
+to reduce the number of parameters to be learned. For example, the `3 x 3` convolution is expressed as a
+combination of `1 x 3` and `3 x 1` convolutions.
 
-   ```scala
-   import io.github.mandar2812.dynaml.pipes._
-   import io.github.mandar2812.dynaml.tensorflow._
-   import org.platanios.tensorflow.api._
+![inception](/images/conv-fact.png)
 
-   //Create an RELU activation, given a string name/identifier.
-   val relu_act = DataPipe(tf.learn.ReLU(_))
+Similarly, the `5 x 5` convolutions can be expressed as a combination of two `3 x 3` convolutions.
 
-   //Learn 10 filters in each branch of the inception cell
-   val filters = Seq(10, 10, 10, 10)
-   
-   val inception_cell = dtflearn.inception_unit(
-   channels = 3, num_filters = filters, relu_act,
-   //Apply batch normalisation after each convolution
-   use_batch_norm = true)(layer_index = 1)
+![inception](/images/conv-fact2.png)
+
+DynaML now offers the Inception cell as a computational layer.
 
-   ```
 **Usage**
 
-   In a subsequent [paper](https://arxiv.org/pdf/1512.00567.pdf), the authors introduced optimizations in the Inception
-   architecture, known colloquially as _Inception v2_.
+```scala
+import io.github.mandar2812.dynaml.pipes._
+import io.github.mandar2812.dynaml.tensorflow._
+import org.platanios.tensorflow.api._
 
-   In _Inception v2_, larger convolutions (i.e. `3 x 3` and `5 x 5`) are implemented in a factorized manner
-   to reduce the number of parameters to be learned. For example the `3 x 3` convolution is expressed as a
-   combination of `1 x 3` and `3 x 1` convolutions.
+//Create a ReLU activation, given a string name/identifier.
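+//The resulting pipe maps a String name to an activation layer instance.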
+val relu_act = DataPipe(tf.learn.ReLU(_))
 
-   ![inception](https://github.com/transcendent-ai-labs/DynaML/blob/master/docs/images/conv-fact.png)
+//Learn 10 filters in each branch of the inception cell
+val filters = Seq(10, 10, 10, 10)
 
-   Similarly the `5 x 5` convolutions can be expressed a combination of two `3 x 3` convolutions
+val inception_cell = dtflearn.inception_unit(
+  channels = 3, num_filters = filters, relu_act,
+  //Apply batch normalisation after each convolution
+  use_batch_norm = true)(layer_index = 1)
 
-   ![inception](https://github.com/transcendent-ai-labs/DynaML/blob/master/docs/images/conv-fact2.png)
+```
 
-   #### Dynamical Systems: Continuous Time RNN
+#### Dynamical Systems: Continuous Time RNN
 
-   Continuous time recurrent neural networks (CTRNN) are an important class of recurrent neural networks. They enable
-   the modelling of non-linear and potentially complex dynamical systems of multiple variables, with feedback.
+Continuous time recurrent neural networks (CTRNN) are an important class of recurrent neural networks. They enable
+the modelling of non-linear and potentially complex dynamical systems of multiple variables, with feedback.
 
-   - Added CTRNN layer: `dtflearn.ctrnn`
+ - Added CTRNN layer: `dtflearn.ctrnn`
 
-   - Added CTRNN layer with inferable time step: `dtflearn.dctrnn`.
+ - Added CTRNN layer with inferable time step: `dtflearn.dctrnn`.
 
-   - Added a projection layer for CTRNN based models `dtflearn.ts_linear`.
+ - Added a projection layer for CTRNN-based models: `dtflearn.ts_linear`.
 
-   **Training Stopping Criteria**
+**Training Stopping Criteria**
 
-   Create common and simple training stop criteria such as.
+Create common and simple training stopping criteria, such as:
 
-   - Stop after fixed number of iterations `dtflearn.max_iter_stop(100000)`
-   - Stop after change in value of loss goes below a threshold. `dtflearn.abs_loss_change_stop(0.0001)`
-   - Stop after change in relative value of loss goes below a threshold. `dtflearn.rel_loss_change_stop(0.001)`
+ - Stop after a fixed number of iterations: `dtflearn.max_iter_stop(100000)`
+
+ - Stop after the change in value of the loss goes below a threshold: `dtflearn.abs_loss_change_stop(0.0001)`
+
+ - Stop after the relative change in the loss goes below a threshold: `dtflearn.rel_loss_change_stop(0.001)`
 
-   **Neural Network Building Blocks**
+**Neural Network Building Blocks**
 
-   - Added helper method ```dtlearn.build_tf_model()``` for training tensorflow models/estimators.
+ - Added helper method ```dtflearn.build_tf_model()``` for training TensorFlow models/estimators.
 
-   **Usage**
+**Usage**
 
-   ```scala
+```scala
 
-   import io.github.mandar2812.dynaml.tensorflow._
+import io.github.mandar2812.dynaml.tensorflow._
 import org.platanios.tensorflow.api._
 import org.platanios.tensorflow.data.image.MNISTLoader
 import ammonite.ops._
 
-   val tempdir = home/"tmp"
-
-   val dataSet = MNISTLoader.load(java.nio.file.Paths.get(tempdir.toString()))
-   val trainImages = tf.data.TensorSlicesDataset(dataSet.trainImages)
-   val trainLabels = tf.data.TensorSlicesDataset(dataSet.trainLabels)
-   val trainData =
-   trainImages.zip(trainLabels)
-   .repeat()
-   .shuffle(10000)
-   .batch(256)
-   .prefetch(10)
-
-   // Create the MLP model.
-   val input = tf.learn.Input(
-   UINT8,
-   Shape(
-   -1,
-   dataSet.trainImages.shape(1),
-   dataSet.trainImages.shape(2))
-   )
-
-   val trainInput = tf.learn.Input(UINT8, Shape(-1))
-
-   val architecture = tf.learn.Flatten("Input/Flatten") >>
-   tf.learn.Cast("Input/Cast", FLOAT32) >>
-   tf.learn.Linear("Layer_0/Linear", 128) >>
-   tf.learn.ReLU("Layer_0/ReLU", 0.1f) >>
-   tf.learn.Linear("Layer_1/Linear", 64) >>
-   tf.learn.ReLU("Layer_1/ReLU", 0.1f) >>
-   tf.learn.Linear("Layer_2/Linear", 32) >>
-   tf.learn.ReLU("Layer_2/ReLU", 0.1f) >>
-   tf.learn.Linear("OutputLayer/Linear", 10)
-
-   val trainingInputLayer = tf.learn.Cast("TrainInput/Cast", INT64)
-
-   val loss =
-   tf.learn.SparseSoftmaxCrossEntropy("Loss/CrossEntropy") >>
-   tf.learn.Mean("Loss/Mean") >>
-   tf.learn.ScalarSummary("Loss/Summary", "Loss")
-
-   val optimizer = tf.train.AdaGrad(0.1)
-
-   // Directory in which to save summaries and checkpoints
-   val summariesDir = java.nio.file.Paths.get((tempdir/"mnist_summaries").toString())
-
-
-   val (model, estimator) = dtflearn.build_tf_model(
-   architecture, input, trainInput, trainingInputLayer,
-   loss, optimizer, summariesDir, dtflearn.max_iter_stop(1000),
-   100, 100, 100)(trainData)
+val tempdir = home/"tmp"
+
+val dataSet = MNISTLoader.load(
+  java.nio.file.Paths.get(tempdir.toString())
+)
+
+val trainImages = tf.data.TensorSlicesDataset(dataSet.trainImages)
+val trainLabels = tf.data.TensorSlicesDataset(dataSet.trainLabels)
+
+val trainData =
+  trainImages.zip(trainLabels)
+    .repeat()
+    .shuffle(10000)
+    .batch(256)
+    .prefetch(10)
+
+// Create the MLP model.
+val input = tf.learn.Input(
+  UINT8,
+  Shape(
+    -1,
+    dataSet.trainImages.shape(1),
+    dataSet.trainImages.shape(2))
+)
+
+val trainInput = tf.learn.Input(UINT8, Shape(-1))
+
+val architecture = tf.learn.Flatten("Input/Flatten") >>
+  tf.learn.Cast("Input/Cast", FLOAT32) >>
+  tf.learn.Linear("Layer_0/Linear", 128) >>
+  tf.learn.ReLU("Layer_0/ReLU", 0.1f) >>
+  tf.learn.Linear("Layer_1/Linear", 64) >>
+  tf.learn.ReLU("Layer_1/ReLU", 0.1f) >>
+  tf.learn.Linear("Layer_2/Linear", 32) >>
+  tf.learn.ReLU("Layer_2/ReLU", 0.1f) >>
+  tf.learn.Linear("OutputLayer/Linear", 10)
+
+val trainingInputLayer = tf.learn.Cast("TrainInput/Cast", INT64)
+
+val loss =
+  tf.learn.SparseSoftmaxCrossEntropy("Loss/CrossEntropy") >>
+  tf.learn.Mean("Loss/Mean") >>
+  tf.learn.ScalarSummary("Loss/Summary", "Loss")
+
+val optimizer = tf.train.AdaGrad(0.1)
+
+// Directory in which to save summaries and checkpoints
+val summariesDir = java.nio.file.Paths.get(
+  (tempdir/"mnist_summaries").toString()
+)
+
+
+val (model, estimator) = dtflearn.build_tf_model(
+  architecture, input, trainInput, trainingInputLayer,
+  loss, optimizer, summariesDir, dtflearn.max_iter_stop(1000),
+  100, 100, 100)(trainData)
 
 ```

-   - Build feedforward layers and feedforward layer stacks easier.
+- Build feedforward layers and stacks of feedforward layers more easily.
**Usage**
 
-   ```scala
+```scala
 
-   import io.github.mandar2812.dynaml.tensorflow._
-   import org.platanios.tensorflow.api._
-   //Create a single feedforward layer
+import io.github.mandar2812.dynaml.tensorflow._
+import org.platanios.tensorflow.api._
 
-   val layer = dtflearn.feedforward(num_units = 10, useBias = true)(id = 1)
+//Create a single feedforward layer
+val layer = dtflearn.feedforward(num_units = 10, useBias = true)(id = 1)
 
-   //Create a stack of feedforward layers
+//Create a stack of feedforward layers
 
-   val net_layer_sizes = Seq(10, 5, 3)
+val net_layer_sizes = Seq(10, 5, 3)
 
-   val stack = dtflearn.feedforward_stack(
+val stack = dtflearn.feedforward_stack(
   (i: Int) => dtflearn.Phi("Act_"+i), FLOAT64)(
   net_layer_sizes)
 
-   ```
+```
 
 
 
@@ -226,6 +238,8 @@ Create 3d plots of surfaces, for a use case, see the `jzydemo.sc` and `tf_wave_p
 ## Improvements and Upgrades
 
 - Bumped up Ammonite version to 1.1.0
+- `RegressionMetrics` and `RegressionMetricsTF` now also compute Spearman rank correlation as
+  one of the performance metrics.
 
 ## Changes
 
diff --git a/docs/scaladoc/v1.5.3/core.md b/docs/scaladoc/v1.5.3/core.md
new file mode 100644
index 000000000..46ef46a54
--- /dev/null
+++ b/docs/scaladoc/v1.5.3/core.md
@@ -0,0 +1 @@
+
diff --git a/docs/scaladoc/v1.5.3/examples.md b/docs/scaladoc/v1.5.3/examples.md
new file mode 100644
index 000000000..02064de37
--- /dev/null
+++ b/docs/scaladoc/v1.5.3/examples.md
@@ -0,0 +1 @@
+
diff --git a/docs/scaladoc/v1.5.3/pipes.md b/docs/scaladoc/v1.5.3/pipes.md
new file mode 100644
index 000000000..b11418655
--- /dev/null
+++ b/docs/scaladoc/v1.5.3/pipes.md
@@ -0,0 +1 @@
+
diff --git a/docs/scaladoc/v1.5.3/repl.md b/docs/scaladoc/v1.5.3/repl.md
new file mode 100644
index 000000000..4fb7ceed9
--- /dev/null
+++ b/docs/scaladoc/v1.5.3/repl.md
@@ -0,0 +1 @@
+
diff --git a/dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/tensorflow/data/DataSet.scala b/dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/tensorflow/data/DataSet.scala
index 1a3d14729..b550d61fc 100644
--- a/dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/tensorflow/data/DataSet.scala
+++ b/dynaml-core/src/main/scala-2.11/io/github/mandar2812/dynaml/tensorflow/data/DataSet.scala
@@ -127,6 +127,8 @@ class DataSet[X](val data: Iterable[X]) {
 
   def scanLeft[Y](z: Y)(scanPipe: DataPipe2[Y, X, Y]): DataSet[Y] = DataSet(data.scanLeft(z)(scanPipe(_, _)))
 
+  def scanRight[Y](z: Y)(scanPipe: DataPipe2[X, Y, Y]): DataSet[Y] = DataSet(data.scanRight(z)(scanPipe(_, _)))
+
   def scan[Y >: X](z: Y)(scanPipe: DataPipe2[Y, Y, Y]): DataSet[Y] = DataSet(data.scan(z)(scanPipe(_, _)))
 
   /**
@@ -260,6 +262,11 @@ class ZipDataSet[X, Y](
 
   def unzip: (DataSet[X], DataSet[Y]) = (dataset1, dataset2)
 
+  /**
+    * Join the current data set to another key-value data set.
+    * The join operation is carried out over keys of type [[X]].
+    *
+    * */
   def join[Z](other: ZipDataSet[X, Z]): ZipDataSet[X, (Y, Z)] = {
 
     val otherMap = other.data.toMap
@@ -305,6 +312,9 @@ case class SupervisedDataSet[X, Y](
 
   self =>
 
+  /**
+    * Split into training and test sets.
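+    * @param f Predicate used to partition the patterns; elements for which it
+    *          returns `true` form the first of the two resulting sets.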
+ * */ override def partition(f: DataPipe[(X, Y), Boolean]): TFDataSet[(X, Y)] = { val data_split = data.partition(f(_)) diff --git a/mkdocs.yml b/mkdocs.yml index 3bb69ed7a..9dbf92ae8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,6 +13,7 @@ pages: - 'Structure': structure.md - Releases: - 'v1.5.x': + - 'v1.5.3': 'releases/mydoc_release_notes_153.md' - 'v1.5.2': 'releases/mydoc_release_notes_152.md' - 'v1.5.1': 'releases/mydoc_release_notes_151.md' - 'v1.5': 'releases/mydoc_release_notes_15.md' @@ -23,6 +24,11 @@ pages: - 'v1.4': 'releases/mydoc_release_notes_14.md' - Scaladoc: - v1.5.x: + - v1.5.3: + - dynaml-core: 'scaladoc/v1.5.3/core.md' + - dynaml-pipes: 'scaladoc/v1.5.3/pipes.md' + - dynaml-repl: 'scaladoc/v1.5.3/repl.md' + - dynaml-examples: 'scaladoc/v1.5.3/examples.md' - v1.5.2: - dynaml-core: 'scaladoc/v1.5.2/core.md' - dynaml-pipes: 'scaladoc/v1.5.2/pipes.md' @@ -74,6 +80,7 @@ pages: - Introduction: 'core/core_dynaml_tf.md' - Tensorflow Pointer: 'core/core_dtf.md' - Building Blocks: 'core/core_dtflearn.md' + - Data Set API: 'core/core_dtfdata.md' #- Utilities: 'core/core_tf_misc.md' - Kernels: - Kernel API: 'core/core_kernels.md'