Skip to content

Commit

Permalink
add minmax scaling and tidy up especially gradient descent app
Browse files Browse the repository at this point in the history
  • Loading branch information
lmath committed Mar 21, 2019
1 parent a1791fc commit c4f1986
Show file tree
Hide file tree
Showing 10 changed files with 165 additions and 165 deletions.
Binary file added notes/week1-written.pdf
Binary file not shown.
30 changes: 18 additions & 12 deletions src/main/scala/app/GradientDescentApp.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,34 @@ import main.scala.util.GradientDescent.LearnedParameterSet
import main.scala.util._

object GradientDescentApp {
def arrayToHeightWeight(strings: Array[String]) = HeightWeight(strings(0), strings(1).toDouble, strings(2).toDouble)
def heightWeightTo2dPoint(data: List[HeightWeight]) = data.map(dataPoint => SimplePoint(dataPoint.height, dataPoint.weight))

val heightWeights = CsvReader.asCaseClassList("/nlys-train.csv", true, arrayToHeightWeight)
val data = heightWeightTo2dPoint(heightWeights)
def main(args: Array[String]): Unit = {

val learnedParameters: LearnedParameterSet = GradientDescent.gradientDescent(data, 0, 1, 3, 500)
// Builds a HeightWeight from one row of CSV cells: cell 0 is passed through unchanged,
// cells 1 and 2 are parsed as Doubles (toDouble throws on a non-numeric cell).
def arrayToHeightWeight(strings: Array[String]) = {
  val label = strings(0)
  val firstNumber = strings(1).toDouble
  val secondNumber = strings(2).toDouble
  HeightWeight(label, firstNumber, secondNumber)
}
// Turns each HeightWeight record into a SimplePoint built from (height, weight), keeping list order.
def heightWeightTo2dPoint(data: List[HeightWeight]) =
  for (record <- data) yield SimplePoint(record.height, record.weight)

// Entry point: loads the training CSV, mean-normalises it, fits a line with gradient descent,
// loads and normalises the test CSV, displays the cost and fit plots, and prints a mean absolute error.
def main(args: Array[String]): Unit = {
//load in the training data
val heightWeights = CsvReader.asCaseClassList("/nlys-train.csv", true, arrayToHeightWeight)
val data = heightWeightTo2dPoint(heightWeights)
val normalisedData = FeatureScaler.meanNormalisedData(data)

//run gradient descent
// NOTE(review): the arguments after the data look like starting theta0, starting theta1,
// learning rate and iteration count — confirm against GradientDescent.gradientDescent
val learnedParameters: LearnedParameterSet = GradientDescent.gradientDescent(normalisedData, 0, 1, 3, 500)

//load in test data
val heightWeightsTestData = CsvReader.asCaseClassList("/nlys-test.csv", true, arrayToHeightWeight)
val heightWeightsTestDataPoints = heightWeightTo2dPoint(heightWeightsTestData)
// NOTE(review): the test points are normalised from the test list alone, not with the statistics
// of the training data the line was fitted on — confirm this is intended
val heightWeightTestDataScaled = FeatureScaler.meanNormalisedData(heightWeightsTestDataPoints)

// NOTE(review): heightWeightTestDataScaled is already defined above and these two lines still call
// scaledFeatures — presumably leftovers from before the rename to meanNormalisedData; confirm
val heightWeightScaled = FeatureScaler.scaledFeatures(data)
val heightWeightTestDataScaled = FeatureScaler.scaledFeatures(heightWeightsTestDataPoints)

//plot the trend of cost vs iteration so we know if gradient descent is working
displayPlot(Plotter.costItersPlot(learnedParameters.history, None))
displayPlot(Plotter.heightWeightPlot(heightWeightScaled.map(p => HeightWeight("", p.x, p.y)), learnedParameters.theta0, learnedParameters.theta1, Some("Height vs weight TRAINING data and line fit via linear regression")))
//plot our training data, and the line that we fit with gradient descent
displayPlot(Plotter.heightWeightPlot(normalisedData.map(p => HeightWeight("", p.x, p.y)), learnedParameters.theta0, learnedParameters.theta1, Some("Height vs weight TRAINING data and line fit via linear regression")))
//plot our test data, and the line that we fit with gradient descent
displayPlot(Plotter.heightWeightPlot(heightWeightTestDataScaled.map(p => HeightWeight("", p.x, p.y)), learnedParameters.theta0, learnedParameters.theta1, Some("Height vs weight TEST data and line fit via linear regression")))


val meanAbsoluteError = LinearErrorCalculator.linearMeanAbsoluteError(heightWeightScaled, learnedParameters.theta0, learnedParameters.theta1)
//print the mean absolute error of the fitted line
// NOTE(review): the error below is computed on normalisedData (the training points), not on
// heightWeightTestDataScaled — confirm which data set was meant to be evaluated
val meanAbsoluteError = LinearErrorCalculator.linearMeanAbsoluteError(normalisedData, learnedParameters.theta0, learnedParameters.theta1)
println(s"Hello, world! This is the absolute error: $meanAbsoluteError")
}

Expand Down
44 changes: 30 additions & 14 deletions src/main/scala/util/FeatureScaler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,39 @@ object FeatureScaler {
}
}

def scaledFeatures(data: List[SimplePoint]): List[SimplePoint] = {

def sum(xs: List[Double]): Double = {
@tailrec
def inner(xs: List[Double], accum: Double): Double = {
xs match {
case x :: tail => inner(tail, accum + x)
case Nil => accum
}
}
/**
  * Min-max scales the points so that, on each axis, the smallest value maps to 0.0
  * and the largest to 1.0.
  *
  * Each axis is rescaled independently as `(value - min) / (max - min)`, where min and
  * max are taken over that axis of `data`.
  *
  * @param data points to rescale
  * @return the rescaled points in the same order as `data`; an empty list for empty input
  */
def minMaxScaledData(data: List[SimplePoint]): List[SimplePoint] = {
  // Rescales a single value; a constant axis (max == min) maps to 0.0 rather than 0.0 / 0.0 = NaN.
  def rescale(value: Double, min: Double, max: Double): Double = {
    val range = max - min
    if (range == 0.0) 0.0 else (value - min) / range
  }

  if (data.isEmpty) {
    // min and max throw on an empty collection, and there is nothing to scale anyway.
    Nil
  } else {
    val xList = data.map(_.x)
    val yList = data.map(_.y)

    // The x bounds must both come from the x values (minX must not be read from yList).
    val minX = xList.min
    val maxX = xList.max
    val minY = yList.min
    val maxY = yList.max

    data.map { point =>
      SimplePoint(
        rescale(point.x, minX, maxX),
        rescale(point.y, minY, maxY)
      )
    }
  }
}

inner(xs, 0)
/**
  * Adds up the values in `xs`, left to right, starting from 0.0.
  *
  * @param xs values to total
  * @return the total of all elements; 0.0 for an empty list
  */
def sum(xs: List[Double]): Double = {
  // foldLeft runs as a loop, so long lists are safe, and it accumulates in list order.
  xs.foldLeft(0.0)(_ + _)
}

/** Arithmetic mean of `nums`: the total from `sum` divided by the element count (NaN for an empty list, as 0.0 / 0.0). */
def avg(nums: List[Double]): Double = {
  val count = nums.length.toDouble
  sum(nums) / count
}

def meanNormalisedData(data: List[SimplePoint]): List[SimplePoint] = {

def scaled(min: Double, max: Double, avg: Double, dataPoint: Double): Double = {
val range = max - min
Expand All @@ -52,9 +70,7 @@ object FeatureScaler {
val maxY = yList.max

val xAvg = avg(xList)
println(xAvg)
val yAvg = avg(yList)
println(yAvg)

data.map { point =>
SimplePoint(
Expand Down
15 changes: 6 additions & 9 deletions src/main/scala/util/GradientDescent.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,9 @@ object GradientDescent {
learningRate: Double,
iters: Int): LearnedParameterSet = {

val scaledData = FeatureScaler.scaledFeatures(data)
// val scaledData = FeatureScaler.scaledDividedByMax(data)

var theta0 = startingTheta0
var theta1 = startingTheta1
var cost = LinearErrorCalculator.linearMSE(scaledData, theta0, theta1)
var cost = LinearErrorCalculator.linearMSE(data, theta0, theta1)
var gradientDescentHistory = new ListBuffer[GradientDescentHistoryPoint]
println(s"starting cost ${cost}")

Expand All @@ -51,16 +48,16 @@ object GradientDescent {
println(s"old theta 0 ${theta0}")
println(s"old theta 1 ${theta1}")

println(s"theta 0 iter ${theta0Updated(scaledData, theta0, theta1, learningRate)}")
println(s"theta 1 iter ${theta1Updated(scaledData, theta0, theta1, learningRate)}")
println(s"theta 0 iter ${theta0Updated(data, theta0, theta1, learningRate)}")
println(s"theta 1 iter ${theta1Updated(data, theta0, theta1, learningRate)}")

val tempTheta0 = theta0Updated(scaledData, theta0, theta1, learningRate)
val tempTheta1 = theta1Updated(scaledData, theta0, theta1, learningRate)
val tempTheta0 = theta0Updated(data, theta0, theta1, learningRate)
val tempTheta1 = theta1Updated(data, theta0, theta1, learningRate)

theta0 = tempTheta0
theta1 = tempTheta1

cost = LinearErrorCalculator.linearMSE(scaledData, theta0, theta1)
cost = LinearErrorCalculator.linearMSE(data, theta0, theta1)
println(s"new theta 0 ${theta0}")
println(s"new theta 1 ${theta1}")
println(s"new cost ${cost}")
Expand Down
46 changes: 38 additions & 8 deletions test/main/scala/util/FeatureScalerTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import org.specs2.mutable.Specification

class FeatureScalerTest extends Specification {

"FeatureScalerTest" should {
"scaledFeatures" in {
"scaledFeatures" should {
"scale the data in " in {
val inputData = List(
SimplePoint(78, 90),
SimplePoint(55, 50),
Expand All @@ -16,16 +16,46 @@ class FeatureScalerTest extends Specification {
)

val expected = List(
SimplePoint(0.22352941176470573,0.3949999999999999),
SimplePoint(-0.4529411764705884,-0.6050000000000001),
SimplePoint(-0.48235294117647076,-0.45500000000000007),
SimplePoint(0.19411764705882337,0.2699999999999999),
SimplePoint(0.5176470588235292,0.3949999999999999)
SimplePoint(0.22352941176470573, 0.3949999999999999),
SimplePoint(-0.4529411764705884, -0.6050000000000001),
SimplePoint(-0.48235294117647076, -0.45500000000000007),
SimplePoint(0.19411764705882337, 0.2699999999999999),
SimplePoint(0.5176470588235292, 0.3949999999999999)
)

val scaled = FeatureScaler.scaledFeatures(inputData)
val scaled = FeatureScaler.meanNormalisedData(inputData)
scaled shouldEqual (expected)
}
}

"minMaxScaledData" should {
  "scale the data" in {
    val inputData = List(
      SimplePoint(78, 90),
      SimplePoint(55, 50),
      SimplePoint(54, 56),
      SimplePoint(77, 85),
      SimplePoint(88, 90)
    )

    // x spans 54..88 and y spans 50..90. Each axis is scaled against its own bounds, so the
    // smallest value on an axis must map to 0.0 and the largest to 1.0.
    val minX = 54.0
    val xRange = 88.0 - minX

    val expected = List(
      SimplePoint((78.0 - minX) / xRange, 1.0),
      SimplePoint((55.0 - minX) / xRange, 0.0),
      SimplePoint(0.0, 0.15),
      SimplePoint((77.0 - minX) / xRange, 0.875),
      SimplePoint(1.0, 1.0)
    )

    val scaled = FeatureScaler.minMaxScaledData(inputData)
    scaled shouldEqual (expected)
  }
}

"avg" should {
  "give the average of a list of values" in {
    // 78 + 55 + 54 + 77 + 88 = 352, and 352 / 5 = 70.4
    val samples = List(78.0, 55.0, 54.0, 77.0, 88.0)
    val mean = FeatureScaler.avg(samples)

    mean shouldEqual (70.4)
  }
}
}
84 changes: 21 additions & 63 deletions test/main/scala/util/GradientDescentTest.scala
Original file line number Diff line number Diff line change
@@ -1,83 +1,41 @@
package main.scala.util

import main.scala.model.{HeightWeight, House, SimplePoint}
import main.scala.model.SimplePoint
import org.specs2.mutable.Specification
import main.scala.util.GradientDescent.LearnedParameterSet

class GradientDescentTest extends Specification{

"theta0Iter" should {
"probably not be zero" in {

val expected = List(
HeightWeight("Male",174.0,96.0),
HeightWeight("Male",189.0,87.0)
"updatedTheta0" should {
"be updated from the value passed in" in {
val normalisedData = List(
SimplePoint(0.22352941176470573, 0.3949999999999999),
SimplePoint(-0.4529411764705884, -0.6050000000000001),
SimplePoint(-0.48235294117647076, -0.45500000000000007),
SimplePoint(0.19411764705882337, 0.2699999999999999),
SimplePoint(0.5176470588235292, 0.3949999999999999)
)

val data = expected.map(dataPoint => SimplePoint(dataPoint.height, dataPoint.weight))
val gmm: Double = GradientDescent.theta0Updated(data, 1, 1, 1)
val updatedTheta0: Double = GradientDescent.theta0Updated(normalisedData, 1, 1, 0.01)

gmm shouldEqual (-90)
updatedTheta0 shouldEqual (0.99)
}
}

"theta1iter" should {
"probably not be zero" in {
"updatedTheta1" should {
"be updated from the value passed in" in {

val expected = List(
HeightWeight("Female",185.0,110.0),
HeightWeight("Female",195.0,104.0)
val normalisedData = List(
SimplePoint(0.22352941176470573, 0.3949999999999999),
SimplePoint(-0.4529411764705884, -0.6050000000000001),
SimplePoint(-0.48235294117647076, -0.45500000000000007),
SimplePoint(0.19411764705882337, 0.2699999999999999),
SimplePoint(0.5176470588235292, 0.3949999999999999)
)

val data = expected.map(dataPoint => SimplePoint(dataPoint.height, dataPoint.weight))
val gmm: Double = GradientDescent.theta1Updated(data, 1, 1, 1)
val updatedTheta1: Double = GradientDescent.theta1Updated(normalisedData, 1, 1, 0.01)

gmm shouldEqual (-15999)
updatedTheta1 shouldEqual (1.0000905017301038)
}
}

"gradientDescent" should {
// "get closer to the right answer" in {
//
// def transformer(strings: Array[String]) = House(strings(0), strings(4).toDouble, strings(80).toDouble)
// val housePrices = CsvReader.asCaseClassList("house-prices-training-resources.data-test.csv", true, transformer)
//
// val resources.data = housePrices.map(dataPoint => SimplePoint(dataPoint.lotArea, dataPoint.salePrice))
// val gmm = GradientDescent.gradientDescent(resources.data, 0, 1, 2, 1000)
//
// gmm shouldEqual (LearnedParameterSet(5, 5))
// }

"get closer to the right answer" in {

def transformer(strings: Array[String]) = HeightWeight(strings(0), strings(1).toDouble, strings(2).toDouble)
val heightWeights = CsvReader.asCaseClassListFromTestResource("height-weight-test.csv", true, transformer)

val data = heightWeights.map(dataPoint => SimplePoint(dataPoint.height, dataPoint.weight))
val learnedParams = GradientDescent.gradientDescent(data, 0, 1, 0.001, 1000)

learnedParams.theta0 shouldEqual (-0.009308274850941753)
learnedParams.theta1 shouldEqual (0.9889333098394358)

}

// "pass the coursera test case" in {
//
// //[1 5; 1 2; 1 4; 1 5],[1 6 4 2]',[0 0]',0.01,1000
//
// def transformer(strings: Array[String]) = House(strings(0), strings(4).toDouble, strings(80).toDouble)
// val housePrices: Seq[House] = CsvReader.asCaseClassList("house-prices-training-resources.data-test.csv", true, transformer)
//
// val resources.data = List(
// SimplePoint(1, 5),
// SimplePoint(1, 2),
// SimplePoint(1, 4),
// SimplePoint(1, 5),
// )
//// val resources.data: Seq[SimplePoint] = housePrices.map(dataPoint => SimplePoint(dataPoint.lotArea, dataPoint.salePrice))
// val gmm = GradientDescent.gradientDescent(resources.data, 0, 1, 2, 1000)
//
// gmm shouldEqual (LearnedParameterSet(5, 5))
// }
}
}
Loading

0 comments on commit c4f1986

Please sign in to comment.