Skip to content

Latest commit

 

History

History
435 lines (355 loc) · 9.96 KB

hw1_Sol.md

File metadata and controls

435 lines (355 loc) · 9.96 KB

homework 1 solution

Question 1

(ii), (iv) and (v)

Key essence of machine learning:

  1. exists some 'underlying pattern' to be learned
  2. but no easy programmable definition
  3. somehow there is data about the pattern

Question 2

reinforcement learning

Question 3

unsupervised learning

Question 4

supervised learning

Question 5

active learning

Types of learning:

  1. Learning with Different Output Space Y
    • [classification], [regression], [structured]
  2. Learning with Different Data Label yn
    • [supervised], un/semi-supervised, reinforcement
  3. Learning with Different Protocol f => (xn, yn)
    • [batch], online, active
  4. Learning with Different Input Space X
    • [concrete], raw, abstract

Question 6

Sol

This can be reduced to counting how many even numbers lie between N+1 and N+L.

Question 7

Sol

Obviously, the different values of f depend on c.

Question 8

Sol

Question 9

Sol

Question 10

Sol

Question 11

Sol

Question 12

Sol

Question 13

Sol

Question 14

Sol

Question 15

import numpy as np

def loadData(filename):
    """Load a whitespace-delimited numeric file as (X, Y).

    The last column of the file becomes Y (kept two-dimensional); all other
    columns become X, with a column of ones prepended on the left.

    Args:
        filename: Path passed straight to ``np.loadtxt``.

    Returns:
        A tuple ``(X, Y)``.
    """
    samples = np.matrix(np.loadtxt(filename))
    n_samples = samples.shape[0]
    ones_column = np.ones((n_samples, 1))
    features = samples[:, :-1]
    labels = samples[:, -1]
    return np.c_[ones_column, features], labels

def perceptron(X, Y, w, speed=1):
    num = 0; prevpos = 0
    while(True):
        yhat = np.sign(X.dot(w))
        yhat[np.where(yhat == 0)] = -1
        index = np.where(yhat != Y)[0]
        if not index.any():
            break
        if not index[index >= prevpos].any():
            prevpos = 0
        pos = index[index >= prevpos][0]
        prevpos = pos
        w += speed*Y[pos, 0]*X[pos:pos+1, :].T
        num += 1
    return num

# Run the perceptron once over the training file in its stored order,
# starting from the all-zero weight vector, and print the update count.
X, Y = loadData('./pic/homework/hw1_15_train.dat')
n_features = X.shape[1]

updates = perceptron(X, Y, np.zeros((n_features, 1)))
print(updates)
In [1]: %timeit %run 15.py
39
...
39
39
100 loops, best of 3: 7.74 ms per loop

Question 16

import numpy as np

def loadData(filename):
    """Load a whitespace-delimited numeric file as (X, Y).

    The last column of the file becomes Y (kept two-dimensional); all other
    columns become X, with a column of ones prepended on the left.

    Args:
        filename: Path passed straight to ``np.loadtxt``.

    Returns:
        A tuple ``(X, Y)``.
    """
    samples = np.matrix(np.loadtxt(filename))
    n_samples = samples.shape[0]
    ones_column = np.ones((n_samples, 1))
    features = samples[:, :-1]
    labels = samples[:, -1]
    return np.c_[ones_column, features], labels

def perceptron(X, Y, w, speed=1):
    num = 0; prevpos = 0
    while(True):
        yhat = np.sign(X.dot(w))
        yhat[np.where(yhat == 0)] = -1
        index = np.where(yhat != Y)[0]
        if not index.any():
            break
        if not index[index >= prevpos].any():
            prevpos = 0
        pos = index[index >= prevpos][0]
        prevpos = pos
        w += speed*Y[pos, 0]*X[pos:pos+1, :].T
        num += 1
    return num

# Average the perceptron's update count over many runs, each on a fresh
# random ordering of the training rows and starting from zero weights.
X, Y = loadData('./pic/homework/hw1_15_train.dat')
n_samples, n_features = X.shape

n_runs = 2000
update_counts = []
for _ in range(n_runs):
    start = np.zeros((n_features, 1))
    order = np.random.permutation(n_samples)
    update_counts.append(perceptron(X[order, :], Y[order, :], start))
print(sum(update_counts) / n_runs)
In [2]: %timeit %run 16.py
39.5295
39.8125
39.9565
39.57
1 loop, best of 3: 4.22 s per loop

Question 17

import numpy as np

def loadData(filename):
    """Load a whitespace-delimited numeric file as (X, Y).

    The last column of the file becomes Y (kept two-dimensional); all other
    columns become X, with a column of ones prepended on the left.

    Args:
        filename: Path passed straight to ``np.loadtxt``.

    Returns:
        A tuple ``(X, Y)``.
    """
    samples = np.matrix(np.loadtxt(filename))
    n_samples = samples.shape[0]
    ones_column = np.ones((n_samples, 1))
    features = samples[:, :-1]
    labels = samples[:, -1]
    return np.c_[ones_column, features], labels

def perceptron(X, Y, w, speed=1):
    num = 0; prevpos = 0
    while(True):
        yhat = np.sign(X.dot(w))
        yhat[np.where(yhat == 0)] = -1
        index = np.where(yhat != Y)[0]
        if not index.any():
            break
        if not index[index >= prevpos].any():
            prevpos = 0
        pos = index[index >= prevpos][0]
        prevpos = pos
        w += speed*Y[pos, 0]*X[pos:pos+1, :].T
        num += 1
    return num

# Same experiment as the shuffled-order run, but every perceptron update is
# scaled by a step size of 0.5; prints the mean update count over all runs.
X, Y = loadData('./pic/homework/hw1_15_train.dat')
n_samples, n_features = X.shape

n_runs = 2000
update_counts = []
for _ in range(n_runs):
    start = np.zeros((n_features, 1))
    order = np.random.permutation(n_samples)
    update_counts.append(perceptron(X[order, :], Y[order, :], start, 0.5))
print(sum(update_counts) / n_runs)
In [3]: %timeit %run 17.py
40.2405
40.0065
39.9915
39.664
1 loop, best of 3: 4.21 s per loop

Question 18

import numpy as np

def loadData(filename):
    """Load a whitespace-delimited numeric file as (X, Y).

    The last column of the file becomes Y (kept two-dimensional); all other
    columns become X, with a column of ones prepended on the left.

    Args:
        filename: Path passed straight to ``np.loadtxt``.

    Returns:
        A tuple ``(X, Y)``.
    """
    samples = np.matrix(np.loadtxt(filename))
    n_samples = samples.shape[0]
    ones_column = np.ones((n_samples, 1))
    features = samples[:, :-1]
    labels = samples[:, -1]
    return np.c_[ones_column, features], labels

def mistake(yhat, y):
    """Return the count of positions where yhat != y, divided by y's row count.

    Args:
        yhat: Predicted labels, comparable element-wise with ``y``.
        y: True labels; must be two-dimensional.
    """
    n_rows, _ = y.shape
    mismatches = yhat != y
    return np.sum(mismatches) / n_rows

def pocket(X, Y, w, iternum, speed=1):
    """Run up to ``iternum`` perceptron updates, remembering the best weights.

    Each iteration picks one misclassified row at random, applies a perceptron
    update to ``w``, and keeps a copy of ``w`` whenever its error rate (as
    computed by ``mistake``) is strictly lower than the best seen so far.
    A prediction of exactly 0 is counted as -1.

    Args:
        X: 2-D samples, one row per example (indexed as ``X[pos:pos+1, :]``).
        Y: 2-D column of labels aligned with X (indexed as ``Y[pos, 0]``).
        w: Column weight vector; it is updated IN PLACE.
        iternum: Maximum number of updates to perform.
        speed: Multiplier applied to every update step.

    Returns:
        ``(wBest, w)``: the lowest-error weights seen and the final weights.
    """
    yhat = np.sign(X.dot(w))
    yhat[np.where(yhat == 0)] = -1
    errOld = mistake(yhat, Y)
    # Seed the pocket with the incoming weights rather than zeros, so the
    # returned best vector always matches errOld even if nothing improves.
    wBest = w.copy()
    for _ in range(iternum):
        index = np.where(yhat != Y)[0]
        # Check emptiness by size. ``index.any()`` inspects the index VALUES
        # and is False for array([0]), which used to stop the loop while
        # row 0 was still misclassified.
        if index.size == 0:
            break
        pos = index[np.random.permutation(len(index))[0]]
        w += speed * Y[pos, 0] * X[pos:pos + 1, :].T
        yhat = np.sign(X.dot(w))
        yhat[np.where(yhat == 0)] = -1
        errNow = mistake(yhat, Y)
        if errNow < errOld:
            wBest = w.copy()
            errOld = errNow
    return wBest, w

# Train the pocket algorithm (50 updates) on randomly ordered training rows,
# score the BEST weights on the test file, and print the mean test error
# over all runs.
X, Y = loadData('./pic/homework/hw1_18_train.dat')
Xtest, Ytest = loadData('./pic/homework/hw1_18_test.dat')
n_samples, n_features = X.shape

n_runs = 2000
test_errors = []
for _ in range(n_runs):
    start = np.zeros((n_features, 1))
    order = np.random.permutation(n_samples)
    best_w, last_w = pocket(X[order, :], Y[order, :], start, 50)
    predicted = np.sign(Xtest.dot(best_w))
    predicted[np.where(predicted == 0)] = -1
    test_errors.append(mistake(predicted, Ytest))
print(sum(test_errors) / n_runs)
In [1]: %timeit %run 18.py
0.133199
0.13218
0.132887
0.134125
1 loop, best of 3: 10.1 s per loop

Question 19

import numpy as np

def loadData(filename):
    """Load a whitespace-delimited numeric file as (X, Y).

    The last column of the file becomes Y (kept two-dimensional); all other
    columns become X, with a column of ones prepended on the left.

    Args:
        filename: Path passed straight to ``np.loadtxt``.

    Returns:
        A tuple ``(X, Y)``.
    """
    samples = np.matrix(np.loadtxt(filename))
    n_samples = samples.shape[0]
    ones_column = np.ones((n_samples, 1))
    features = samples[:, :-1]
    labels = samples[:, -1]
    return np.c_[ones_column, features], labels

def mistake(yhat, y):
    """Return the count of positions where yhat != y, divided by y's row count.

    Args:
        yhat: Predicted labels, comparable element-wise with ``y``.
        y: True labels; must be two-dimensional.
    """
    n_rows, _ = y.shape
    mismatches = yhat != y
    return np.sum(mismatches) / n_rows

def pocket(X, Y, w, iternum, speed=1):
    """Run up to ``iternum`` perceptron updates, remembering the best weights.

    Each iteration picks one misclassified row at random, applies a perceptron
    update to ``w``, and keeps a copy of ``w`` whenever its error rate (as
    computed by ``mistake``) is strictly lower than the best seen so far.
    A prediction of exactly 0 is counted as -1.

    Args:
        X: 2-D samples, one row per example (indexed as ``X[pos:pos+1, :]``).
        Y: 2-D column of labels aligned with X (indexed as ``Y[pos, 0]``).
        w: Column weight vector; it is updated IN PLACE.
        iternum: Maximum number of updates to perform.
        speed: Multiplier applied to every update step.

    Returns:
        ``(wBest, w)``: the lowest-error weights seen and the final weights.
    """
    yhat = np.sign(X.dot(w))
    yhat[np.where(yhat == 0)] = -1
    errOld = mistake(yhat, Y)
    # Seed the pocket with the incoming weights rather than zeros, so the
    # returned best vector always matches errOld even if nothing improves.
    wBest = w.copy()
    for _ in range(iternum):
        index = np.where(yhat != Y)[0]
        # Check emptiness by size. ``index.any()`` inspects the index VALUES
        # and is False for array([0]), which used to stop the loop while
        # row 0 was still misclassified.
        if index.size == 0:
            break
        pos = index[np.random.permutation(len(index))[0]]
        w += speed * Y[pos, 0] * X[pos:pos + 1, :].T
        yhat = np.sign(X.dot(w))
        yhat[np.where(yhat == 0)] = -1
        errNow = mistake(yhat, Y)
        if errNow < errOld:
            wBest = w.copy()
            errOld = errNow
    return wBest, w

# Train the pocket algorithm (50 updates) on randomly ordered training rows,
# but score the FINAL weights (not the best ones) on the test file, and
# print the mean test error over all runs.
X, Y = loadData('./pic/homework/hw1_18_train.dat')
Xtest, Ytest = loadData('./pic/homework/hw1_18_test.dat')
n_samples, n_features = X.shape

n_runs = 2000
test_errors = []
for _ in range(n_runs):
    start = np.zeros((n_features, 1))
    order = np.random.permutation(n_samples)
    best_w, last_w = pocket(X[order, :], Y[order, :], start, 50)
    predicted = np.sign(Xtest.dot(last_w))
    predicted[np.where(predicted == 0)] = -1
    test_errors.append(mistake(predicted, Ytest))
print(sum(test_errors) / n_runs)
In [2]: %timeit %run 19.py
0.353636
0.358305
0.352031
0.357387
1 loop, best of 3: 10.3 s per loop

Question 20

import numpy as np

def loadData(filename):
    """Load a whitespace-delimited numeric file as (X, Y).

    The last column of the file becomes Y (kept two-dimensional); all other
    columns become X, with a column of ones prepended on the left.

    Args:
        filename: Path passed straight to ``np.loadtxt``.

    Returns:
        A tuple ``(X, Y)``.
    """
    samples = np.matrix(np.loadtxt(filename))
    n_samples = samples.shape[0]
    ones_column = np.ones((n_samples, 1))
    features = samples[:, :-1]
    labels = samples[:, -1]
    return np.c_[ones_column, features], labels

def mistake(yhat, y):
    """Return the count of positions where yhat != y, divided by y's row count.

    Args:
        yhat: Predicted labels, comparable element-wise with ``y``.
        y: True labels; must be two-dimensional.
    """
    n_rows, _ = y.shape
    mismatches = yhat != y
    return np.sum(mismatches) / n_rows

def pocket(X, Y, w, iternum, speed=1):
    """Run up to ``iternum`` perceptron updates, remembering the best weights.

    Each iteration picks one misclassified row at random, applies a perceptron
    update to ``w``, and keeps a copy of ``w`` whenever its error rate (as
    computed by ``mistake``) is strictly lower than the best seen so far.
    A prediction of exactly 0 is counted as -1.

    Args:
        X: 2-D samples, one row per example (indexed as ``X[pos:pos+1, :]``).
        Y: 2-D column of labels aligned with X (indexed as ``Y[pos, 0]``).
        w: Column weight vector; it is updated IN PLACE.
        iternum: Maximum number of updates to perform.
        speed: Multiplier applied to every update step.

    Returns:
        ``(wBest, w)``: the lowest-error weights seen and the final weights.
    """
    yhat = np.sign(X.dot(w))
    yhat[np.where(yhat == 0)] = -1
    errOld = mistake(yhat, Y)
    # Seed the pocket with the incoming weights rather than zeros, so the
    # returned best vector always matches errOld even if nothing improves.
    wBest = w.copy()
    for _ in range(iternum):
        index = np.where(yhat != Y)[0]
        # Check emptiness by size. ``index.any()`` inspects the index VALUES
        # and is False for array([0]), which used to stop the loop while
        # row 0 was still misclassified.
        if index.size == 0:
            break
        pos = index[np.random.permutation(len(index))[0]]
        w += speed * Y[pos, 0] * X[pos:pos + 1, :].T
        yhat = np.sign(X.dot(w))
        yhat[np.where(yhat == 0)] = -1
        errNow = mistake(yhat, Y)
        if errNow < errOld:
            wBest = w.copy()
            errOld = errNow
    return wBest, w

# Train the pocket algorithm with a budget of 100 updates on randomly
# ordered training rows, score the BEST weights on the test file, and print
# the mean test error over all runs.
X, Y = loadData('./pic/homework/hw1_18_train.dat')
Xtest, Ytest = loadData('./pic/homework/hw1_18_test.dat')
n_samples, n_features = X.shape

n_runs = 2000
test_errors = []
for _ in range(n_runs):
    start = np.zeros((n_features, 1))
    order = np.random.permutation(n_samples)
    best_w, last_w = pocket(X[order, :], Y[order, :], start, 100)
    predicted = np.sign(Xtest.dot(best_w))
    predicted[np.where(predicted == 0)] = -1
    test_errors.append(mistake(predicted, Ytest))
print(sum(test_errors) / n_runs)
In [3]: %timeit %run 20.py
0.115668
0.115639
0.11581
0.115895
1 loop, best of 3: 19.8 s per loop