-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMachineLearningIntro.py
50 lines (39 loc) · 1.8 KB
/
MachineLearningIntro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import random
from typing import List, TypeVar,Tuple
X=TypeVar("X")  # generic type variable for input (feature) values
Y=TypeVar("Y")  # generic type variable for output (label) values
def splitData(data : List[X], percent : float)->Tuple[List[X],List[X]]:
    """Shuffle a copy of *data* and split it into fractions [percent, 1 - percent].

    Args:
        data: items to split; the caller's list is not modified or reordered.
        percent: fraction (0..1) of items to put in the first returned list.

    Returns:
        A tuple (first, second): `first` holds int(len(data) * percent)
        shuffled items, `second` holds the rest.
    """
    data = data[:]  # shallow copy so random.shuffle doesn't reorder the caller's list
    random.shuffle(data)
    cutIndex = int(len(data)*percent)
    # everything up to the cut, then everything after it, as a tuple
    return data[:cutIndex],data[cutIndex:]
data = [n for n in range(10)]
def train_test_split(xs: List[X],
                     ys: List[Y],
                     test_pct: float) -> Tuple[List[X], List[X], List[Y], List[Y]]:
    """Split the paired data (xs, ys) into train/test sets, keeping pairs aligned.

    BUG FIX: the original implementation shuffled xs and ys *independently*
    (two separate splitData calls), which destroyed the correspondence
    between each input and its output. We now shuffle one shared list of
    indices and apply the same permutation to both sequences.

    Args:
        xs: input values.
        ys: output values; ys[i] is the label for xs[i].
        test_pct: fraction (0..1) of the data reserved for the test split.

    Returns:
        (x_train, x_test, y_train, y_test) with (x, y) pairing preserved.
    """
    # One permutation shared by both sequences keeps each (x, y) pair together.
    idxs = list(range(len(xs)))
    random.shuffle(idxs)
    cut = int(len(idxs) * (1 - test_pct))  # train ends here, test begins
    train_idx, test_idx = idxs[:cut], idxs[cut:]
    return ([xs[i] for i in train_idx],
            [xs[i] for i in test_idx],
            [ys[i] for i in train_idx],
            [ys[i] for i in test_idx])
# Demo: a perfectly linear data set, split 75/25 into train and test.
xs = list(range(1000))        # inputs 0 .. 999
ys = [2 * x for x in xs]      # each target is double its input
x_train, x_test, y_train, y_test = train_test_split(xs, ys, 0.25)
# Accuracy = correct predictions / all predictions.
# tp/fp/fn/tn are the four cells of a binary confusion matrix
# (true positive, false positive, false negative, true negative).
def accuracy(tp: int, fp: int, fn: int, tn: int) -> float:
    """Fraction of all predictions that were correct: (tp + tn) / total."""
    return (tp + tn) / (tp + fp + fn + tn)
# Precision: of everything predicted positive, how much actually was.
def precision(tp: int, fp: int, fn: int, tn: int) -> float:
    """Fraction of positive predictions that were correct: tp / (tp + fp)."""
    predicted_positive = tp + fp
    return tp / predicted_positive
# Recall: of everything actually positive, how much the model found.
def recall(tp: int, fp: int, fn: int, tn: int) -> float:
    """Fraction of actual positives the model identified: tp / (tp + fn)."""
    actual_positive = tp + fn
    return tp / actual_positive
def f1_score(tp: int, fp: int, fn: int, tn: int) -> float:
    """Harmonic mean of precision and recall: 2pr / (p + r)."""
    prec = precision(tp, fp, fn, tn)
    rec = recall(tp, fp, fn, tn)
    return (2 * prec * rec) / (prec + rec)