-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
210 lines (150 loc) · 6.5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
"""Untitled1.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1RLAamXA3JF8el2xJh2nOOeI1fW_his6i
"""
#%%
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras import layers
# CSV was downloaded from https://finance.yahoo.com/quote/PYPL?p=PYPL&.tsrc=fin-srch -> provides data such as what the stock opened at, what the high and low prices were, and what the stock closed at. I will be using the date and the closing price for each date.
# Note: The VS Code extension 'Jupyter' is very useful, and is what I used during the course of this project.
#TODO: currently the graph is set up to receive data from downloaded .csv files. In the future I would like to redesign this to receive data via links and export graphs
# Load the Yahoo Finance CSV export for PYPL.  The export provides columns
# such as Date, Open, High, Low, Close (see the source comment above); only
# Date and Close are used downstream.
df = pd.read_csv('PYPL.csv')

# Keep just the trading date and the closing price.
# NOTE: the original notebook had bare `df` / `df['Date']` expression
# statements after each step to display the frame interactively; those are
# no-op dead statements in a plain .py script, so they are removed here.
# The resulting `df` is unchanged.
df = df[['Date', 'Close']]
# # converts a string to a date i.e "2015-07-06"
# def str_to_datetime(s):
# split = s.split('-')
# year, month, day = int(split[0]), int(split[1]), int(split[2])
# return datetime.datetime(year=year, month=month, day=day)
# datetime_object = str_to_datetime('1986-03-19')
# datetime_object
# df
# df['Date'] = df['Date'].apply(str_to_datetime)
# df['Date']
# # Replaces the index of the chart with the date instead
# df.index = df.pop('Date')
# df
# # Plots a graph by the close price, using matplotlib
# plt.plot(df.index, df['Close'])
# # select 2 dates from the stock that will be used for testing data
# def window_frame(dataframe, first_date, last_date, n=3):
# first_date = str_to_datetime(first_date)
# last_date = str_to_datetime(last_date)
# target_date = first_date
# dates = []
# X, Y = [], []
# last_time = False
# while True:
# sub = dataframe.loc[:target_date].tail(n+1)
# if len(sub) != n+1:
# print(f'Error: Window of size {n} is too large')
# return
# values = sub['Close'].to_numpy()
# x, y = values[:-1], values[-1]
# dates.append(target_date)
# X.append(x)
# Y.append(y)
# next_week = dataframe.loc[target_date:target_date+datetime.timedelta(days=7)]
# next_datetime_str = str(next_week.head(2).tail(1).index.values[0])
# next_date_str = next_datetime_str.split('T')[0]
# year_month_day = next_date_str.split('-')
# year, month, day = year_month_day
# next_date = datetime.datetime(day=int(day), month=int(month), year=int(year))
# if last_time:
# break
# target_date = next_date
# if target_date == last_date:
# last_time = True
# new_df = pd.DataFrame({})
# new_df['Target Date'] = dates
# X = np.array(X)
# for i in range(0, n):
# X[:, i]
# new_df[f'Target-{n-i}'] = X[:, i]
# new_df['Target'] = Y
# return new_df
# # We will now provide the dataframe, the time frame, and how many target results
# windowed_df = window_frame(df, '2021-03-25', '2022-03-23', n=3)
# windowed_df
# def windowed_df_to_date_X_y(windowed_dataframe):
# # converts the time window into a numpy array
# # We convert this because we are able to push the data to the training model.
# df_as_np = windowed_dataframe.to_numpy()
# dates = df_as_np[:, 0]
# middle_matrix = df_as_np[:, 1:-1]
# # Number of observations / how many number of columns (close) / 1 variable
# X = middle_matrix.reshape((len(dates), middle_matrix.shape[1], 1))
# Y = df_as_np[:, -1]
# return dates, X.astype(np.float32), Y.astype(np.float32)
# dates, X, y = windowed_df_to_date_X_y(windowed_df)
# dates.shape, X.shape, y.shape
# # Number of results, 4 columns (target 1 - 3), 1 type of variable (close)
# dates_times_8 = int(len(dates) * .8)
# dates_times_9 = int(len(dates) * .9)
# # training uses 80% of the data
# dates_train, X_train, y_train = dates[:dates_times_8], X[:dates_times_8], y[:dates_times_8]
# #validation and test uses 90% of the dates
# dates_val, X_val, y_val = dates[dates_times_8:dates_times_9], X[dates_times_8:dates_times_9], y[dates_times_8:dates_times_9]
# dates_test, X_test, y_test = dates[dates_times_9:], X[dates_times_9:], y[dates_times_9:]
# plt.plot(dates_train, y_train)
# # Validation is the data that has matched with the prediction
# plt.plot(dates_val, y_val)
# # Test is the prediction the computer has made
# plt.plot(dates_test, y_test)
# # training = blue
# # validation = orange
# # test = green
# plt.legend(['Train', 'Validated', 'Test'])
# model = Sequential([layers.Input((3, 1)), layers.LSTM(64),layers.Dense(32, activation='relu'),layers.Dense(32, activation='relu'),layers.Dense(1)])
# # 0.001 -> learning rate
# # M.A.R -> avg we are wrong by
# model.compile(loss='mse',
# optimizer=Adam(learning_rate=0.001),
# metrics=['mean_absolute_error'])
# #Epochs -> how many times it runs through the data
# model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100)
# train_predictions = model.predict(X_train).flatten()
# plt.plot(dates_train, train_predictions)
# plt.plot(dates_train, y_train)
# # Blue -> What the program believed would happen / it follows the data it was trained on
# # Orange -> what really happened / what the program saw happened
# # This is repeated for both validation and testing
# plt.legend(['Training Predictions', 'Training Observations'])
# val_predictions = model.predict(X_val).flatten()
# plt.plot(dates_val, val_predictions)
# plt.plot(dates_val, y_val)
# plt.legend(['Validation Predictions', 'Validation Observations'])
# test_predictions = model.predict(X_test).flatten()
# plt.plot(dates_test, test_predictions)
# plt.plot(dates_test, y_test)
# plt.legend(['Testing Predictions', 'Testing Observations'])
# # Combine all the previous graphs into 1 final graph
# plt.plot(dates_train, train_predictions)
# plt.plot(dates_train, y_train)
# plt.plot(dates_val, val_predictions)
# plt.plot(dates_val, y_val)
# plt.plot(dates_test, test_predictions)
# plt.plot(dates_test, y_test)
# plt.legend(['Training Predictions',
# 'Training Observations',
# 'Validation Predictions',
# 'Validation Observations',
# 'Testing Predictions',
# 'Testing Observations'])
# Final Result
# Orange -> Follow the data in the training
# Red -> what actually happened
# Green -> What it thought happened
# Brown -> what really happened
# Purple -> what it predicted would happened
# %%