import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout
import os
from helpers import *
from importlib import reload
%matplotlib inline
2023-01-19 12:25:53.047704: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Apple Stock Prediction with LSTM Neural Network
# Load the daily AAPL prices and keep only the "open" column as a Series.
# Bracket indexing is preferred over attribute access (`.open`), which
# silently breaks when a column name collides with a DataFrame attribute
# or method — the result here is identical.
aapl = pd.read_csv("AAPL.csv")["open"]
head_tail_horz(aapl, 5, "Apple Data")
open | |
---|---|
0 | 67.71 |
1 | 68.07 |
2 | 68.50 |
3 | 66.74 |
4 | 66.36 |
open | |
---|---|
1254 | 167.16 |
1255 | 166.00 |
1256 | 159.10 |
1257 | 154.83 |
1258 | 163.09 |
Converting to a numpy array and reshaping for use with the LSTM neural network.
# Convert the price Series to a numpy array, then add an explicit feature
# column: (n,) -> (n, 1). The scaler and windowing code expect 2-D input.
aapl_np = aapl.values
print("shape before adding dimension: ", aapl_np.shape)
sp()
aapl_np = np.reshape(aapl_np, (-1, 1))
print("shape after adding dimension: ", aapl_np.shape)
sp()
# Notebook echo of the reshaped array.
aapl_np
shape before adding dimension: (1259,) shape after adding dimension: (1259, 1)
array([[ 67.7142], [ 68.0714], [ 68.5014], ..., [159.1 ], [154.83 ], [163.085 ]])
Train-Test Split
# Chronological 80/20 split. The test slice starts 50 rows before the split
# point so that the very first test sample has a full 50-step history.
split_idx = int(aapl_np.shape[0] * 0.8)
aapl_train = np.array(aapl_np[:split_idx])
aapl_test = np.array(aapl_np[split_idx - 50:])
d(aapl_train.shape)
d(aapl_test.shape)
(1007, 1)
(302, 1)
Scaling Training Set
# Fit a [0, 1] min-max scaler on the TRAINING prices only, so no
# information from the test set leaks into the scaling parameters.
scaler = MinMaxScaler(feature_range=(0, 1))
aapl_train = scaler.fit_transform(aapl_train)
# Notebook echo of the scaled training array.
aapl_train
array([[0.15550899], [0.16002875], [0.16546966], ..., [0.9222455 ], [0.93262121], [0.95046235]])
Scaling Test Set
# Reuse the training-set scaler on the test set (transform, not
# fit_transform) so both splits share the same scaling parameters.
aapl_test = scaler.transform(aapl_test)
# Notebook echo: first six scaled test values.
aapl_test[0:6]
array([[0.7048619 ], [0.70865789], [0.70043325], [0.71080895], [0.69518213], [0.68006144]])
Creating Dataset
`data.shape[0]` refers to the full length of the dataset.
def nn_data_prep(data, window=50):
    """Build sliding-window samples and next-step targets for the LSTM.

    data.shape[0] refers to the full length of the dataset.

    Parameters
    ----------
    data : np.ndarray of shape (n, 1)
        Scaled price series with a single feature column.
    window : int, default 50
        Number of past time steps in each input sample (the original
        hard-coded lookback of 50 is kept as the default).

    Returns
    -------
    inputs : np.ndarray of shape (n - window, window)
        Each row holds `window` consecutive prices.
    targets : np.ndarray of shape (n - window,)
        The price immediately following each input window.
    """
    inputs = []
    targets = []
    for end in range(window, data.shape[0]):
        inputs.append(data[end - window: end, 0])
        targets.append(data[end, 0])
    inputs = np.array(inputs)
    targets = np.array(targets)
    return inputs, targets
Training and Testing Sets
# Build supervised (window -> next price) samples for both splits.
(x_train, y_train), (x_test, y_test) = (
    nn_data_prep(aapl_train),
    nn_data_prep(aapl_test),
)
# Notebook echo: per-sample shapes of the two input sets.
x_train[:1].shape, x_test[:1].shape
((1, 50), (1, 50))
Reshaping the data
# Keras LSTM layers expect 3-D input: (samples, timesteps, features).
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))
Constructing Neural Network Model
`units` — the number of LSTM units (neurons) in each layer.
`return_sequences = True` — the full output sequence is sent to the next layer.
`input_shape = (x_train.shape[1], 1)` — `x_train.shape[1]` = 50 time steps, 1 feature.
`Dropout` — 20% of units dropped after each LSTM layer.
`model.add(Dense(units = 1))` — the single-value prediction head.
# Three stacked 96-unit LSTM layers, each followed by 20% dropout, and a
# single-unit Dense head that emits the (scaled) price prediction. The
# first two LSTMs return full sequences so the next LSTM gets 3-D input.
model = Sequential([
    LSTM(96, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(96, return_sequences=True),
    Dropout(0.2),
    LSTM(96),
    Dropout(0.2),
    Dense(1),
])
2023-01-19 12:26:03.214478: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Model Summary
# Print the layer-by-layer architecture and parameter counts.
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm (LSTM) (None, 50, 96) 37632 dropout (Dropout) (None, 50, 96) 0 lstm_1 (LSTM) (None, 50, 96) 74112 dropout_1 (Dropout) (None, 50, 96) 0 lstm_2 (LSTM) (None, 96) 74112 dropout_2 (Dropout) (None, 96) 0 dense (Dense) (None, 1) 97 ================================================================= Total params: 185,953 Trainable params: 185,953 Non-trainable params: 0 _________________________________________________________________
Define loss function and optimizer
# Mean-squared-error regression loss, optimized with Adam.
model.compile(optimizer="adam", loss="mean_squared_error")
Fitting the model
# if (not os.path.exists(r'/Users/evancarr/Library/CloudStorage/OneDrive-Personal/Code/udemy_nn_stock/stock_prediction_model.h5')):
# model.fit(x_train, y_train, epochs = 50, batch_size=32, verbose = 1)
Saving the trained model
# model.save("stock_prediction_nn_model.h5")
Loading the trained model
# Load the previously trained and saved model from disk instead of refitting
# (the fit/save cells above are commented out).
model = load_model("stock_prediction_nn_model.h5")
Making predictions
# Predict the (scaled) next price for every 50-step test window.
predictions = model.predict(x_test)
8/8 [==============================] - 1s 22ms/step
Unscale the predictions
# Map predictions from the scaler's [0, 1] range back to dollar prices.
predictions = scaler.inverse_transform(predictions)
# Notebook echo: first six unscaled predictions.
predictions[0:6]
array([[124.68587], [125.86398], [126.74756], [127.3911 ], [127.86211], [128.20259]], dtype=float32)
Visualizing Predictions vs Original Data - Entire timeframe
fig, ax = plt.subplots(figsize=(10, 5), facecolor="cyan")
# The actual (orig) prices from the original AAPL dataframe (imported csv);
# this spans the complete time frame, not just the train-test split.
ax.plot(aapl, color='lightblue', label='Entire AAPL Data')
# Predictions shifted to their position in the full timeline: the test
# split begins right after the training targets plus the 50-step warm-up.
offset = len(y_train) + 50
ax.plot(range(offset, offset + len(predictions)),
        predictions, c="red", label='Predicted Data')
ax.set_facecolor("#333333")
ax.grid(ls=":", c="white")
ax.set_title("Model / Predictions vs Original Data", fontsize=22, pad=12)
ax.legend(facecolor="#222222", labelcolor="white")
pretty(len(aapl),
       "Length of entire AAPL data: "); sp()
pretty(range(offset, offset + len(predictions)),
       "Portion / Range of prediction data: "); sp()
Unscaling the Test Data:
When `scaler.inverse_transform()` is called, the data must be 2-dimensional, i.e. shape `(-1, 1)`; otherwise sklearn raises:
"Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample."
When converting the `aapl` dataframe to a numpy array, we added that extra dimension — but `y_test` is 1-D and must be reshaped first.
# inverse_transform needs 2-D input — when converting the aapl dataframe to
# a numpy array we added an additional dimension, but y_test is 1-D, so
# reshape to (-1, 1) before unscaling back to dollar prices.
pretty(y_test[0:6], 'y_test[0:6]')
sp()
y_test_scaled = scaler.inverse_transform(y_test.reshape(-1, 1))
pretty(y_test_scaled[0:6],
       'scaler.inverse_transform(y_test.reshape(-1,1))[0:6]')
sp()
Visualizing Predictions vs Original Data - Prediction timeframe only
fig, ax = plt.subplots(figsize=(10, 5), facecolor="cyan")
# Prediction timeframe only: unscaled actuals vs model predictions.
ax.plot(y_test_scaled, color='lightblue', label='Actual Data')
ax.plot(predictions, color='red', label='Predictions')
ax.set_facecolor("#333333")
ax.grid(ls=":", c="white")
ax.set_title("Model / Predictions vs Original Data", fontsize=22, pad=12)
ax.legend(facecolor="#222222", labelcolor="white")