from helpers import *
import_all()
import torch
import torch.nn as nn
import numpy as np  # np is used below when reshaping the series
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader
import yfinance as yf
import math
from sklearn.metrics import mean_squared_error
aapl = yf.download("AAPL", period = "10y").Close
First View of Data
head_tail_horz(aapl, 5, "Apple Data")
Apple Data (first 5 rows):

| Date | Close |
|---|---|
| 2013-01-22 | 18.03 |
| 2013-01-23 | 18.36 |
| 2013-01-24 | 16.09 |
| 2013-01-25 | 15.71 |
| 2013-01-28 | 16.07 |

Apple Data (last 5 rows):

| Date | Close |
|---|---|
| 2023-01-13 | 134.76 |
| 2023-01-17 | 135.94 |
| 2023-01-18 | 135.21 |
| 2023-01-19 | 135.27 |
| 2023-01-20 | 137.87 |
Initial Visualization
fancy_plot(aapl, title="Initial Apple Data: Last 10 Years",
xlabel = "Years", ylabel = "Price",
legend_loc=2)
Instantiating Scaler
scaler = MinMaxScaler(feature_range=(0, 1))
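Under the hood the scaler only applies x_scaled = (x - x_min) / (x_max - x_min). A minimal sketch on a made-up toy array (the values below are hypothetical, purely for illustration):

toy = np.array([[10.0], [15.0], [20.0]])
demo_scaler = MinMaxScaler(feature_range = (0, 1))
print(demo_scaler.fit_transform(toy).ravel())    # [0.  0.5 1. ]
print(demo_scaler.inverse_transform([[0.5]]))    # [[15.]]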
Scaling the Data
aapl_scaled = scaler.fit_transform(np.array(aapl).reshape(-1,1))
pretty(aapl.shape, 'original data shape'); sp()
pretty(aapl_scaled.shape, 'reshaped, scaled data shape')
head_tail_horz(aapl_scaled, 5, "Scaled Apple Data")
Scaled Apple Data (first 5 rows):

| | 0 |
|---|---|
| 0 | 0.02 |
| 1 | 0.03 |
| 2 | 0.01 |
| 3 | 0.01 |
| 4 | 0.01 |

Scaled Apple Data (last 5 rows):

| | 0 |
|---|---|
| 2513 | 0.72 |
| 2514 | 0.73 |
| 2515 | 0.72 |
| 2516 | 0.72 |
| 2517 | 0.74 |
Splitting Data
len_all = len(aapl_scaled)
train_size = int(len_all * 0.7)            # first 70% for training
test_size = len_all - train_size           # remaining 30% for testing
train_data, test_data = (aapl_scaled[:train_size, :],
                         aapl_scaled[train_size:, :])
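A quick sanity check (sketch): given the 2,518 rows shown in the scaled tables above, the split should come out to 1,762 training and 756 testing samples, and the two parts must add back up to the whole:

print(len_all, train_size, test_size)    # 2518 1762 756
assert train_size + test_size == len_all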
Defining the Dataset Class
sequence_length
- the sequence of inputs across the timeframe

torch.from_numpy(data).float().view(-1)
- converts the NumPy array to a flat tensor for PyTorch

__getitem__()
- uses the index to take a window of sequence_length values, with the value right after the window as the target (a small indexing sketch follows the class below)

return len(self.data) - self.sequence_length - 1
- takes care of the special case when a window would run off the end of the dataset

class StockDataset(Dataset):
    def __init__(self, data, sequence_length = 100):
        # convert to a flat float tensor once
        self.data = torch.from_numpy(data).float().view(-1)
self.sequence_length = sequence_length
def __len__(self):
return len(self.data) - self.sequence_length - 1
def __getitem__(self, index):
return (self.data[index : index + self.sequence_length],
self.data[index + self.sequence_length])
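As flagged in the list above, here is a small indexing sketch on a made-up array to confirm that each item pairs a window with the value that follows it:

toy = np.arange(10, dtype = np.float32).reshape(-1, 1)
toy_ds = StockDataset(toy, sequence_length = 3)
x, y = toy_ds[0]
print(x)            # tensor([0., 1., 2.])
print(y)            # tensor(3.)
print(len(toy_ds))  # 10 - 3 - 1 = 6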
Defining the Datasets
train_ds = StockDataset(train_data)
test_ds = StockDataset(test_data)
Defining DataLoaders
drop_last = True
- removes the leftover samples at the end that remain after all full batches

batch_size = 64
train_loader = DataLoader(train_ds, batch_size, drop_last = True)
test_loader = DataLoader(test_ds, batch_size, drop_last = True)
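A quick shape check (sketch): each batch stacks 64 windows of length 100, and drop_last discards whatever is left over after the last full batch:

inputs, targets = next(iter(train_loader))
print(inputs.shape)     # torch.Size([64, 100])
print(targets.shape)    # torch.Size([64])
print(len(train_ds) % batch_size, "leftover samples dropped per epoch")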
Selecting the Device
device = "cuda" if torch.cuda.is_available() else "cpu"
Defining the Model
self.fc
- outputs the single value, i.e. the prediction

class LSTM_model(nn.Module):
def __init__(self, num_input, num_hidden, num_layers):
super(LSTM_model, self).__init__()
self.num_layers = num_layers
self.input_size = num_input
self.num_hidden = num_hidden
self.lstm = nn.LSTM(input_size = num_input,
hidden_size = num_hidden,
num_layers = num_layers)
self.fc = nn.Linear(num_hidden, 1)
    def forward(self, inputs, hidden_state, cell_state):
        output, (hidden_state, cell_state) = self.lstm(inputs,
                                                       (hidden_state, cell_state))
        # map the last time step's hidden features to a single value
        final_output = self.fc(output[-1])
        return final_output, hidden_state, cell_state

    def predict(self, inputs):
        # run the LSTM from fresh zero states, then apply the same head
        hidden_state, cell_state = self.init()
        output, (hidden_state, cell_state) = self.lstm(inputs,
                                                       (hidden_state, cell_state))
        final_output = self.fc(output[-1])
        return final_output
    def init(self):
        # zero initial hidden/cell states; note this relies on the
        # global batch_size defined with the DataLoaders
        hidden_initial = torch.zeros(self.num_layers,
                                     batch_size,
                                     self.num_hidden).to(device)
cell_initial = torch.zeros(self.num_layers,
batch_size,
self.num_hidden).to(device)
return hidden_initial, cell_initial
Defining Hyperparameters
num_input = 1
num_hidden = 50
num_layers = 3
Instantiating the Model
model = LSTM_model(num_input,
num_hidden,
num_layers).to(device)
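A dummy forward pass (sketch) to confirm the shapes: the model expects input shaped (sequence_length, batch_size, 1) and returns one prediction per series in the batch, along with the updated states:

dummy = torch.zeros(100, batch_size, 1).to(device)
h0, c0 = model.init()
out, h, c = model(dummy, h0, c0)
print(out.shape)    # torch.Size([64, 1])
print(h.shape)      # torch.Size([3, 64, 50])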
Loss Function & Optimizer
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
Defining the Training Function
def train_model(dataloader):
hidden_state, cell_state = model.init()
model.train()
train_losses = []
for batch, item in enumerate(dataloader):
input_data, target_data = item
input_data = input_data.to(device)
target_data = target_data.to(device)
        # reshape to (sequence_length, batch_size, 1), the layout nn.LSTM
        # expects with batch_first = False
        output, hidden_state, cell_state = model(input_data.reshape(100,
                                                 batch_size, 1),
                                                 hidden_state, cell_state)
        loss = loss_function(output.reshape(batch_size), target_data)
        # detach the recurrent state so gradients do not flow across
        # batches (truncated backpropagation through time)
        hidden_state = hidden_state.detach()
        cell_state = cell_state.detach()
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch == len(dataloader) - 1:
loss = loss.item()
print(f"training loss: {loss: >7f}")
train_losses.append(loss)
return train_losses
Defining the Testing Function
def test_model(dataloader):
    hidden_state, cell_state = model.init()
    model.eval()
    test_losses = []
    with torch.no_grad():    # no gradients are needed during evaluation
        for batch, item in enumerate(dataloader):
            input_data, target_data = item
            input_data = input_data.to(device)
            target_data = target_data.to(device)
            output, hidden_state, cell_state = model(input_data.reshape(100,
                                                                        batch_size,
                                                                        1),
                                                     hidden_state, cell_state)
            loss = loss_function(output.reshape(batch_size), target_data)
            # print only the final batch's loss for this epoch
            if batch == len(dataloader) - 1:
                loss = loss.item()
                print(f"testing loss: {loss: >7f}")
            test_losses.append(loss)
    return test_losses
Training-Testing Function
def train_test(model, train_loader, test_loader, epochs):
    for epoch in range(epochs):
        train_losses = train_model(train_loader)
        # drop the last entry (already converted to a float for printing)
        # and move the remaining tensors off the GPU
        train_losses = [x.detach().cpu() for x in train_losses[:-1]]
        test_losses = test_model(test_loader)
        test_losses = [x.detach().cpu() for x in test_losses[:-1]]
    # note that only the final epoch's losses survive the loop
    return train_losses, test_losses
Training the Model
train_losses, test_losses = train_test(model = model,
train_loader = train_loader,
test_loader = test_loader,
epochs = 300)
training loss: 0.000685
training loss: 0.000034
training loss: 0.000044
...
training loss: 0.000075
training loss: 0.000029

(One line per epoch for 300 epochs; the loss settles around 0.000029, with occasional brief spikes up to roughly 0.0007.)
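To see curves rather than the raw log, the per-batch losses returned for the final epoch can be plotted; a minimal sketch using plain matplotlib rather than the fancy_plot helper:

import matplotlib.pyplot as plt

plt.plot(train_losses, label = "training (final epoch)")
plt.plot(test_losses, label = "testing (final epoch)")
plt.xlabel("batch")
plt.ylabel("MSE loss")
plt.legend()
plt.show()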
Evaluating the Model
def evaluate_model(dataloader, scaler):
    prediction_array = []
    target_array = []
    with torch.no_grad():
        hidden_state, cell_state = model.init()
        for batch, item in enumerate(dataloader):
            input_data, target_data = item
            input_data, target_data = input_data.to(device), target_data.to(device)
            # reshape to (sequence_length, batch_size, 1) for the LSTM
            input_data = input_data.view(100, 64, 1)
            # forward() returns (prediction, hidden, cell); keep the prediction
            predictions = model(input_data, hidden_state, cell_state)[0]
predictions = scaler.inverse_transform(predictions.detach()
.cpu()
.numpy()).reshape(-1)
target_data = scaler.inverse_transform(target_data.detach()
.cpu()
.numpy()
.reshape(1, -1)).reshape(-1)
prediction_array.extend(predictions)
target_array.extend(target_data)
result = math.sqrt(mean_squared_error(target_array, prediction_array))
return result
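Because the predictions are mapped back to price units with inverse_transform before scoring, the returned number is a root-mean-squared error in dollars. A minimal sketch of the same computation on made-up values:

true_prices = np.array([100.0, 101.0, 102.0])    # hypothetical targets
predicted = np.array([100.5, 100.5, 103.0])      # hypothetical predictions
print(math.sqrt(mean_squared_error(true_prices, predicted)))    # ~0.7071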
Results of the Model
print("Training Loss: ", evaluate_model(train_loader, scaler))
print("Testing Loss: ", evaluate_model(test_loader, scaler))
Training Loss: 1.561772062489898 Testing Loss: 78.75756102095055