BNB price prediction
import numpy as np
from numpy import array
import pandas as pd
import matplotlib.pyplot as plt
import math
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
EDA on Dataset
# Load the BNB-USD price history from the local CSV file into a DataFrame.
dataset = pd.read_csv('BNB-USD.csv')
# Preview the first rows to check the columns were parsed as expected.
dataset.head()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2017-11-09 | 2.05314 | 2.17423 | 1.89394 | 1.99077 | 1.99077 | 19192200.0 |
1 | 2017-11-10 | 2.00773 | 2.06947 | 1.64478 | 1.79684 | 1.79684 | 11155000.0 |
2 | 2017-11-11 | 1.78628 | 1.91775 | 1.61429 | 1.67047 | 1.67047 | 8178150.0 |
3 | 2017-11-12 | 1.66889 | 1.67280 | 1.46256 | 1.51969 | 1.51969 | 15298700.0 |
4 | 2017-11-13 | 1.52601 | 1.73502 | 1.51760 | 1.68662 | 1.68662 | 12238800.0 |
# Preview the last rows of the DataFrame.
dataset.tail()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
2028 | 2023-05-30 | 311.810333 | 314.276611 | 311.313507 | 311.684509 | 311.684509 | 392038878.0 |
2029 | 2023-05-31 | 311.695709 | 311.846649 | 305.376404 | 306.866699 | 306.866699 | 474410245.0 |
2030 | 2023-06-01 | 306.882813 | 307.613770 | 303.644562 | 304.953278 | 304.953278 | 354897855.0 |
2031 | 2023-06-02 | NaN | NaN | NaN | NaN | NaN | NaN |
2032 | 2023-06-03 | 307.370331 | 307.370331 | 307.334808 | 307.334808 | 307.334808 | 380250496.0 |
# Convert the 'Date' column to pandas datetime values.
dataset['Date'] = pd.to_datetime(dataset['Date'])
dataset.head()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2017-11-09 | 2.05314 | 2.17423 | 1.89394 | 1.99077 | 1.99077 | 19192200.0 |
1 | 2017-11-10 | 2.00773 | 2.06947 | 1.64478 | 1.79684 | 1.79684 | 11155000.0 |
2 | 2017-11-11 | 1.78628 | 1.91775 | 1.61429 | 1.67047 | 1.67047 | 8178150.0 |
3 | 2017-11-12 | 1.66889 | 1.67280 | 1.46256 | 1.51969 | 1.51969 | 15298700.0 |
4 | 2017-11-13 | 1.52601 | 1.73502 | 1.51760 | 1.68662 | 1.68662 | 12238800.0 |
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
dataset.describe()
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
count | 2032.000000 | 2032.000000 | 2032.000000 | 2032.000000 | 2032.000000 | 2.032000e+03 |
mean | 158.956768 | 163.860205 | 153.668686 | 159.075751 | 159.075751 | 9.367168e+08 |
std | 181.151077 | 186.767035 | 174.910847 | 181.076968 | 181.076968 | 1.384815e+09 |
min | 1.511360 | 1.582660 | 1.462560 | 1.510360 | 1.510360 | 9.284000e+03 |
25% | 14.691567 | 15.289252 | 14.206488 | 14.794200 | 14.794200 | 1.561682e+08 |
50% | 28.723639 | 29.370382 | 27.973465 | 28.788359 | 28.788359 | 3.960007e+08 |
75% | 307.606750 | 313.983650 | 300.660980 | 307.617782 | 307.617782 | 1.289710e+09 |
max | 676.315918 | 690.931946 | 634.549500 | 675.684082 | 675.684082 | 1.798295e+10 |
# Count the missing values in each column.
dataset.isnull().sum()
Date 0
Open 1
High 1
Low 1
Close 1
Adj Close 1
Volume 1
dtype: int64
# Drop every row that contains at least one missing value.
dataset = dataset.dropna()
# Confirm the dtype of the 'Close' column that is modelled below.
dataset['Close'].dtype
dtype('float64')
ADF Test
# Augmented Dickey-Fuller test on the closing prices; the lag order is
# selected automatically via AIC.
result = adfuller(dataset['Close'].values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
# Print the header once, followed by one line per significance level
# (result[4] maps each level to its critical value).
print('Critical Values:')
for key, value in result[4].items():
    print(f' {key}, {value}')
ADF Statistic: -1.571091598405781
p-value: 0.4980968535928449
Critial Values:
1%, -3.4336140516629836
Critial Values:
5%, -2.8629818846551376
Critial Values:
10%, -2.5675375973574788
ACF and PACF Plot
# Autocorrelation plot of the closing prices on a 10x5 figure.
plt.rc("figure", figsize=(10,5))
plot_acf(dataset['Close'])
# NOTE(review): the bare print() presumably keeps the notebook from echoing
# the returned figure a second time -- confirm.
print()
# Partial autocorrelation plot of the same series.
plt.rc("figure", figsize=(10,5))
plot_pacf(dataset['Close'])
print()
Prediction
# Work on the closing prices as a plain NumPy array.
data = dataset['Close'].values
print('Shape of data: ', data.shape)
Shape of data: (2032,)
# Split in file order (no shuffling): the first 80% of the samples are used
# for training, the remainder for testing.
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))
Train length: 1625
Shape of Train and Test data: 1625 407
def split_sequence(sequence, n_steps):
    """Turn a sequence into supervised-learning samples with a sliding window.

    Each sample consists of ``n_steps`` consecutive observations as input and
    the observation immediately following them as target.

    Args:
        sequence: Array-like of observations, ordered in time.
        n_steps: Window length (number of lagged observations per sample).
            Must be at least 1.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_samples, n_steps, ...)`` and ``y`` has shape ``(n_samples, ...)``,
        where ``n_samples = max(len(sequence) - n_steps, 0)``. When the
        sequence is too short to form a single sample, ``X`` still has
        ``n_steps`` as its second dimension so that downstream reshaping
        keeps working.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        raise ValueError("n_steps must be a positive integer")

    values = np.asarray(sequence)
    # A window starting at i needs values[i + n_steps] as its target, so the
    # last valid start index is len(values) - n_steps - 1.
    n_samples = max(len(values) - n_steps, 0)
    windows = [values[i:i + n_steps] for i in range(n_samples)]

    # The explicit reshape only matters for the empty case, where array([])
    # would otherwise collapse to shape (0,).
    X = array(windows, dtype=values.dtype).reshape(
        (n_samples, n_steps) + values.shape[1:]
    )
    # array() copies, so the targets do not alias the input sequence.
    y = array(values[n_steps:])
    return X, y
# Each sample uses the previous `lag` closing prices to predict the next one.
lag = 3
# Univariate series: one feature (the close price) per time step.
n_features = 1
# Window the train and test portions separately so no window spans both.
train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
Shape of train_X and train_y: (1622, 3) (1622,)
Shape of test_X and test_y: (404, 3) (404,)
# Add a trailing feature axis so the inputs have shape
# (samples, lag, n_features), matching the LSTM layer's input_shape.
train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))
# Print the new shapes of train_X and test_X (the targets are unchanged).
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
Shape of train_X and train_y: (1622, 3, 1) (1622,)
Shape of test_X and test_y: (404, 3, 1) (404,)
LSTM
# Single-layer LSTM regressor: 32 LSTM units with ReLU activation, followed
# by a one-unit dense layer that outputs the predicted price.
model = Sequential()
# return_sequences=False: only the output of the last time step is passed on.
model.add(LSTM(32, activation='relu', return_sequences=False, input_shape=(lag, n_features)))
model.add(Dense(1))
# Train with the Adam optimizer on mean squared error.
model.compile(optimizer='adam', loss='mse')
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 32) 4352
dense (Dense) (None, 1) 33
=================================================================
Total params: 4,385
Trainable params: 4,385
Non-trainable params: 0
_________________________________________________________________
# NOTE(review): forcing eager execution is normally a debugging aid and
# slows training down -- confirm whether it is still needed here.
tf.config.run_functions_eagerly(True)
# Train for 100 epochs with batches of 64 samples, holding out 10% of the
# training samples for validation.
history = model.fit(train_X, train_y, epochs = 100,
batch_size=64, verbose=1, validation_split= 0.1)
Summarize model
# Plot the training loss against the validation loss for every epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# 'val_loss' is measured on the validation split carved out of the training
# data by model.fit, not on the held-out test set, so label it accordingly.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# Run the trained model on both the training and the test inputs.
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)
print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)
51/51 [==============================] - 1s 18ms/step
13/13 [==============================] - 0s 18ms/step
Shape of train and test predict: (1622, 1) (404, 1)
Model evaluation
def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between *actual* and *predicted*."""
    mse = mean_squared_error(actual, predicted)
    return math.sqrt(mse)
# RMSE of the predictions on the training and test samples, in the same
# units as the close price.
train_score = measure_rmse(train_y, train_predict)
test_score = measure_rmse(test_y, test_predict)
print('Train and Test RMSE: ', train_score, test_score)
Train and Test RMSE: 12.792479138124301 10.002362357113856
Plot test data and Predicted data
# Compare the actual test targets with the model's predictions.
plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})
plt.plot(test_y, label = 'Actual')
plt.plot(test_predict, label = 'Predicted')
plt.xlabel('Time in days')
# The series being modelled is the 'Close' column, so the axis label matches
# the plot title rather than referring to the adjusted close.
plt.ylabel('Close price')
plt.title('BNB Close price prediction using LSTM - Test data')
plt.legend()
plt.show()
The full example is available on my Kaggle account:
https://www.kaggle.com/code/mixmore/bnb-price-prediction
This page is only a walkthrough of that Kaggle notebook.