BNB price prediction

import numpy as np
from numpy import array
import pandas as pd
import matplotlib.pyplot as plt
import math

from sklearn.metrics import mean_squared_error

from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

import tensorflow as tf

from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

EDA on Dataset

dataset = pd.read_csv('BNB-USD.csv')
dataset.head()

	Date	Open	High	Low	Close	Adj Close	Volume
0	2017-11-09	2.05314	2.17423	1.89394	1.99077	1.99077	19192200.0
1	2017-11-10	2.00773	2.06947	1.64478	1.79684	1.79684	11155000.0
2	2017-11-11	1.78628	1.91775	1.61429	1.67047	1.67047	8178150.0
3	2017-11-12	1.66889	1.67280	1.46256	1.51969	1.51969	15298700.0
4	2017-11-13	1.52601	1.73502	1.51760	1.68662	1.68662	12238800.0

dataset.tail()

	Date	Open	High	Low	Close	Adj Close	Volume
2028	2023-05-30	311.810333	314.276611	311.313507	311.684509	311.684509	392038878.0
2029	2023-05-31	311.695709	311.846649	305.376404	306.866699	306.866699	474410245.0
2030	2023-06-01	306.882813	307.613770	303.644562	304.953278	304.953278	354897855.0
2031	2023-06-02	NaN	NaN	NaN	NaN	NaN	NaN
2032	2023-06-03	307.370331	307.370331	307.334808	307.334808	307.334808	380250496.0

   
    dataset['Date'] = pd.to_datetime(dataset['Date'])
dataset.head()

	Date	Open	High	Low	Close	Adj Close	Volume
0	2017-11-09	2.05314	2.17423	1.89394	1.99077	1.99077	19192200.0
1	2017-11-10	2.00773	2.06947	1.64478	1.79684	1.79684	11155000.0
2	2017-11-11	1.78628	1.91775	1.61429	1.67047	1.67047	8178150.0
3	2017-11-12	1.66889	1.67280	1.46256	1.51969	1.51969	15298700.0
4	2017-11-13	1.52601	1.73502	1.51760	1.68662	1.68662	12238800.0

dataset.describe()

	Open	High	Low	Close	Adj Close	Volume
count	2032.000000	2032.000000	2032.000000	2032.000000	2032.000000	2.032000e+03
mean	158.956768	163.860205	153.668686	159.075751	159.075751	9.367168e+08
std	181.151077	186.767035	174.910847	181.076968	181.076968	1.384815e+09
min	1.511360	1.582660	1.462560	1.510360	1.510360	9.284000e+03
25%	14.691567	15.289252	14.206488	14.794200	14.794200	1.561682e+08
50%	28.723639	29.370382	27.973465	28.788359	28.788359	3.960007e+08
75%	307.606750	313.983650	300.660980	307.617782	307.617782	1.289710e+09
max	676.315918	690.931946	634.549500	675.684082	675.684082	1.798295e+10

dataset.isnull().sum()

Date         0
Open         1
High         1
Low          1
Close        1
Adj Close    1
Volume       1
dtype: int64

# Drop rows containing NaN; the isnull().sum() output above reports one
# missing value in each price/volume column.
dataset = dataset.dropna()

dataset['Close'].dtype

dtype('float64')

ADF Test

# Augmented Dickey-Fuller test on the closing prices (autolag='AIC').
result = adfuller(dataset['Close'].values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
# result[4] maps each significance level ('1%', '5%', '10%') to its critical
# value. Both prints belong to the loop body, so the header is repeated for
# every level, exactly as in the recorded output.
for key, value in result[4].items():
    print('Critial Values:')
    print(f' {key}, {value}')

ADF Statistic: -1.571091598405781
p-value: 0.4980968535928449
Critial Values:
   1%, -3.4336140516629836
Critial Values:
   5%, -2.8629818846551376
Critial Values:
   10%, -2.5675375973574788

ACF and PACF Plot

# Autocorrelation plot of the closing prices.
plt.rc("figure", figsize=(10,5))
plot_acf(dataset['Close'])
print()


# Partial-autocorrelation plot of the closing prices.
# (The second plt.rc call was indented by mistake, which is a syntax error
# at module level.)
plt.rc("figure", figsize=(10,5))
plot_pacf(dataset['Close'])
print()

Prediction

data = dataset['Close'].values
print('Shape of data: ', data.shape)

Shape of data: (2032,)

# Index that separates the first 80% of the observations from the rest.
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)

# Split by position (no shuffling): the leading slice is the training set and
# the trailing slice is the test set.
train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))

Train length:  1625
Shape of Train and Test data:  1625 407

def split_sequence(sequence, n_steps):
    """Turn a 1-D sequence into sliding-window samples.

    Each sample is ``n_steps`` consecutive values of ``sequence`` and its
    target is the value that immediately follows the window.

    Returns a tuple ``(X, y)`` of NumPy arrays: ``X`` holds the windows and
    ``y`` the corresponding next values. When ``sequence`` is not longer than
    ``n_steps`` both arrays are empty.
    """
    size = len(sequence)
    # Number of start positions whose target index (start + n_steps) is still
    # inside the sequence, clamped to the range [0, size].
    count = max(0, min(size, size - n_steps))
    windows = [sequence[start:start + n_steps] for start in range(count)]
    targets = [sequence[start + n_steps] for start in range(count)]
    return array(windows), array(targets)

# Window length: each sample is built from `lag` consecutive values.
lag = 3
# Size of the per-time-step feature axis used when reshaping for the LSTM.
n_features = 1

# Build (window, next value) pairs independently for the train and test splits.
train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)

print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Shape of train_X and train_y:  (1622, 3) (1622,)
Shape of test_X and test_y:  (404, 3) (404,)

# Append a feature axis so the inputs become (samples, lag, n_features),
# matching the LSTM layer's input_shape=(lag, n_features).
train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))

# Shapes of train_X and test_X after the reshape (targets are unchanged):
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Shape of train_X and train_y:  (1622, 3, 1) (1622,)
Shape of test_X and test_y:  (404, 3, 1) (404,)

LSTM

# One LSTM layer with 32 units feeding a single-unit Dense output layer.
model = Sequential()
# return_sequences=False: the layer emits one vector per sample rather than
# one per time step (see the (None, 32) output shape in the summary).
model.add(LSTM(32, activation='relu', return_sequences=False, input_shape=(lag, n_features)))
model.add(Dense(1))
# Train with the Adam optimizer against mean squared error.
model.compile(optimizer='adam', loss='mse')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm (LSTM)                 (None, 32)                4352      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
=================================================================
Total params: 4,385
Trainable params: 4,385
Non-trainable params: 0
_________________________________________________________________

  
    tf.config.run_functions_eagerly(True)

history = model.fit(train_X, train_y, epochs = 100, 

batch_size=64, verbose=1, validation_split= 0.1)

Summarize model

# Loss per epoch. The second curve is history.history['val_loss'], i.e. the
# validation loss, even though its legend entry reads 'test'.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


# Model predictions for the training and test windows.
# (The first predict call was indented by mistake, which is a syntax error.)
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)

print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)

51/51 [==============================] - 1s 18ms/step
13/13 [==============================] - 0s 18ms/step
Shape of train and test predict:  (1622, 1) (404, 1)

Model evaluation

def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between ``actual`` and ``predicted``.

    Both arguments are passed unchanged to ``mean_squared_error``; the square
    root of that value is returned as a float.
    """
    return math.sqrt(mean_squared_error(actual, predicted))

train_score = measure_rmse(train_y, train_predict)
test_score = measure_rmse(test_y, test_predict)

print('Train and Test RMSE: ', train_score, test_score)

Train and Test RMSE: 12.792479138124301 10.002362357113856

Plot test data and Predicted data

# Enlarge the figure and font for the final comparison chart.
plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})
# Actual test targets against the model's predictions; the x axis is the
# sample index within the test windows.
plt.plot(test_y, label = 'Actual')
plt.plot(test_predict, label = 'Predicted')
plt.xlabel('Time in days')
plt.ylabel('Adjusted Close price')
plt.title('BNB Close price prediction using LSTM - Test data')
plt.legend()
plt.show()

The full example is available on my Kaggle account at the link below.

https://www.kaggle.com/code/mixmore/bnb-price-prediction

This post is only a walkthrough of that Kaggle example.

Deep Side

11/03/2023

BNB price prediction

EDA on Dataset

dataset = pd.read_csv('BNB-USD.csv')
dataset.head()

dataset.tail()

dataset['Date'] = pd.to_datetime(dataset['Date'])
dataset.head()

dataset.describe()

dataset.isnull().sum()

dataset = dataset.dropna()

dataset['Close'].dtype

ADF Test

# Augmented Dickey-Fuller test on the closing prices (autolag='AIC').
result = adfuller(dataset['Close'].values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
# result[4] is iterated as a mapping of significance level -> critical value;
# both prints form the loop body.
for key, value in result[4].items():
    print('Critial Values:')
    print(f' {key}, {value}')

ACF and PACF Plot

plt.rc("figure", figsize=(10,5))
plot_acf(dataset['Close'])
print()

plt.rc("figure", figsize=(10,5))
plot_pacf(dataset['Close'])
print()

data = dataset['Close'].values
print('Shape of data: ', data.shape)

Shape of data: (2032,)

train_length = int(len(data) * 0.8)
print('Train length: ', train_length)

train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))

model = Sequential()
model.add(LSTM(32, activation='relu', return_sequences=False, input_shape=(lag, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()

tf.config.run_functions_eagerly(True)

history = model.fit(train_X, train_y, epochs = 100,

batch_size=64, verbose=1, validation_split= 0.1)

Summarize model

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

train_predict = model.predict(train_X)
test_predict = model.predict(test_X)

print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)

def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between ``actual`` and ``predicted``.

    Both arguments are passed unchanged to ``mean_squared_error``; the square
    root of that value is returned as a float.
    """
    return math.sqrt(mean_squared_error(actual, predicted))

train_score = measure_rmse(train_y, train_predict)
test_score = measure_rmse(test_y, test_predict)

print('Train and Test RMSE: ', train_score, test_score)

Train and Test RMSE: 12.792479138124301 10.002362357113856

plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})
plt.plot(test_y, label = 'Actual')
plt.plot(test_predict, label = 'Predicted')
plt.xlabel('Time in days')
plt.ylabel('Adjusted Close price')
plt.title('BNB Close price prediction using LSTM - Test data')
plt.legend()
plt.show()

11/03/2023

BNB price prediction

EDA on Dataset

# Load the daily BNB-USD price history and preview the first rows.
dataset = pd.read_csv('BNB-USD.csv')
dataset.head()

dataset.tail()

# Convert the Date column from strings to pandas datetime values.
dataset['Date'] = pd.to_datetime(dataset['Date'])
dataset.head()

dataset.describe()

# Count missing values per column, then drop the rows that contain any.
dataset.isnull().sum()

dataset = dataset.dropna()

dataset['Close'].dtype

ADF Test

# Augmented Dickey-Fuller test on the closing prices (autolag='AIC').
result = adfuller(dataset['Close'].values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
# result[4] is iterated as a mapping of significance level -> critical value;
# both prints form the loop body.
for key, value in result[4].items():
    print('Critial Values:')
    print(f' {key}, {value}')

ACF and PACF Plot

# Autocorrelation plot of the closing prices.
plt.rc("figure", figsize=(10,5))
plot_acf(dataset['Close'])
print()

# Partial-autocorrelation plot of the closing prices.
plt.rc("figure", figsize=(10,5))
plot_pacf(dataset['Close'])
print()

# Closing prices as a plain NumPy array for the modelling steps.
data = dataset['Close'].values
print('Shape of data: ', data.shape)

Shape of data: (2032,)

# Positional 80/20 split of the series into training and test slices.
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))

# NOTE: lag, n_features, train_X and train_y come from the split_sequence
# windowing step shown in the full walkthrough; this abridged listing omits it.
model = Sequential()
model.add(LSTM(32, activation='relu', return_sequences=False, input_shape=(lag, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()

tf.config.run_functions_eagerly(True)
# validation_split=0.1 makes Keras hold out 10% of the training samples.
history = model.fit(train_X, train_y, epochs=100, batch_size=64, verbose=1, validation_split=0.1)

Summarize model

# Loss per epoch. The second curve is history.history['val_loss'], i.e. the
# validation loss, even though its legend entry reads 'test'.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# Model predictions for the training and test windows.
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)
print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)


def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between ``actual`` and ``predicted``.

    Both arguments are passed unchanged to ``mean_squared_error``; the square
    root of that value is returned as a float.
    """
    return math.sqrt(mean_squared_error(actual, predicted))


train_score = measure_rmse(train_y, train_predict)
test_score = measure_rmse(test_y, test_predict)
print('Train and Test RMSE: ', train_score, test_score)

Train and Test RMSE: 12.792479138124301 10.002362357113856

# Actual test targets against the model's predictions.
plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})
plt.plot(test_y, label='Actual')
plt.plot(test_predict, label='Predicted')
plt.xlabel('Time in days')
plt.ylabel('Adjusted Close price')
plt.title('BNB Close price prediction using LSTM - Test data')
plt.legend()
plt.show()

dataset = pd.read_csv('BNB-USD.csv')
dataset.head()

dataset['Date'] = pd.to_datetime(dataset['Date'])
dataset.head()

# Augmented Dickey-Fuller test on the closing prices (autolag='AIC').
result = adfuller(dataset['Close'].values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
# result[4] is iterated as a mapping of significance level -> critical value;
# both prints form the loop body.
for key, value in result[4].items():
    print('Critial Values:')
    print(f' {key}, {value}')

plt.rc("figure", figsize=(10,5))
plot_acf(dataset['Close'])
print()

plt.rc("figure", figsize=(10,5))
plot_pacf(dataset['Close'])
print()

data = dataset['Close'].values
print('Shape of data: ', data.shape)

train_length = int(len(data) * 0.8)
print('Train length: ', train_length)

train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))

model = Sequential()
model.add(LSTM(32, activation='relu', return_sequences=False, input_shape=(lag, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()

tf.config.run_functions_eagerly(True)

history = model.fit(train_X, train_y, epochs = 100,

batch_size=64, verbose=1, validation_split= 0.1)

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

train_predict = model.predict(train_X)
test_predict = model.predict(test_X)

print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)

def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between ``actual`` and ``predicted``.

    Both arguments are passed unchanged to ``mean_squared_error``; the square
    root of that value is returned as a float.
    """
    return math.sqrt(mean_squared_error(actual, predicted))

train_score = measure_rmse(train_y, train_predict)
test_score = measure_rmse(test_y, test_predict)

print('Train and Test RMSE: ', train_score, test_score)

plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})
plt.plot(test_y, label = 'Actual')
plt.plot(test_predict, label = 'Predicted')
plt.xlabel('Time in days')
plt.ylabel('Adjusted Close price')
plt.title('BNB Close price prediction using LSTM - Test data')
plt.legend()
plt.show()