BNB price prediction
import numpy as np
from numpy import array
import pandas as pd 
import matplotlib.pyplot as plt
import math
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
    EDA on Dataset
# Load the BNB-USD price CSV into a DataFrame and preview its first rows.
dataset = pd.read_csv('BNB-USD.csv')
dataset.head()
| | Date | Open | High | Low | Close | Adj Close | Volume |
|---|---|---|---|---|---|---|---|
| 0 | 2017-11-09 | 2.05314 | 2.17423 | 1.89394 | 1.99077 | 1.99077 | 19192200.0 | 
| 1 | 2017-11-10 | 2.00773 | 2.06947 | 1.64478 | 1.79684 | 1.79684 | 11155000.0 | 
| 2 | 2017-11-11 | 1.78628 | 1.91775 | 1.61429 | 1.67047 | 1.67047 | 8178150.0 | 
| 3 | 2017-11-12 | 1.66889 | 1.67280 | 1.46256 | 1.51969 | 1.51969 | 15298700.0 | 
| 4 | 2017-11-13 | 1.52601 | 1.73502 | 1.51760 | 1.68662 | 1.68662 | 12238800.0 | 
 dataset.tail()
| | Date | Open | High | Low | Close | Adj Close | Volume |
|---|---|---|---|---|---|---|---|
| 2028 | 2023-05-30 | 311.810333 | 314.276611 | 311.313507 | 311.684509 | 311.684509 | 392038878.0 | 
| 2029 | 2023-05-31 | 311.695709 | 311.846649 | 305.376404 | 306.866699 | 306.866699 | 474410245.0 | 
| 2030 | 2023-06-01 | 306.882813 | 307.613770 | 303.644562 | 304.953278 | 304.953278 | 354897855.0 | 
| 2031 | 2023-06-02 | NaN | NaN | NaN | NaN | NaN | NaN | 
| 2032 | 2023-06-03 | 307.370331 | 307.370331 | 307.334808 | 307.334808 | 307.334808 | 380250496.0 | 
   dataset['Date'] = pd.to_datetime(dataset['Date'])
dataset.head()
| | Date | Open | High | Low | Close | Adj Close | Volume |
|---|---|---|---|---|---|---|---|
| 0 | 2017-11-09 | 2.05314 | 2.17423 | 1.89394 | 1.99077 | 1.99077 | 19192200.0 | 
| 1 | 2017-11-10 | 2.00773 | 2.06947 | 1.64478 | 1.79684 | 1.79684 | 11155000.0 | 
| 2 | 2017-11-11 | 1.78628 | 1.91775 | 1.61429 | 1.67047 | 1.67047 | 8178150.0 | 
| 3 | 2017-11-12 | 1.66889 | 1.67280 | 1.46256 | 1.51969 | 1.51969 | 15298700.0 | 
| 4 | 2017-11-13 | 1.52601 | 1.73502 | 1.51760 | 1.68662 | 1.68662 | 12238800.0 | 
  dataset.describe()
| | Open | High | Low | Close | Adj Close | Volume |
|---|---|---|---|---|---|---|
| count | 2032.000000 | 2032.000000 | 2032.000000 | 2032.000000 | 2032.000000 | 2.032000e+03 | 
| mean | 158.956768 | 163.860205 | 153.668686 | 159.075751 | 159.075751 | 9.367168e+08 | 
| std | 181.151077 | 186.767035 | 174.910847 | 181.076968 | 181.076968 | 1.384815e+09 | 
| min | 1.511360 | 1.582660 | 1.462560 | 1.510360 | 1.510360 | 9.284000e+03 | 
| 25% | 14.691567 | 15.289252 | 14.206488 | 14.794200 | 14.794200 | 1.561682e+08 | 
| 50% | 28.723639 | 29.370382 | 27.973465 | 28.788359 | 28.788359 | 3.960007e+08 | 
| 75% | 307.606750 | 313.983650 | 300.660980 | 307.617782 | 307.617782 | 1.289710e+09 | 
| max | 676.315918 | 690.931946 | 634.549500 | 675.684082 | 675.684082 | 1.798295e+10 | 
  dataset.isnull().sum()
Date         0
Open         1
High         1
Low          1
Close        1
Adj Close    1
Volume       1
dtype: int64
dataset = dataset.dropna()
      dataset['Close'].dtype
dtype('float64')
ADF Test
# Augmented Dickey-Fuller test on the closing-price series; the lag order is
# chosen automatically by AIC.
result = adfuller(dataset['Close'].values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
# result[4] maps each significance level to its critical value. Print the
# header once rather than repeating it before every entry.
print('Critical Values:')
for key, value in result[4].items():
    print(f'   {key}, {value}')
    ADF Statistic: -1.571091598405781
p-value: 0.4980968535928449
Critial Values:
   1%, -3.4336140516629836
Critial Values:
   5%, -2.8629818846551376
Critial Values:
   10%, -2.5675375973574788
ACF and PACF Plot
# Autocorrelation plot of the closing price on a 10x5 figure.
plt.rc("figure", figsize=(10,5))
plot_acf(dataset['Close'])
# NOTE(review): the bare print() presumably stops the notebook from echoing
# the figure returned by plot_acf a second time — confirm.
print()
      plt.rc("figure", figsize=(10,5))
plot_pacf(dataset['Close'])
print()
     Prediction
data = dataset['Close'].values
print('Shape of data: ', data.shape)
    Shape of data: (2032,)
# Split the series by position: the first 80% of the values become the
# training set and the remainder the test set (no shuffling).
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))
    Train length:  1625
# Output: Shape of Train and Test data:  1625 407
def split_sequence(sequence, n_steps):
    """Slice a univariate sequence into supervised-learning samples.

    Each sample pairs a window of ``n_steps`` consecutive values with the
    single value that immediately follows that window.

    Args:
        sequence: Indexable, sliceable 1-D sequence of observations.
        n_steps: Number of consecutive values per input window; must be >= 1.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(len(sequence) - n_steps, n_steps)`` and ``y`` has shape
        ``(len(sequence) - n_steps,)``. If the sequence is too short to
        produce a single sample, ``X`` has shape ``(0, n_steps)`` and ``y``
        has shape ``(0,)``.

    Raises:
        ValueError: If ``n_steps`` is less than 1.
    """
    if n_steps < 1:
        raise ValueError('n_steps must be a positive integer')

    n_samples = len(sequence) - n_steps
    if n_samples <= 0:
        # Keep the window axis so callers can still read X.shape[1].
        return np.empty((0, n_steps)), np.empty((0,))

    X = [sequence[start:start + n_steps] for start in range(n_samples)]
    y = [sequence[start + n_steps] for start in range(n_samples)]
    return array(X), array(y)
# Window length: each sample uses the previous `lag` values to predict the next one.
lag = 3 
# Number of values per time step, used below as the last axis of the LSTM input.
n_features = 1
# Windows are built separately for the train and test portions.
train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
    
Shape of train_X and train_y:  (1622, 3) (1622,)
# Output: Shape of test_X and test_y:  (404, 3) (404,)
# Add a trailing feature axis so the inputs are 3-D (samples, time steps,
# features), matching the input_shape=(lag, n_features) given to the LSTM.
train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))
# New shape of train_X and test_X are :-
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
     
Shape of train_X and train_y:  (1622, 3, 1) (1622,)
Shape of test_X and test_y:  (404, 3, 1) (404,)
LSTM
# Single-layer LSTM regressor: 32 units with ReLU activation, returning only
# the final state (return_sequences=False), followed by one Dense output unit.
model = Sequential()
model.add(LSTM(32, activation='relu', return_sequences=False, input_shape=(lag, n_features)))
model.add(Dense(1))
# Trained with the Adam optimizer on mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')
model.summary()
    Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm (LSTM)                 (None, 32)                4352      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
=================================================================
Total params: 4,385
Trainable params: 4,385
Non-trainable params: 0
_________________________________________________________________
  # NOTE(review): running tf.functions eagerly is normally a debugging aid and
  # slows training — confirm it is still needed here.
  tf.config.run_functions_eagerly(True)
# Train for 100 epochs in batches of 64; validation_split=0.1 sets aside part
# of the training samples for the val_loss reported in `history`.
history = model.fit(train_X, train_y, epochs = 100, 
batch_size=64, verbose=1, validation_split= 0.1)
    Summarize model
# Plot the training and validation loss recorded by model.fit for each epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# 'val_loss' is measured on the validation split of the training data, not on
# the held-out test set, so label the curve accordingly.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
     train_predict = model.predict(train_X)
test_predict = model.predict(test_X)
print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)
     
51/51 [==============================] - 1s 18ms/step
13/13 [==============================] - 0s 18ms/step
Shape of train and test predict:  (1622, 1) (404, 1)
Model evaluation
def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between two sets of values.

    Args:
        actual: Ground-truth target values.
        predicted: Predicted values aligned with ``actual``.

    Returns:
        The square root of ``mean_squared_error(actual, predicted)``.
    """
    mse = mean_squared_error(actual, predicted)
    rmse = math.sqrt(mse)
    return rmse
# RMSE of the model's predictions against the true next-step values, computed
# separately for the training and test windows.
train_score = measure_rmse(train_y, train_predict)
test_score = measure_rmse(test_y, test_predict)
print('Train and Test RMSE: ', train_score, test_score)
    Train and Test RMSE: 12.792479138124301 10.002362357113856
Plot test data and Predicted data
# Overlay the model's test-set predictions on the actual values.
plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})
plt.plot(test_y, label = 'Actual')
plt.plot(test_predict, label = 'Predicted')
plt.xlabel('Time in days')
# The plotted series comes from the 'Close' column, so the axis label matches
# the title rather than claiming the adjusted close.
plt.ylabel('Close price')
plt.title('BNB Close price prediction using LSTM - Test data')
plt.legend()
plt.show()
    
The full example is on my Kaggle account. This is the link.
https://www.kaggle.com/code/mixmore/bnb-price-prediction
This is just an explanation of the example on Kaggle.
