Cardano price prediction

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from numpy import log
from numpy import array
import math

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import pacf
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error

import tensorflow as tf

import keras
from keras.models import Sequential
from keras.layers import LSTM, SimpleRNN
from keras.layers import Dense
from keras.layers import Activation

import warnings
warnings.filterwarnings('ignore')

    dataset = pd.read_csv('ADA-USD.csv')
dataset.head()

	Date	Open	High	Low	Close	Adj Close	Volume
0	2017-11-09	0.025160	0.035060	0.025006	0.032053	0.032053	18716200.0
1	2017-11-10	0.032219	0.033348	0.026451	0.027119	0.027119	6766780.0
2	2017-11-11	0.026891	0.029659	0.025684	0.027437	0.027437	5532220.0
3	2017-11-12	0.027480	0.027952	0.022591	0.023977	0.023977	7280250.0
4	2017-11-13	0.024364	0.026300	0.023495	0.025808	0.025808	4419440.0

dataset.tail()

	Date	Open	High	Low	Close	Adj Close	Volume
2028	2023-05-30	0.379129	0.383068	0.375512	0.377934	0.377934	186645169.0
2029	2023-05-31	0.377937	0.380359	0.371003	0.374403	0.374403	193309518.0
2030	2023-06-01	0.374414	0.375474	0.361692	0.364724	0.364724	235187863.0
2031	2023-06-02	NaN	NaN	NaN	NaN	NaN	NaN
2032	2023-06-03	0.378149	0.378149	0.376950	0.377337	0.377337	197218784.0

dataset.describe()

	Open	High	Low	Close	Adj Close	Volume
count	2032.000000	2032.000000	2032.000000	2032.000000	2032.000000	2.032000e+03
mean	0.480802	0.501045	0.459207	0.480965	0.480965	1.082908e+09
std	0.601298	0.626942	0.573988	0.601294	0.601294	1.979570e+09
min	0.023954	0.025993	0.019130	0.023961	0.023961	2.930550e+06
25%	0.070544	0.073276	0.068434	0.070921	0.070921	8.421186e+07
50%	0.210612	0.222707	0.198475	0.211688	0.211688	2.898286e+08
75%	0.552721	0.590249	0.522270	0.552757	0.552757	1.090055e+09
max	2.966390	3.099186	2.907606	2.968239	2.968239	1.914198e+10

  
    # find null values
dataset.isnull().sum()

Date         0
Open         1
High         1
Low          1
Close        1
Adj Close    1
Volume       1
dtype: int64

dataset = dataset.dropna()

EDA on Cardano Data

start_date = pd.to_datetime(dataset.Date[0])
end_date = pd.to_datetime(dataset.Date.values[-1])
dataset['Date'] = pd.to_datetime(dataset['Date'])

top_plt = plt.subplot2grid((5,4), (0, 0), rowspan = 3, colspan = 4)
top_plt.plot(dataset.Date, dataset["Close"])
plt.title('Cardano Close Price')
bottom_plt = plt.subplot2grid((5,4), (3,0), rowspan = 1, colspan = 4)
bottom_plt.bar(dataset.Date, dataset['Volume'])
plt.title('Cardano Trading Volume', y = -0.60)
plt.gcf().set_size_inches(16,10)

Checking for Correlation

# Correlate only the numeric columns. 'Date' was converted to datetime earlier
# in this section; a datetime column is not a meaningful input to a Pearson
# correlation and newer pandas versions no longer drop it silently in corr().
numeric_columns = dataset.select_dtypes(include='number')
sns.heatmap(numeric_columns.corr(), annot = True, fmt = ".1f")
plt.show()

 
    dataset2 = dataset[['Close', 'Volume']]
dataset2.head()

	Close	Volume
0	0.032053	18716200.0
1	0.027119	6766780.0
2	0.027437	5532220.0
3	0.023977	7280250.0
4	0.025808	4419440.0

Stationarity tests (ADF/KPSS)

ADF test

# Augmented Dickey-Fuller test on the closing prices.
# result[0] is the test statistic, result[1] the p-value and result[4] a
# dict of critical values keyed by significance level.
result = adfuller(dataset2.Close.values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# The loop body must be indented; the header line is printed once per level,
# which matches the output shown below this cell.
for key, value in result[4].items():
    print('Critial Values:')
    print(f' {key}, {value}')

ADF Statistic: -1.8630247943037663
p-value: 0.3496502319040484
Critial Values:
   1%, -3.4336156817103016
Critial Values:
   5%, -2.862982604329594
Critial Values:
   10%, -2.567537980547385

KPSS test

# KPSS test on the closing prices, with regression='c' passed to kpss().
# result[0] is the test statistic, result[1] the p-value and result[3] a
# dict of critical values keyed by significance level.
result = kpss(dataset2['Close'].values, regression='c')
print('\nKPSS Statistic: %f' % result[0])
print('p-value: %f' % result[1])

# The loop body must be indented; the header line is printed once per level,
# which matches the output shown below this cell.
for key, value in result[3].items():
    print('Critial Values:')
    print(f' {key}, {value}')

 KPSS Statistic: 2.190770
p-value: 0.010000
Critial Values:
   10%, 0.347
Critial Values:
   5%, 0.463
Critial Values:
   2.5%, 0.574
Critial Values:
   1%, 0.739

Prediction

ARIMA

plt.rcParams.update({'figure.figsize':(9,7), 'figure.dpi':120})

fig, axes = plt.subplots(3, 2, sharex=True)
axes[0, 0].plot(dataset2['Close'].values); axes[0, 0].set_title('Original Series')
plot_acf(dataset2['Close'].values, ax = axes[0, 1])

axes[1, 0].plot(dataset2['Close'].diff()); axes[1, 0].set_title('1st Order Differencing')
plot_acf(dataset2['Close'].diff().dropna(), ax = axes[1, 1])

axes[2, 0].plot(dataset2['Close'].diff().diff()); axes[2, 0].set_title('2nd Order Differencing')
plot_acf(dataset2['Close'].diff().diff().dropna(), ax = axes[2, 1])

plt.show()

 
    plt.rc("figure", figsize = (10,5))
plot_acf(dataset2['Close'])
print()

 
    plt.rc("figure", figsize=(10,5))
plot_pacf(dataset2['Close'])
print()
    

  
    plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':120})

fig, axes = plt.subplots(1, 2, sharex = True)
axes[0].plot(dataset2['Close'].diff()); axes[0].set_title('1st Differencing')
axes[1].set(ylim = (0,5))
plot_pacf(dataset2['Close'].diff().dropna(), ax = axes[1])

plt.show()

  
    fig, axes = plt.subplots(1, 2, sharex = True)
axes[0].plot(dataset2['Close'].diff()); axes[0].set_title('1st Differencing')
axes[1].set(ylim = (0,1.2))
plot_acf(dataset2['Close'].diff().dropna(), ax=axes[1])

plt.show()

 
    data = dataset2['Close'].values
print('Length of Total data: ', len(data))
train_length = int(len(data) * 0.8)
train_data = data[:train_length]
test_data = data[train_length:]
print('Train and Test data length: ', len(train_data), len(test_data))
    
 Length of Total data:  2032
Train and Test data length:  1625 407

Build ARIMA Model

model = ARIMA(train_data, order = (1,0,8))
model_fit = model.fit(low_memory = False)
print(model_fit.summary())

                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                 1625
Model:                 ARIMA(1, 0, 8)   Log Likelihood                2531.088
Date:                Sun, 04 Jun 2023   AIC                          -5040.175
Time:                        17:55:41   BIC                          -4980.849
Sample:                             0   HQIC                         -5018.163
                               - 1625                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.4945      0.789      0.627      0.531      -1.052       2.040
ar.L1          0.9976      0.002    590.822      0.000       0.994       1.001
ma.L1         -0.0605      0.011     -5.692      0.000      -0.081      -0.040
ma.L2          0.0191      0.011      1.744      0.081      -0.002       0.041
ma.L3         -0.0685      0.010     -6.584      0.000      -0.089      -0.048
ma.L4          0.0566      0.010      5.523      0.000       0.036       0.077
ma.L5         -0.0502      0.011     -4.619      0.000      -0.072      -0.029
ma.L6          0.0361      0.011      3.238      0.001       0.014       0.058
ma.L7         -0.0189      0.011     -1.662      0.097      -0.041       0.003
ma.L8          0.0030      0.012      0.259      0.796      -0.020       0.026
sigma2         0.0026    3.2e-05     80.844      0.000       0.003       0.003
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):             26623.10
Prob(Q):                              0.98   Prob(JB):                         0.00
Heteroskedasticity (H):               8.53   Skew:                             0.53
Prob(H) (two-sided):                  0.00   Kurtosis:                        22.80
===================================================================================

residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(1,2)
residuals.plot(title = "Residuals", ax = ax[0])
residuals.plot(kind = 'kde', title = 'Density', ax = ax[1])
plt.show()

  
    forecast_result = model_fit.forecast(150, alpha = 0.05)  # 95% conf
forecast_result[:30]

array([0.91167908, 0.90996577, 0.9105535 , 0.90885626, 0.9084636 ,
       0.90684824, 0.90632089, 0.9052688 , 0.90429749, 0.90332849,
       0.90236177, 0.90139734, 0.90043519, 0.89947531, 0.8985177 ,
       0.89756236, 0.89660928, 0.89565845, 0.89470986, 0.89376352,
       0.89281942, 0.89187755, 0.89093791, 0.89000049, 0.88906528,
       0.88813229, 0.8872015 , 0.88627291, 0.88534652, 0.88442232])

test_data[:30]

array([0.907154, 0.888503, 0.88635 , 0.898695, 0.827645, 0.840595,
       0.84395 , 0.805043, 0.75666 , 0.790344, 0.781529, 0.771145,
       0.896942, 0.791152, 0.783359, 0.761882, 0.739563, 0.610088,
       0.628963, 0.5128  , 0.473746, 0.528877, 0.539358, 0.5975  ,
       0.556716, 0.578077, 0.505615, 0.533321, 0.517907, 0.528314])

Test and Predicted

plt.figure(figsize = (12, 6))
plt.rcParams.update({'font.size': 12})
plt.plot(test_data[:150], '#0077be',label = 'Actual')
plt.plot(forecast_result[:], '#ff8841',label = 'Predicted')
plt.title('ARIMA Model for Cardano Price Forecasting')
plt.ylabel('Cardano Price [in Dollar]')
plt.xlabel('Time Steps [in Days] ')
plt.legend()
plt.show()

Artificial Neural Network

data = dataset2['Close'].values
print('Shape of data: ', data.shape)

Shape of data: (2032,)

    train_length = int(len(data) * 0.8)
print('Train length: ', train_length) 

train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', train_data.shape, test_data.shape)
    

Train length:  1625
Shape of Train and Test data:  (1625,) (407,)

train_data = train_data.reshape(-1, 1)
test_data = test_data.reshape(-1, 1)
print('Shape of Train and Test data: ', train_data.shape, test_data.shape)

Shape of Train and Test data: (1625, 1) (407, 1)

def create_dataset(dataset, lookback):
    """Turn a 2-D series into (window, next value) training pairs.

    Only column 0 of ``dataset`` is read. Each row of the first returned
    array holds ``lookback`` consecutive values; the matching entry of the
    second array is the value immediately after that window.

    NOTE: the sample count is ``len(dataset) - lookback - 1``, so the last
    window that still has a following value is not emitted.
    """
    sample_count = len(dataset) - lookback - 1
    windows = [dataset[start:start + lookback, 0] for start in range(sample_count)]
    targets = [dataset[start + lookback, 0] for start in range(sample_count)]
    return np.array(windows), np.array(targets)

plot_pacf(data, lags = 10)
plt.show()

Keeping only the leading lags whose partial autocorrelation is greater than 0.1 (10%)

# Partial autocorrelation of the full closing-price series, requested with nlags=20.
pacf_value = pacf(data, nlags=20)

# `lag` is the length of the leading run of coefficients above 0.1: it is the
# position of the first coefficient that is NOT above 0.1, or the full length
# if every coefficient passes.
# NOTE(review): pacf_value[0] is presumably the lag-0 coefficient (always 1.0),
# in which case it is counted too -- confirm against the statsmodels docs.
lag = next(
    (position for position, coefficient in enumerate(pacf_value)
     if not coefficient > 0.1),
    len(pacf_value),
)
print('Selected look_back (or lag = ): ', lag)

Selected look_back (or lag = ):  2

Separating Input and Output values

train_X, train_y = create_dataset(train_data, lag)
test_X, test_y = create_dataset(test_data, lag)

print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Shape of train_X and train_y:  (1622, 2) (1622,)
Shape of test_X and test_y:  (404, 2) (404,)

Build MLP model

np.random.seed(7)

model = Sequential()
model.add(Dense(64, input_dim = lag, activation='relu', name= "1st_hidden"))
# model.add(Dense(64, activation='relu', name = '2nd_hidden'))
model.add(Dense(1, name = 'Output_layer', activation = 'linear'))
# model.add(Activation("linear", name = 'Linear_activation'))
model.compile(loss = "mean_squared_error", optimizer = "adam")
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 1st_hidden (Dense)          (None, 64)                192       
                                                                 
 Output_layer (Dense)        (None, 1)                 65        
                                                                 
=================================================================
Total params: 257
Trainable params: 257
Non-trainable params: 0
_________________________________________________________________

  
    epoch_number = 100
batches = 64

history = model.fit(train_X, train_y, epochs = epoch_number, batch_size = batches, verbose = 1, shuffle = False, 
                    validation_split = 0.1)
    
  
    plt.clf
plt.figure(figsize = (10,8))
plt.plot(history.history['loss'], label = 'train')
plt.plot(history.history['val_loss'], label = 'test')
plt.xlabel('Number of Epochs')
plt.ylabel('Train and Test Loss')
plt.title('Train and Test loss per epochs [Univariate]')
plt.legend()
plt.show()
    
   

Predictions

testPredict = model.predict(test_X)
predicted_value = testPredict[:, 0]

13/13 [==============================] - 0s 2ms/step

Metrics

def evaluate_forecast_results(actual, predicted):
    """Print a block of error metrics comparing ``predicted`` with ``actual``.

    Reports, in order: R2, MAE, MSE and RMSE (each rounded to 2 decimals),
    then NRMSE and WMAPE as returned by the helpers of the same name.
    Nothing is returned.
    """
    r2 = round(r2_score(actual, predicted), 2)
    print('R2 Score: ', r2)

    mean_abs_err = round(mae(actual, predicted), 2)
    print('MAE : ', mean_abs_err)

    # The MSE is computed once and reused for the RMSE line.
    mse = mean_squared_error(actual, predicted)
    print('MSE: ', round(mse, 2))
    print('RMSE: ', round(math.sqrt(mse), 2))

    print('NRMSE: ', NRMSE(actual, predicted))
    print('WMAPE: ', WMAPE(actual, predicted))

def NRMSE(actual, predicted):
    """Return the RMSE divided by the mean of ``actual``, rounded to 4 decimals."""
    root_mse = math.sqrt(mean_squared_error(actual, predicted))
    return round(root_mse / np.mean(actual), 4)

def WMAPE(actual, predicted):
    """Return the weighted mean absolute percentage error, rounded to 4 decimals.

    WMAPE = sum(|actual - predicted|) / sum(|actual|)

    The errors are summed as absolute values. Summing the signed differences
    lets over- and under-predictions cancel each other, which can make the
    result near zero or even negative regardless of forecast quality.

    Args:
        actual: observed values (array-like).
        predicted: forecast values, same length as ``actual``.

    Returns:
        The WMAPE as a fraction (0.05 means 5%).
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    abs_error = np.sum(np.abs(actual - predicted))
    wmape = abs_error / np.sum(np.abs(actual))
    return round(wmape, 4)

evaluate_forecast_results(test_y, predicted_value)

R2 Score:  0.96
MAE :  0.01
MSE:  0.0
RMSE:  0.02
NRMSE:  0.0514
WMAPE:  0.0009

plt.figure(figsize = (16, 8))
plt.rcParams.update({'font.size': 12})
plt.plot(test_y[:], '#0077be', label = 'Actual')
plt.plot(predicted_value, '#ff8841', label = 'Predicted')
plt.title('Cardano Close Price Forecasting')
plt.ylabel('Cardano Close Price ')
plt.xlabel('Time Steps [in Days] ')
plt.legend()
plt.show()

RNN

data = dataset2['Close'].values
print('Shape of data: ', data.shape)

Shape of data:  (2032,)

train_length = int(len(data) * 0.8)
print('Train length: ', train_length)

train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))

Train length:  1625
Shape of Train and Test data:  1625 407

def split_sequence(sequence, n_steps):
    """Split a 1-D sequence into (window, next value) samples.

    Each sample pairs ``n_steps`` consecutive values with the single value
    that follows them. Returns two arrays: the windows and the targets.
    """
    size = len(sequence)
    # Number of start positions whose window is still followed by a target.
    usable = min(size, size - n_steps)
    windows = [sequence[start:start + n_steps] for start in range(usable)]
    targets = [sequence[start + n_steps] for start in range(usable)]
    return array(windows), array(targets)

pacf_value = pacf(data, nlags=20)
lag = 0
# collect lag values greater than 10% correlation
# (counting stops at the first coefficient that is not above 0.1)
for x in pacf_value:
    if x > 0.1:
        lag += 1
    else:
        break
print('Selected look_back (or lag = ): ', lag)

# Build the supervised samples for the RNN. This step was missing although its
# output is shown right below and the reshape that follows needs n_features,
# train_X and test_X produced by split_sequence.
n_features = 1  # univariate input: only the closing price

train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)

print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Shape of train_X and train_y:  (1623, 2) (1623,)
Shape of test_X and test_y:  (405, 2) (405,)

Reshaping train_X and test_X to 3D

train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))

print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Shape of train_X and train_y:  (1623, 2, 1) (1623,)
Shape of test_X and test_y:  (405, 2, 1) (405,)

Building the model

model = Sequential()
model.add(SimpleRNN(64, activation='relu', return_sequences = False, input_shape = (lag, n_features)))
model.add(Dense(1))
model.compile(optimizer = 'adam', loss = 'mse')
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 simple_rnn (SimpleRNN)      (None, 64)                4224      
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
=================================================================
Total params: 4,289
Trainable params: 4,289
Non-trainable params: 0
_________________________________________________________________

Fitting the model with data
    tf.config.run_functions_eagerly(True)

cb = tf.keras.callbacks.EarlyStopping(monitor = 'loss', patience = 15, restore_best_weights = True)
history = model.fit(train_X, train_y, epochs = 150, batch_size = 64, verbose = 1, validation_split = 0.1, 
                   callbacks = [cb])
    
Summarizing model loss (training vs. validation)
    plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc = 'upper left')
plt.show()
    
 

Prediction with Test data

train_predict = model.predict(train_X)
test_predict = model.predict(test_X)

print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)

51/51 [==============================] - 1s 12ms/step
13/13 [==============================] - 0s 13ms/step
Shape of train and test predict:  (1623, 1) (405, 1)

Model evaluation

actual_ = test_y
predicted_ = test_predict[:, 0]
len(actual_), len(predicted_)

(405, 405)

evaluate_forecast_results(actual_, predicted_)

R2 Score:  0.96
MAE :  0.01
MSE:  0.0
RMSE:  0.02
NRMSE:  0.0517
WMAPE:  0.007

Test and predicted data

plt.rc("figure", figsize = (14,8))
plt.rcParams.update({'font.size': 16})
plt.plot(actual_, label = 'Actual')
plt.plot(predicted_, label = 'Predicted')
plt.xlabel('Time in days')
plt.ylabel('Cardano price')
plt.title('Cardano Close price prediction by Simple RNN - Test data')
plt.legend()
plt.show()

 
    df_train = pd.DataFrame(columns = ['Train data'])
df_train['Train data'] = train_data

df = pd.DataFrame(columns = ['Test data', 'Predicted data'])
df['Test data'] = actual_
df['Predicted data'] = predicted_

total_len = len(df_train['Train data']) + len(df['Test data'])
range(len(df_train['Train data']), total_len)
x_list = [x for x in range(len(df_train['Train data']), total_len)]
df.index = x_list

plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})
plt.xlabel('Time in days')
plt.ylabel('Cardano price')
plt.title('Cardano price prediction by Simple RNN')
plt.plot(df_train['Train data'])
plt.plot(df[['Test data', 'Predicted data']])
plt.legend(['Train', 'Test', 'Predictions'], loc='lower right')
plt.show()
    
   

 
    data = dataset2['Close'].values
print('Shape of data: ', data.shape)

Shape of data:  (2032,)

# Separate train and test data
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data, test_data = data[:train_length], data[train_length:]
print('Shape train and test data: ', len(train_data), len(test_data))

Train length:  1625
Shape train and test data:  1625 407

def split_sequence(sequence, n_steps):
    """Split a 1-D sequence into (window, next value) samples.

    Args:
        sequence: ordered values (list or 1-D array).
        n_steps: number of consecutive values in each input window.

    Returns:
        A tuple ``(X, y)`` of arrays: ``X[i]`` holds ``n_steps`` consecutive
        values and ``y[i]`` is the value that immediately follows them.
    """
    X, y = list(), list()
    for i in range(len(sequence)):
        end_ix = i + n_steps
        # Stop once the window no longer has a following value to predict.
        if end_ix > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    # Without this return the function yields None and callers that unpack
    # two values from the result fail.
    return array(X), array(y)

Choosing the appropriate lag value

 
    lag = 2  
n_features = 1

train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)

print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
    

Shape of train_X and train_y:  (1623, 2) (1623,)
Shape of test_X and test_y:  (405, 2) (405,)

Reshaping train_X and test_X to 3D

train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))

print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Shape of train_X and train_y:  (1623, 2, 1) (1623,)
Shape of test_X and test_y:  (405, 2, 1) (405,)

Building LSTM Model

model = Sequential()
model.add(LSTM(64, activation = 'relu', return_sequences = True, input_shape = (lag, n_features)))
model.add(LSTM(64, activation = 'relu'))
model.add(Dense(1))
model.compile(optimizer = 'adam', loss = 'mse')
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm (LSTM)                 (None, 2, 64)             16896     
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
=================================================================
Total params: 49,985
Trainable params: 49,985
Non-trainable params: 0
_________________________________________________________________

Fitting model with data

tf.config.run_functions_eagerly(True)

cb = tf.keras.callbacks.EarlyStopping(monitor = 'loss', patience = 15, restore_best_weights = True)
history = model.fit(train_X, train_y, epochs = 150, batch_size = 64, verbose = 1, validation_split = 0.1,
callbacks = [cb])

Summarizing model loss (training vs. validation)

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc = 'upper left')
plt.show()

Making the prediction

train_predict = model.predict(train_X)
test_predict = model.predict(test_X)

print('Shape train and test predict: ', train_predict.shape, test_predict.shape)

51/51 [==============================] - 2s 32ms/step
13/13 [==============================] - 0s 33ms/step
Shape train and test predict:  (1623, 1) (405, 1)

Model Evaluation

actual_lstm = test_y
predicted_lstm = test_predict[:, 0]
evaluate_forecast_results(actual_lstm, predicted_lstm)

R2 Score:  0.94
MAE :  0.02
MSE:  0.0
RMSE:  0.03
NRMSE:  0.0619
WMAPE:  -0.0224

df_train = pd.DataFrame(columns = ['Train data'])
df_train['Train data'] = train_data

df = pd.DataFrame(columns = ['Test data', 'Predicted data'])
df['Test data'] = actual_lstm
df['Predicted data'] = predicted_lstm

total_len = len(df_train['Train data']) + len(df['Test data'])
range(len(df_train['Train data']), total_len)
x_list = [x for x in range(len(df_train['Train data']), total_len)]
df.index = x_list

plt.rc("figure", figsize = (14,8))
plt.rcParams.update({'font.size': 16})
plt.xlabel('Time in days')
plt.ylabel('Cardano close price')
plt.title('Cardano price prediction using LSTM')
plt.plot(df_train['Train data'])
plt.plot(df[['Test data', 'Predicted data']])
plt.legend(['Train', 'Test', 'Predictions'], loc = 'lower right')
plt.show()

The full example is on my Kaggle account. This is the link.

https://www.kaggle.com/code/mixmore/cardano-price-prediction

This is just an explanation of the example on Kaggle.

09/03/2023

Cardano price prediction

dataset = pd.read_csv('ADA-USD.csv')dataset.head()

dataset.tail()

dataset.describe()

# find null valuesdataset.isnull().sum()

dataset = dataset.dropna()

EDA on Cardano Data

sns.heatmap(dataset.corr(), annot = True, fmt = ".1f")plt.show()

dataset2 = dataset[['Close', 'Volume']]dataset2.head()

result = adfuller(dataset2.Close.values, autolag='AIC')print(f'ADF Statistic: {result[0]}')print(f'p-value: {result[1]}')for key, value in result[4].items(): print('Critial Values:') print(f' {key}, {value}')

result = kpss(dataset2['Close'].values, regression='c')print('\nKPSS Statistic: %f' % result[0])print('p-value: %f' % result[1])for key, value in result[3].items(): print('Critial Values:') print(f' {key}, {value}');

Prediction

ARIMA

plt.rc("figure", figsize = (10,5))plot_acf(dataset2['Close'])print()

plt.rc("figure", figsize=(10,5))plot_pacf(dataset2['Close'])print()

plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':120})fig, axes = plt.subplots(1, 2, sharex = True)axes[0].plot(dataset2['Close'].diff()); axes[0].set_title('1st Differencing')axes[1].set(ylim = (0,5))plot_pacf(dataset2['Close'].diff().dropna(), ax = axes[1])plt.show()

fig, axes = plt.subplots(1, 2, sharex = True)axes[0].plot(dataset2['Close'].diff()); axes[0].set_title('1st Differencing')axes[1].set(ylim = (0,1.2))plot_acf(dataset2['Close'].diff().dropna(), ax=axes[1])plt.show()

data = dataset2['Close'].valuesprint('Length of Total data: ', len(data))train_length = int(len(data) * 0.8)train_data = data[:train_length]test_data = data[train_length:]print('Train and Test data length: ', len(train_data), len(test_data))

Build ARIMA Model

model = ARIMA(train_data, order = (1,0,8))model_fit = model.fit(low_memory = False)print(model_fit.summary())

residuals = pd.DataFrame(model_fit.resid)fig, ax = plt.subplots(1,2)residuals.plot(title = "Residuals", ax = ax[0])residuals.plot(kind = 'kde', title = 'Density', ax = ax[1])plt.show()

forecast_result = model_fit.forecast(150, alpha = 0.05) # 95% confforecast_result[:30]

test_data[:30]

Test and Predicted

Artificial Neural Network

data = dataset2['Close'].valuesprint('Shape of data: ', data.shape)

Shape of data: (2032,)

train_length = int(len(data) * 0.8)print('Train length: ', train_length) train_data, test_data = data[:train_length], data[train_length:]print('Shape of Train and Test data: ', train_data.shape, test_data.shape)

Considering only Auto-correlation Lag value Greater than 10%

# One statement per line: the fused form is not valid Python.
pacf_value = pacf(data, nlags=20)
lag = 0
# Count the leading coefficients above 0.1; stop at the first that is not.
for x in pacf_value:
    if x > 0.1:
        lag += 1
    else:
        break
print('Selected look_back (or lag = ): ', lag)

Separating Input and Output values

train_X, train_y = create_dataset(train_data, lag)test_X, test_y = create_dataset(test_data, lag)print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Build MLP model

epoch_number = 100batches = 64history = model.fit(train_X, train_y, epochs = epoch_number, batch_size = batches, verbose = 1, shuffle = False, validation_split = 0.1)

plt.clfplt.figure(figsize = (10,8))plt.plot(history.history['loss'], label = 'train')plt.plot(history.history['val_loss'], label = 'test')plt.xlabel('Number of Epochs')plt.ylabel('Train and Test Loss')plt.title('Train and Test loss per epochs [Univariate]')plt.legend()plt.show()

Predictions

testPredict = model.predict(test_X)predicted_value = testPredict[:, 0]

Metrics

RNN

data = dataset2['Close'].valuesprint('Shape of data: ', data.shape)

train_length = int(len(data) * 0.8)print('Train length: ', train_length)train_data, test_data = data[:train_length], data[train_length:]print('Shape of Train and Test data: ', len(train_data), len(test_data))

train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Building the model

model = Sequential()model.add(SimpleRNN(64, activation='relu', return_sequences = False, input_shape = (lag, n_features)))model.add(Dense(1))model.compile(optimizer = 'adam', loss = 'mse')model.summary()

Model

tf.config.run_functions_eagerly(True)cb = tf.keras.callbacks.EarlyStopping(monitor = 'loss', patience = 15, restore_best_weights = True)history = model.fit(train_X, train_y, epochs = 150, batch_size = 64, verbose = 1, validation_split = 0.1, callbacks = [cb])

Summarizing model accuracy and Loss

plt.plot(history.history['loss'])plt.plot(history.history['val_loss'])plt.title('model loss')plt.ylabel('loss')plt.xlabel('epoch')plt.legend(['train', 'test'], loc = 'upper left')plt.show()

Prediction with Test data

train_predict = model.predict(train_X)test_predict = model.predict(test_X)print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)

Model evaluation

actual_ = test_ypredicted_ = test_predict[:, 0]len(actual_), len(predicted_)

evaluate_forecast_results(actual_, predicted_)

Test and predicted data

plt.rc("figure", figsize = (14,8))plt.rcParams.update({'font.size': 16})plt.plot(actual_, label = 'Actual')plt.plot(predicted_, label = 'Predicted')plt.xlabel('Time in days')plt.ylabel('Cardano price')plt.title('Cardano Close price prediction by Simple RNN - Test data')plt.legend()plt.show()

data = dataset2['Close'].valuesprint('Shape of data: ', data.shape)

# Separate train and test datatrain_length = int(len(data) * 0.8)print('Train length: ', train_length)train_data, test_data = data[:train_length], data[train_length:]print('Shape train and test data: ', len(train_data), len(test_data))

def split_sequence(sequence, n_steps):
    """Slice a univariate series into supervised-learning samples.

    Each sample pairs ``n_steps`` consecutive observations with the single
    observation that immediately follows them.

    Args:
        sequence: 1-D indexable series (list or NumPy array).
        n_steps: Window length, i.e. the number of lagged observations that
            form one input sample.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` holds one row of ``n_steps``
        values per window and ``y`` the value following each window. Both
        are empty when the series has no more than ``n_steps`` observations.
    """
    X, y = [], []
    # A window is only usable if one more observation follows it to serve as
    # the target, hence len(sequence) - n_steps windows in total.
    for start in range(len(sequence) - n_steps):
        end_ix = start + n_steps
        X.append(sequence[start:end_ix])
        y.append(sequence[end_ix])
    # The original fell off the end and returned None, so callers could not
    # unpack the result.
    return np.array(X), np.array(y)

Choosing the appropriate lag value

# Window length (number of past closes per sample) and features per timestep.
lag = 2
n_features = 1

train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Reshaping train_X and test_X to 3D

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, n_features).
train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

Building LSTM Model

# Stacked LSTM regressor. The first layer returns the full sequence so the
# second LSTM layer receives one vector per timestep; a single-unit dense
# layer then emits the predicted price.
model = Sequential()
model.add(LSTM(64, activation='relu', return_sequences=True, input_shape=(lag, n_features)))
model.add(LSTM(64, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()

Fitting model with data

tf.config.run_functions_eagerly(True)

# Stop once the monitored loss has not improved for 15 epochs and roll back to
# the best weights seen.
# NOTE(review): the callback monitors the training loss even though a 10%
# validation split is available; 'val_loss' may have been intended -- confirm.
cb = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=15, restore_best_weights=True)
history = model.fit(
    train_X,
    train_y,
    epochs=150,
    batch_size=64,
    verbose=1,
    validation_split=0.1,
    callbacks=[cb],
)

Summarizing model loss

# Training vs. validation loss per epoch. The 'test' legend entry is the
# validation-split loss recorded by model.fit.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

Making the prediction

# Run the fitted network over both the training and the held-out windows.
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)
print('Shape train and test predict: ', train_predict.shape, test_predict.shape)

# Score the first output column of the test predictions against the true
# targets; evaluate_forecast_results is defined elsewhere in this file.
actual_lstm = test_y
predicted_lstm = test_predict[:, 0]
evaluate_forecast_results(actual_lstm, predicted_lstm)

# Load the daily ADA-USD history
# (columns: Date, Open, High, Low, Close, Adj Close, Volume).
dataset = pd.read_csv('ADA-USD.csv')
dataset.head()

# find null values
# NOTE(review): the file contains at least one all-NaN row and nothing in this
# section drops or fills it; confirm missing values are handled before the
# statistical tests and model fitting below.
dataset.isnull().sum()

# Correlation heatmap. 'Date' is read as a string column, and DataFrame.corr()
# raises on non-numeric columns in pandas 2.x, so restrict it to numeric
# columns explicitly.
sns.heatmap(dataset.corr(numeric_only=True), annot=True, fmt=".1f")
plt.show()

# Working subset used by the rest of the analysis.
dataset2 = dataset[['Close', 'Volume']]
dataset2.head()

# Augmented Dickey-Fuller test on the closing price (lag order chosen by AIC).
result = adfuller(dataset2.Close.values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# result[4] maps each significance level to its critical value. Print the
# header once rather than before every entry.
print('Critical Values:')
for key, value in result[4].items():
    print(f' {key}, {value}')

# KPSS test on the closing price with a constant-only regression ('c').
result = kpss(dataset2['Close'].values, regression='c')
print('\nKPSS Statistic: %f' % result[0])
print('p-value: %f' % result[1])

# result[3] maps each significance level to its critical value. Print the
# header once rather than before every entry.
print('Critical Values:')
for key, value in result[3].items():
    print(f' {key}, {value}')

# Autocorrelation, then partial-autocorrelation, plot of the raw closing
# price. Each plot is followed by an empty print(), exactly as in the
# original cells.
for _correlogram in (plot_acf, plot_pacf):
    plt.rc("figure", figsize=(10, 5))
    _correlogram(dataset2['Close'])
    print()

plt.rcParams.update({'figure.figsize': (9, 3), 'figure.dpi': 120})

# Two figures with the same layout: the first-differenced series on the left
# and, on the right, its PACF (first figure) or ACF (second figure).
for _corr_plot, _y_range in ((plot_pacf, (0, 5)), (plot_acf, (0, 1.2))):
    fig, axes = plt.subplots(1, 2, sharex=True)
    axes[0].plot(dataset2['Close'].diff())
    axes[0].set_title('1st Differencing')
    axes[1].set(ylim=_y_range)
    # diff() leaves a NaN in the first position; drop it before computing
    # the correlogram.
    _corr_plot(dataset2['Close'].diff().dropna(), ax=axes[1])

    plt.show()

# Closing prices as a NumPy array, split chronologically 80/20.
data = dataset2['Close'].values
print('Length of Total data: ', len(data))
train_length = int(len(data) * 0.8)
train_data, test_data = data[:train_length], data[train_length:]
print('Train and Test data length: ', len(train_data), len(test_data))

# ARIMA(p=1, d=0, q=8) fitted on the training portion only.
model = ARIMA(train_data, order=(1, 0, 8))
model_fit = model.fit(low_memory=False)
print(model_fit.summary())

# Residual diagnostics: the raw residual series on the left, its kernel
# density estimate on the right.
residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(1, 2)
residuals.plot(ax=ax[0], title="Residuals")
residuals.plot(ax=ax[1], kind='kde', title='Density')
plt.show()

# Forecast 150 steps past the end of the training data.
# NOTE(review): confirm that `alpha` has any effect on forecast(); only point
# forecasts are used here, no confidence interval is read back.
forecast_result = model_fit.forecast(150, alpha=0.05)  # 95% conf
forecast_result[:30]

# Closing prices as a NumPy array.
data = dataset2['Close'].values
print('Shape of data: ', data.shape)

# First 80% of the series (in time order) is the training set.
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)

train_data = data[:train_length]
test_data = data[train_length:]
print('Shape of Train and Test data: ', train_data.shape, test_data.shape)

# Choose the look-back window from the partial autocorrelation function:
# count leading PACF values above 0.1 and stop at the first one that is not.
# NOTE(review): statsmodels' pacf() output starts at lag 0, so the first
# element is counted too; the PACF is also computed on the full series,
# including the test portion -- confirm both are intended.
pacf_value = pacf(data, nlags=20)
lag = 0
for x in pacf_value:
    if x > 0.1:
        lag += 1
    else:
        break
print('Selected look_back (or lag = ): ', lag)

# Build lagged samples for both partitions; create_dataset is defined
# elsewhere in this file.
train_X, train_y = create_dataset(train_data, lag)
test_X, test_y = create_dataset(test_data, lag)

# Report the resulting array shapes.
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

# Training hyper-parameters.
epoch_number, batches = 100, 64

# Fit without shuffling; 10% of the training samples are set aside for
# validation.
# NOTE(review): `model` must be the network built for this section; its
# definition is not visible in this part of the file -- confirm.
history = model.fit(
    train_X,
    train_y,
    epochs=epoch_number,
    batch_size=batches,
    verbose=1,
    shuffle=False,
    validation_split=0.1,
)

# Learning curves for the univariate model. plt.figure() opens a fresh
# figure, so no explicit clear is needed; the original bare `plt.clf` only
# referenced the function without calling it and therefore did nothing.
plt.figure(figsize=(10, 8))
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.xlabel('Number of Epochs')
plt.ylabel('Train and Test Loss')
plt.title('Train and Test loss per epochs [Univariate]')
plt.legend()
plt.show()

# Predict on the held-out windows and keep the first output column.
testPredict = model.predict(test_X)
predicted_value = testPredict[:, 0]

# Closing prices as a NumPy array.
data = dataset2['Close'].values
print('Shape of data: ', data.shape)

# First 80% of the series (in time order) is the training set.
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)

train_data = data[:train_length]
test_data = data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))

# Append a feature axis: (samples, timesteps) -> (samples, timesteps, n_features).
# NOTE(review): train_X / test_X are not rebuilt here; this assumes they are
# the 2-D windows produced earlier in the file -- confirm.
train_X = train_X.reshape(train_X.shape[0], train_X.shape[1], n_features)
test_X = test_X.reshape(test_X.shape[0], test_X.shape[1], n_features)

print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

# Simple RNN regressor: a 64-unit recurrent layer that returns only its last
# output, feeding a single-unit dense layer.
model = Sequential([
    SimpleRNN(64, activation='relu', return_sequences=False, input_shape=(lag, n_features)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')
model.summary()

tf.config.run_functions_eagerly(True)

# Early stopping on the training loss with a patience of 15 epochs; the best
# weights seen are restored when training stops.
cb = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=15,
    restore_best_weights=True,
)
history = model.fit(
    train_X,
    train_y,
    epochs=150,
    batch_size=64,
    verbose=1,
    validation_split=0.1,
    callbacks=[cb],
)

# Draw the training curve first, then the validation curve, so the legend
# order below ('train', 'test') lines up with them.
for _curve in ('loss', 'val_loss'):
    plt.plot(history.history[_curve])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# Predictions for the training windows, then for the held-out windows.
train_predict, test_predict = model.predict(train_X), model.predict(test_X)

print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)

# True targets alongside the first output column of the test predictions.
actual_, predicted_ = test_y, test_predict[:, 0]
len(actual_), len(predicted_)

# Closing prices as a NumPy array.
data = dataset2['Close'].values
print('Shape of data: ', data.shape)

# Separate train and test data (first 80% for training, in time order)
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data = data[:train_length]
test_data = data[train_length:]
print('Shape train and test data: ', len(train_data), len(test_data))

def split_sequence(sequence, n_steps):
    """Slice a univariate series into supervised-learning samples.

    Each sample pairs ``n_steps`` consecutive observations with the single
    observation that immediately follows them.

    Args:
        sequence: 1-D indexable series (list or NumPy array).
        n_steps: Window length, i.e. the number of lagged observations that
            form one input sample.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` holds one row of ``n_steps``
        values per window and ``y`` the value following each window. Both
        are empty when the series has no more than ``n_steps`` observations.
    """
    X, y = [], []
    # A window is only usable if one more observation follows it to serve as
    # the target, hence len(sequence) - n_steps windows in total.
    for start in range(len(sequence) - n_steps):
        end_ix = start + n_steps
        X.append(sequence[start:end_ix])
        y.append(sequence[end_ix])
    # The original fell off the end and returned None, so callers could not
    # unpack the result.
    return np.array(X), np.array(y)

# Window length (past closes per sample) and number of features per timestep.
lag, n_features = 2, 1

# Build lagged samples for the training and the test partition.
train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)

# Report the resulting array shapes.
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

# Append a feature axis: (samples, timesteps) -> (samples, timesteps, n_features).
train_X = train_X.reshape(train_X.shape[0], train_X.shape[1], n_features)
test_X = test_X.reshape(test_X.shape[0], test_X.shape[1], n_features)

# Shapes after the reshape; the targets are unchanged.
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

# Stacked LSTM regressor. The first layer returns the full sequence so the
# second LSTM receives one vector per timestep; a single-unit dense layer
# produces the prediction.
model = Sequential([
    LSTM(64, activation='relu', return_sequences=True, input_shape=(lag, n_features)),
    LSTM(64, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')
model.summary()

tf.config.run_functions_eagerly(True)

# Early stopping on the training loss with a patience of 15 epochs; the best
# weights seen are restored when training stops.
cb = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=15,
    restore_best_weights=True,
)
history = model.fit(
    train_X,
    train_y,
    epochs=150,
    batch_size=64,
    verbose=1,
    validation_split=0.1,
    callbacks=[cb],
)

# Draw the training curve first, then the validation curve, so the legend
# order below ('train', 'test') lines up with them.
for _curve in ('loss', 'val_loss'):
    plt.plot(history.history[_curve])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# Predictions for the training windows, then for the held-out windows.
train_predict, test_predict = model.predict(train_X), model.predict(test_X)

print('Shape train and test predict: ', train_predict.shape, test_predict.shape)

# Score the first output column of the test predictions against the true
# targets; evaluate_forecast_results is defined elsewhere in this file.
actual_lstm, predicted_lstm = test_y, test_predict[:, 0]
evaluate_forecast_results(actual_lstm, predicted_lstm)