Cardano price prediction
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from numpy import log
from numpy import array
import math
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import pacf
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import LSTM, SimpleRNN
from keras.layers import Dense
from keras.layers import Activation
import warnings
warnings.filterwarnings('ignore')
dataset = pd.read_csv('ADA-USD.csv')
dataset.head()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2017-11-09 | 0.025160 | 0.035060 | 0.025006 | 0.032053 | 0.032053 | 18716200.0 |
1 | 2017-11-10 | 0.032219 | 0.033348 | 0.026451 | 0.027119 | 0.027119 | 6766780.0 |
2 | 2017-11-11 | 0.026891 | 0.029659 | 0.025684 | 0.027437 | 0.027437 | 5532220.0 |
3 | 2017-11-12 | 0.027480 | 0.027952 | 0.022591 | 0.023977 | 0.023977 | 7280250.0 |
4 | 2017-11-13 | 0.024364 | 0.026300 | 0.023495 | 0.025808 | 0.025808 | 4419440.0 |
dataset.tail()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
2028 | 2023-05-30 | 0.379129 | 0.383068 | 0.375512 | 0.377934 | 0.377934 | 186645169.0 |
2029 | 2023-05-31 | 0.377937 | 0.380359 | 0.371003 | 0.374403 | 0.374403 | 193309518.0 |
2030 | 2023-06-01 | 0.374414 | 0.375474 | 0.361692 | 0.364724 | 0.364724 | 235187863.0 |
2031 | 2023-06-02 | NaN | NaN | NaN | NaN | NaN | NaN |
2032 | 2023-06-03 | 0.378149 | 0.378149 | 0.376950 | 0.377337 | 0.377337 | 197218784.0 |
dataset.describe()
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
count | 2032.000000 | 2032.000000 | 2032.000000 | 2032.000000 | 2032.000000 | 2.032000e+03 |
mean | 0.480802 | 0.501045 | 0.459207 | 0.480965 | 0.480965 | 1.082908e+09 |
std | 0.601298 | 0.626942 | 0.573988 | 0.601294 | 0.601294 | 1.979570e+09 |
min | 0.023954 | 0.025993 | 0.019130 | 0.023961 | 0.023961 | 2.930550e+06 |
25% | 0.070544 | 0.073276 | 0.068434 | 0.070921 | 0.070921 | 8.421186e+07 |
50% | 0.210612 | 0.222707 | 0.198475 | 0.211688 | 0.211688 | 2.898286e+08 |
75% | 0.552721 | 0.590249 | 0.522270 | 0.552757 | 0.552757 | 1.090055e+09 |
max | 2.966390 | 3.099186 | 2.907606 | 2.968239 | 2.968239 | 1.914198e+10 |
# find null values
dataset.isnull().sum()
Date 0
Open 1
High 1
Low 1
Close 1
Adj Close 1
Volume 1
dtype: int64
dataset = dataset.dropna()
EDA on Cardano Data
# Parse the first and last entries of the Date column into Timestamps.
# `dataset.Date[0]` looks up index label 0; `.values[-1]` takes the last row by position.
# NOTE(review): neither variable is used in the code visible here - confirm before removing.
start_date = pd.to_datetime(dataset.Date[0])
end_date = pd.to_datetime(dataset.Date.values[-1])
# Convert the whole Date column with pd.to_datetime so it can serve as the x-axis below.
dataset['Date'] = pd.to_datetime(dataset['Date'])
# 5x4 grid: the close-price line chart spans the top three rows, full width.
top_plt = plt.subplot2grid((5,4), (0, 0), rowspan = 3, colspan = 4)
top_plt.plot(dataset.Date, dataset["Close"])
plt.title('Cardano Close Price')
# The volume bar chart takes a single row (row index 3) underneath, full width.
bottom_plt = plt.subplot2grid((5,4), (3,0), rowspan = 1, colspan = 4)
bottom_plt.bar(dataset.Date, dataset['Volume'])
# A negative y offset moves this title below the volume axes instead of above them.
plt.title('Cardano Trading Volume', y = -0.60)
# Resize the current figure (both panels) to 16x10 inches.
plt.gcf().set_size_inches(16,10)
Checking for Correlation
sns.heatmap(dataset.corr(), annot = True, fmt = ".1f")
plt.show()
dataset2 = dataset[['Close', 'Volume']]
dataset2.head()
Close | Volume | |
---|---|---|
0 | 0.032053 | 18716200.0 |
1 | 0.027119 | 6766780.0 |
2 | 0.027437 | 5532220.0 |
3 | 0.023977 | 7280250.0 |
4 | 0.025808 | 4419440.0 |
Stationarity tests (ADF/KPSS)
# Augmented Dickey-Fuller test on the closing prices; the lag length is chosen by AIC.
result = adfuller(dataset2.Close.values, autolag='AIC')
# Positions used from the returned tuple: 0 = test statistic, 1 = p-value,
# 4 = dict of critical values keyed by significance level.
adf_statistic, adf_p_value, critical_values = result[0], result[1], result[4]
print(f'ADF Statistic: {adf_statistic}')
print(f'p-value: {adf_p_value}')
# Each critical value is printed under its own heading line.
for level, threshold in critical_values.items():
    print('Critial Values:')
    print(f' {level}, {threshold}')
ADF Statistic: -1.8630247943037663
p-value: 0.3496502319040484
Critial Values:
1%, -3.4336156817103016
Critial Values:
5%, -2.862982604329594
Critial Values:
10%, -2.567537980547385
KPSS test
# KPSS test on the closing prices with regression='c' (constant-only regression).
result = kpss(dataset2['Close'].values, regression='c')
# Positions used from the returned tuple: 0 = test statistic, 1 = p-value,
# 3 = dict of critical values keyed by significance level.
kpss_statistic, kpss_p_value, critical_values = result[0], result[1], result[3]
print('\nKPSS Statistic: %f' % kpss_statistic)
print('p-value: %f' % kpss_p_value)
# Each critical value is printed under its own heading line.
for level, threshold in critical_values.items():
    print('Critial Values:')
    print(f' {level}, {threshold}')
KPSS Statistic: 2.190770
p-value: 0.010000
Critial Values:
10%, 0.347
Critial Values:
5%, 0.463
Critial Values:
2.5%, 0.574
Critial Values:
1%, 0.739
Prediction
ARIMA
plt.rcParams.update({'figure.figsize':(9,7), 'figure.dpi':120})
fig, axes = plt.subplots(3, 2, sharex=True)
axes[0, 0].plot(dataset2['Close'].values); axes[0, 0].set_title('Original Series')
plot_acf(dataset2['Close'].values, ax = axes[0, 1])
axes[1, 0].plot(dataset2['Close'].diff()); axes[1, 0].set_title('1st Order Differencing')
plot_acf(dataset2['Close'].diff().dropna(), ax = axes[1, 1])
axes[2, 0].plot(dataset2['Close'].diff().diff()); axes[2, 0].set_title('2nd Order Differencing')
plot_acf(dataset2['Close'].diff().diff().dropna(), ax = axes[2, 1])
plt.show()
plt.rc("figure", figsize = (10,5))
plot_acf(dataset2['Close'])
print()
plt.rc("figure", figsize=(10,5))
plot_pacf(dataset2['Close'])
print()
plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':120})
fig, axes = plt.subplots(1, 2, sharex = True)
axes[0].plot(dataset2['Close'].diff()); axes[0].set_title('1st Differencing')
axes[1].set(ylim = (0,5))
plot_pacf(dataset2['Close'].diff().dropna(), ax = axes[1])
plt.show()
fig, axes = plt.subplots(1, 2, sharex = True)
axes[0].plot(dataset2['Close'].diff()); axes[0].set_title('1st Differencing')
axes[1].set(ylim = (0,1.2))
plot_acf(dataset2['Close'].diff().dropna(), ax=axes[1])
plt.show()
# Work on the close-price column as a plain array of values.
data = dataset2['Close'].values
print('Length of Total data: ', len(data))
# Chronological 80/20 split by position: the first 80% of observations are used
# for training and the remainder for testing. Nothing is shuffled, so every
# test observation comes after every training observation.
train_length = int(len(data) * 0.8)
train_data = data[:train_length]
test_data = data[train_length:]
print('Train and Test data length: ', len(train_data), len(test_data))
Length of Total data: 2032
Train and Test data length: 1625 407
Build ARIMA Model
# Fit an ARIMA model with order (p=1, d=0, q=8) on the training slice of the close price.
# NOTE(review): d=0 means the series is modelled without differencing, while the
# ADF and KPSS outputs shown earlier in this file both point to a non-stationary
# series - confirm that this order is intentional.
model = ARIMA(train_data, order = (1,0,8))
model_fit = model.fit(low_memory = False)
# Print the coefficient table and fit diagnostics.
print(model_fit.summary())
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 1625
Model: ARIMA(1, 0, 8) Log Likelihood 2531.088
Date: Sun, 04 Jun 2023 AIC -5040.175
Time: 17:55:41 BIC -4980.849
Sample: 0 HQIC -5018.163
- 1625
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 0.4945 0.789 0.627 0.531 -1.052 2.040
ar.L1 0.9976 0.002 590.822 0.000 0.994 1.001
ma.L1 -0.0605 0.011 -5.692 0.000 -0.081 -0.040
ma.L2 0.0191 0.011 1.744 0.081 -0.002 0.041
ma.L3 -0.0685 0.010 -6.584 0.000 -0.089 -0.048
ma.L4 0.0566 0.010 5.523 0.000 0.036 0.077
ma.L5 -0.0502 0.011 -4.619 0.000 -0.072 -0.029
ma.L6 0.0361 0.011 3.238 0.001 0.014 0.058
ma.L7 -0.0189 0.011 -1.662 0.097 -0.041 0.003
ma.L8 0.0030 0.012 0.259 0.796 -0.020 0.026
sigma2 0.0026 3.2e-05 80.844 0.000 0.003 0.003
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 26623.10
Prob(Q): 0.98 Prob(JB): 0.00
Heteroskedasticity (H): 8.53 Skew: 0.53
Prob(H) (two-sided): 0.00 Kurtosis: 22.80
===================================================================================
# Wrap the fitted model's residuals in a DataFrame so they can be plotted with pandas.
residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(1,2)
# Left panel: residuals as a line plot; right panel: their kernel density estimate.
residuals.plot(title = "Residuals", ax = ax[0])
residuals.plot(kind = 'kde', title = 'Density', ax = ax[1])
plt.show()
# Forecast 150 steps from the fitted model.
# NOTE(review): the result shown below is a plain array of point forecasts, so
# `alpha` does not appear to produce a confidence interval here; statsmodels
# exposes intervals through get_forecast(...).conf_int(alpha=...) - confirm.
forecast_result = model_fit.forecast(150, alpha = 0.05) # 95% conf
# Display the first 30 forecast values.
forecast_result[:30]
array([0.91167908, 0.90996577, 0.9105535 , 0.90885626, 0.9084636 ,
0.90684824, 0.90632089, 0.9052688 , 0.90429749, 0.90332849,
0.90236177, 0.90139734, 0.90043519, 0.89947531, 0.8985177 ,
0.89756236, 0.89660928, 0.89565845, 0.89470986, 0.89376352,
0.89281942, 0.89187755, 0.89093791, 0.89000049, 0.88906528,
0.88813229, 0.8872015 , 0.88627291, 0.88534652, 0.88442232])
test_data[:30]
array([0.907154, 0.888503, 0.88635 , 0.898695, 0.827645, 0.840595,
0.84395 , 0.805043, 0.75666 , 0.790344, 0.781529, 0.771145,
0.896942, 0.791152, 0.783359, 0.761882, 0.739563, 0.610088,
0.628963, 0.5128 , 0.473746, 0.528877, 0.539358, 0.5975 ,
0.556716, 0.578077, 0.505615, 0.533321, 0.517907, 0.528314])
Test and Predicted
plt.figure(figsize = (12, 6))
plt.rcParams.update({'font.size': 12})
plt.plot(test_data[:150], '#0077be',label = 'Actual')
plt.plot(forecast_result[:], '#ff8841',label = 'Predicted')
plt.title('ARIMA Model for Cardano Price Forecasting')
plt.ylabel('Cardano Price [in Dollar]')
plt.xlabel('Time Steps [in Days] ')
plt.legend()
plt.show()
Artificial Neural Network
data = dataset2['Close'].values
print('Shape of data: ', data.shape)
Shape of data: (2032,)
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', train_data.shape, test_data.shape)
Train length: 1625
Shape of Train and Test data: (1625,) (407,)
train_data = train_data.reshape(-1, 1)
test_data = test_data.reshape(-1, 1)
print('Shape of Train and Test data: ', train_data.shape, test_data.shape)
Shape of Train and Test data: (1625, 1) (407, 1)
def create_dataset(dataset, lookback):
    """Turn a single-column series into supervised (window, next value) pairs.

    Args:
        dataset: 2-D array indexed as ``dataset[row, 0]``; only column 0 is read.
        lookback: number of consecutive past values in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[i]`` holds
        ``dataset[i:i + lookback, 0]`` and ``y[i]`` is the value that
        immediately follows that window, ``dataset[i + lookback, 0]``.
        Both are empty when the series is not longer than ``lookback``.
    """
    # Every start index whose target position i + lookback is still inside the
    # series is usable. The previous bound (len - lookback - 1) stopped one step
    # early and silently dropped the last valid sample.
    n_samples = len(dataset) - lookback
    dataX = [dataset[i:i + lookback, 0] for i in range(n_samples)]
    dataY = [dataset[i + lookback, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)
plot_pacf(data, lags = 10)
plt.show()
Considering only the leading lags whose partial autocorrelation is greater than 10%
# Partial autocorrelation coefficients of the close price, requested up to lag 20.
pacf_value = pacf(data, nlags=20)
# Count the leading coefficients above 0.1 and stop at the first one that is not.
# NOTE(review): statsmodels documents the first returned coefficient as lag 0,
# so it is included in this count - confirm that is the intended look-back.
lag = 0
for coefficient in pacf_value:
    if not coefficient > 0.1:
        break
    lag += 1
print('Selected look_back (or lag = ): ', lag)
Selected look_back (or lag = ): 2
Separating Input and Output values
train_X, train_y = create_dataset(train_data, lag)
test_X, test_y = create_dataset(test_data, lag)
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
Shape of train_X and train_y: (1622, 2) (1622,)
Shape of test_X and test_y: (404, 2) (404,)
Build MLP model
# Seed NumPy's global random generator.
# NOTE(review): Keras weight initialisation may draw from TensorFlow's own
# generator, in which case this alone would not make training reproducible - confirm.
np.random.seed(7)
# Feed-forward network: `lag` inputs -> 64 ReLU units -> 1 linear output.
model = Sequential()
model.add(Dense(64, input_dim = lag, activation='relu', name= "1st_hidden"))
# model.add(Dense(64, activation='relu', name = '2nd_hidden'))
model.add(Dense(1, name = 'Output_layer', activation = 'linear'))
# model.add(Activation("linear", name = 'Linear_activation'))
# Mean squared error loss, optimised with Adam.
model.compile(loss = "mean_squared_error", optimizer = "adam")
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
1st_hidden (Dense) (None, 64) 192
Output_layer (Dense) (None, 1) 65
=================================================================
Total params: 257
Trainable params: 257
Non-trainable params: 0
_________________________________________________________________
# Training configuration for the MLP.
epoch_number = 100
batches = 64
# shuffle=False keeps the samples in chronological order during training;
# validation_split=0.1 holds out a tenth of the training samples for validation.
history = model.fit(train_X, train_y, epochs = epoch_number, batch_size = batches, verbose = 1, shuffle = False,
                    validation_split = 0.1)
# A bare `plt.clf` (no call parentheses) used to sit here; it did nothing and is
# not needed because a fresh figure is opened on the next line.
plt.figure(figsize = (10,8))
plt.plot(history.history['loss'], label = 'train')
# 'val_loss' is the loss on the validation split above, not on the held-out test set.
plt.plot(history.history['val_loss'], label = 'test')
plt.xlabel('Number of Epochs')
plt.ylabel('Train and Test Loss')
plt.title('Train and Test loss per epochs [Univariate]')
plt.legend()
plt.show()
Predictions
testPredict = model.predict(test_X)
predicted_value = testPredict[:, 0]
13/13 [==============================] - 0s 2ms/step
Metrics
def evaluate_forecast_results(actual, predicted):
    """Print a set of error metrics comparing a forecast with the actual values.

    Prints, in order: R2, MAE, MSE and RMSE (each rounded to 2 decimals),
    followed by NRMSE and WMAPE as returned by the helpers of the same name.

    Args:
        actual: observed target values.
        predicted: model predictions aligned with ``actual``.

    Returns:
        None; the metrics are only printed.
    """
    # MSE feeds both the MSE and the RMSE lines, so compute it once.
    mse = mean_squared_error(actual, predicted)
    print('R2 Score: ', round(r2_score(actual, predicted), 2))
    print('MAE : ', round(mae(actual, predicted), 2))
    print('MSE: ', round(mse, 2))
    print('RMSE: ', round(math.sqrt(mse), 2))
    print('NRMSE: ', NRMSE(actual, predicted))
    print('WMAPE: ', WMAPE(actual, predicted))
def NRMSE(actual, predicted):
    """Return the RMSE normalised by the mean of ``actual``, rounded to 4 decimals.

    Args:
        actual: observed target values.
        predicted: model predictions aligned with ``actual``.
    """
    mse = mean_squared_error(actual, predicted)
    return round(math.sqrt(mse) / np.mean(actual), 4)
def WMAPE(actual, predicted):
    """Return the weighted mean absolute percentage error, rounded to 4 decimals.

    Computed as ``sum(|actual - predicted|) / sum(|actual|)``.

    Args:
        actual: observed target values (array-like of numbers).
        predicted: model predictions aligned with ``actual``.

    Returns:
        The WMAPE as a fraction (0.05 means 5%), never negative.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    # Absolute errors are required here: summing signed errors lets over- and
    # under-predictions cancel, which understates the error and can even make
    # the metric negative.
    abs_error = np.sum(np.abs(actual - predicted))
    wmape = abs_error / np.sum(np.abs(actual))
    return round(wmape, 4)
evaluate_forecast_results(test_y, predicted_value)
R2 Score: 0.96
MAE : 0.01
MSE: 0.0
RMSE: 0.02
NRMSE: 0.0514
WMAPE: 0.0009
plt.figure(figsize = (16, 8))
plt.rcParams.update({'font.size': 12})
plt.plot(test_y[:], '#0077be', label = 'Actual')
plt.plot(predicted_value, '#ff8841', label = 'Predicted')
plt.title('Cardano Close Price Forecasting')
plt.ylabel('Cardano Close Price ')
plt.xlabel('Time Steps [in Days] ')
plt.legend()
plt.show()
RNN
data = dataset2['Close'].values
print('Shape of data: ', data.shape)
Shape of data: (2032,)
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))
Train length: 1625
Shape of Train and Test data: 1625 407
def split_sequence(sequence, n_steps):
    """Slice a sequence into sliding windows and the value following each one.

    Args:
        sequence: indexable, sliceable series of values.
        n_steps: number of consecutive values in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X[i]`` is
        ``sequence[i:i + n_steps]`` and ``y[i]`` is ``sequence[i + n_steps]``.
    """
    windows, targets = [], []
    length = len(sequence)
    for start in range(length):
        stop = start + n_steps
        # No value follows the window once its end reaches the end of the sequence.
        if stop >= length:
            break
        windows.append(sequence[start:stop])
        targets.append(sequence[stop])
    return array(windows), array(targets)
# Partial autocorrelation coefficients of the close price, requested up to lag 20.
pacf_value = pacf(data, nlags=20)
# Count the leading coefficients with more than 10% correlation, stopping at the
# first one that does not exceed the threshold.
# NOTE(review): statsmodels documents the first returned coefficient as lag 0,
# so it is included in this count - confirm that is the intended look-back.
lag = 0
for coefficient in pacf_value:
    if not coefficient > 0.1:
        break
    lag += 1
print('Selected look_back (or lag = ): ', lag)
Shape of train_X and train_y: (1623, 2) (1623,)
Shape of test_X and test_y: (405, 2) (405,)
Reshaping train_X and test_X
# The series is univariate: one feature (the close price) per time step.
# `n_features` was not defined anywhere before this point, and train_X/test_X
# still held the windows built for the MLP, so define it and rebuild the
# windows with split_sequence before reshaping.
n_features = 1
train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)
# Recurrent layers take 3-D input: (samples, time steps, features).
train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
Shape of train_X and train_y: (1623, 2, 1) (1623,)
Shape of test_X and test_y: (405, 2, 1) (405,)
Building the model
model = Sequential()
model.add(SimpleRNN(64, activation='relu', return_sequences = False, input_shape = (lag, n_features)))
model.add(Dense(1))
model.compile(optimizer = 'adam', loss = 'mse')
model.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
simple_rnn (SimpleRNN) (None, 64) 4224
dense (Dense) (None, 1) 65
=================================================================
Total params: 4,289
Trainable params: 4,289
Non-trainable params: 0
_________________________________________________________________
Fitting the model with data
# Ask TensorFlow to run tf.function-wrapped code eagerly.
# NOTE(review): eager execution is usually slower than graph mode - confirm it is needed here.
tf.config.run_functions_eagerly(True)
# Stop training once the monitored 'loss' has not improved for 15 epochs and
# restore the best weights seen.
# NOTE(review): 'loss' is the training loss; a validation split is passed to
# fit() below, so 'val_loss' is also available - confirm which one is intended.
cb = tf.keras.callbacks.EarlyStopping(monitor = 'loss', patience = 15, restore_best_weights = True)
# Train for at most 150 epochs in batches of 64, with 10% of the training samples held out for validation.
history = model.fit(train_X, train_y, epochs = 150, batch_size = 64, verbose = 1, validation_split = 0.1,
callbacks = [cb])
Summarizing model loss (training vs. validation)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc = 'upper left')
plt.show()
Prediction with Test data
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)
print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)
51/51 [==============================] - 1s 12ms/step
13/13 [==============================] - 0s 13ms/step
Shape of train and test predict: (1623, 1) (405, 1)
Model evaluation
actual_ = test_y
predicted_ = test_predict[:, 0]
len(actual_), len(predicted_)
(405, 405)
evaluate_forecast_results(actual_, predicted_)
R2 Score: 0.96
MAE : 0.01
MSE: 0.0
RMSE: 0.02
NRMSE: 0.0517
WMAPE: 0.007
Test and predicted data
plt.rc("figure", figsize = (14,8))
plt.rcParams.update({'font.size': 16})
plt.plot(actual_, label = 'Actual')
plt.plot(predicted_, label = 'Predicted')
plt.xlabel('Time in days')
plt.ylabel('Cardano price')
plt.title('Cardano Close price prediction by Simple RNN - Test data')
plt.legend()
plt.show()
# Training series in its own frame; it keeps the default 0..len-1 index.
df_train = pd.DataFrame(columns = ['Train data'])
df_train['Train data'] = train_data
# Actual and predicted test values side by side.
df = pd.DataFrame(columns = ['Test data', 'Predicted data'])
df['Test data'] = actual_
df['Predicted data'] = predicted_
# Re-index the test frame so it starts where the training series ends and is
# therefore drawn to the right of it on the shared x-axis.
# (A stray `range(...)` expression with no effect was removed from here.)
total_len = len(df_train['Train data']) + len(df['Test data'])
x_list = list(range(len(df_train['Train data']), total_len))
df.index = x_list
plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})
plt.xlabel('Time in days')
plt.ylabel('Cardano price')
plt.title('Cardano price prediction by Simple RNN')
plt.plot(df_train['Train data'])
plt.plot(df[['Test data', 'Predicted data']])
# Legend entries follow plotting order: the train line, then the two columns of df.
plt.legend(['Train', 'Test', 'Predictions'], loc='lower right')
plt.show()
data = dataset2['Close'].values
print('Shape of data: ', data.shape)
Shape of data: (2032,)
# Separate train and test data
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data, test_data = data[:train_length], data[train_length:]
print('Shape train and test data: ', len(train_data), len(test_data))
Train length: 1625
Shape train and test data: 1625 407
def split_sequence(sequence, n_steps):
    """Slice a sequence into sliding windows and the value following each one.

    Args:
        sequence: indexable, sliceable series of values.
        n_steps: number of consecutive values in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X[i]`` is
        ``sequence[i:i + n_steps]`` and ``y[i]`` is ``sequence[i + n_steps]``.
    """
    X, y = list(), list()
    for i in range(len(sequence)):
        end_ix = i + n_steps
        # Stop once no value is left after the window to serve as its target.
        if end_ix > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    # The return statement was missing here, so the function returned None and
    # callers failed when unpacking the result into (X, y).
    return array(X), array(y)
Choosing the appropriate lag value
lag = 2
n_features = 1
train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
Shape of train_X and train_y: (1623, 2) (1623,)
Shape of test_X and test_y: (405, 2) (405,)
Reshaping train_X and test_X to 3D
train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
Shape of train_X and train_y: (1623, 2, 1) (1623,)
Shape of test_X and test_y: (405, 2, 1) (405,)
Building LSTM Model
model = Sequential()
model.add(LSTM(64, activation = 'relu', return_sequences = True, input_shape = (lag, n_features)))
model.add(LSTM(64, activation = 'relu'))
model.add(Dense(1))
model.compile(optimizer = 'adam', loss = 'mse')
model.summary()
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 2, 64) 16896
lstm_1 (LSTM) (None, 64) 33024
dense_1 (Dense) (None, 1) 65
=================================================================
Total params: 49,985
Trainable params: 49,985
Non-trainable params: 0
_________________________________________________________________
Fitting model with data
# Ask TensorFlow to run tf.function-wrapped code eagerly.
# NOTE(review): eager execution is usually slower than graph mode - confirm it is needed here.
tf.config.run_functions_eagerly(True)
# Stop training once the monitored 'loss' has not improved for 15 epochs and
# restore the best weights seen.
# NOTE(review): 'loss' is the training loss; a validation split is passed to
# fit() below, so 'val_loss' is also available - confirm which one is intended.
cb = tf.keras.callbacks.EarlyStopping(monitor = 'loss', patience = 15, restore_best_weights = True)
# Train for at most 150 epochs in batches of 64, with 10% of the training samples held out for validation.
history = model.fit(train_X, train_y, epochs = 150, batch_size = 64, verbose = 1, validation_split = 0.1,
callbacks = [cb])
Summarizing model loss (training vs. validation)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc = 'upper left')
plt.show()
Making the prediction
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)
print('Shape train and test predict: ', train_predict.shape, test_predict.shape)
51/51 [==============================] - 2s 32ms/step
13/13 [==============================] - 0s 33ms/step
Shape train and test predict: (1623, 1) (405, 1)
Model Evaluation
actual_lstm = test_y
predicted_lstm = test_predict[:, 0]
evaluate_forecast_results(actual_lstm, predicted_lstm)
R2 Score: 0.94
MAE : 0.02
MSE: 0.0
RMSE: 0.03
NRMSE: 0.0619
WMAPE: -0.0224
# Training series in its own frame; it keeps the default 0..len-1 index.
df_train = pd.DataFrame(columns = ['Train data'])
df_train['Train data'] = train_data
# Actual and predicted test values side by side.
df = pd.DataFrame(columns = ['Test data', 'Predicted data'])
df['Test data'] = actual_lstm
df['Predicted data'] = predicted_lstm
# Re-index the test frame so it starts where the training series ends and is
# therefore drawn to the right of it on the shared x-axis.
# (A stray `range(...)` expression with no effect was removed from here.)
total_len = len(df_train['Train data']) + len(df['Test data'])
x_list = list(range(len(df_train['Train data']), total_len))
df.index = x_list
plt.rc("figure", figsize = (14,8))
plt.rcParams.update({'font.size': 16})
plt.xlabel('Time in days')
plt.ylabel('Cardano close price')
plt.title('Cardano price prediction using LSTM')
plt.plot(df_train['Train data'])
plt.plot(df[['Test data', 'Predicted data']])
# Legend entries follow plotting order: the train line, then the two columns of df.
plt.legend(['Train', 'Test', 'Predictions'], loc = 'lower right')
plt.show()
The full, runnable example is available on my Kaggle account at the following link:
https://www.kaggle.com/code/mixmore/cardano-price-prediction
This page is only a walkthrough of that Kaggle notebook.