Solana prediction
import os
import numpy as np
import pandas as pd
import math
from sklearn.metrics import mean_squared_error
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM, SimpleRNN
dataset = pd.read_csv('SOL-USD.csv')
dataset.head()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2020-04-10 | 0.832005 | 1.313487 | 0.694187 | 0.951054 | 0.951054 | 87364276.0 |
1 | 2020-04-11 | 0.951054 | 1.049073 | 0.765020 | 0.776819 | 0.776819 | 43862444.0 |
2 | 2020-04-12 | 0.785448 | 0.956670 | 0.762426 | 0.882507 | 0.882507 | 38736897.0 |
3 | 2020-04-13 | 0.890760 | 0.891603 | 0.773976 | 0.777832 | 0.777832 | 18211285.0 |
4 | 2020-04-14 | 0.777832 | 0.796472 | 0.628169 | 0.661925 | 0.661925 | 16747614.0 |
dataset.tail()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
1145 | 2023-05-30 | 20.587137 | 21.381718 | 20.519501 | 21.254084 | 21.254084 | 367512387.0 |
1146 | 2023-05-31 | 21.255285 | 21.318878 | 20.468752 | 20.824503 | 20.824503 | 246152341.0 |
1147 | 2023-06-01 | 20.824892 | 20.972609 | 20.464470 | 20.502218 | 20.502218 | 209047110.0 |
1148 | 2023-06-02 | NaN | NaN | NaN | NaN | NaN | NaN |
1149 | 2023-06-03 | 21.274784 | 21.301369 | 21.239672 | 21.240511 | 21.240511 | 230614624.0 |
dataset.describe()
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
count | 1149.000000 | 1149.000000 | 1149.000000 | 1149.000000 | 1149.000000 | 1.149000e+03 |
mean | 47.080754 | 49.284838 | 44.834747 | 47.074487 | 47.074487 | 1.049443e+09 |
std | 57.721843 | 60.189328 | 54.981287 | 57.656821 | 57.656821 | 1.408045e+09 |
min | 0.513391 | 0.559759 | 0.505194 | 0.515273 | 0.515273 | 6.520200e+05 |
25% | 3.837682 | 4.147920 | 3.639386 | 3.834676 | 3.834676 | 6.336138e+07 |
50% | 26.254124 | 27.696987 | 24.498320 | 26.370008 | 26.370008 | 5.557310e+08 |
75% | 46.903976 | 50.577522 | 44.076366 | 47.179443 | 47.179443 | 1.522024e+09 |
max | 258.781555 | 260.062103 | 246.122421 | 258.934326 | 258.934326 | 1.706864e+10 |
dataset.isnull().sum()
Date 0
Open 1
High 1
Low 1
Close 1
Adj Close 1
Volume 1
dtype: int64
dataset = dataset.dropna()
OHLC
import plotly.graph_objects as go
from datetime import datetime
# Candlestick chart of the OHLC columns, restricted to the [500:] slice of
# each column. 'Adj Close' is used as the closing price.
ohlc_columns = ('Date', 'Open', 'High', 'Low', 'Adj Close')
dates, opens, highs, lows, closes = (dataset[column][500:] for column in ohlc_columns)
candles = go.Candlestick(x=dates, open=opens, high=highs, low=lows, close=closes)
fig = go.Figure(data=[candles])
fig.show()
dataset['Adj Close'].dtype
dtype('float64')
ADF and KPSS Test
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the raw adjusted close series.
# result[0] is the test statistic, result[1] the p-value and result[4] the
# dict of critical values keyed by significance level.
result = adfuller(dataset['Adj Close'].values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
# Print the header once, then one line per significance level.
print('Critical Values:')
for key, value in result[4].items():
    print(f' {key}, {value}')
ADF Statistic: -1.7441687885782005
p-value: 0.40851097149230764
Critial Values:
1%, -3.4361708439503587
Critial Values:
5%, -2.86411024137968
Critial Values:
10%, -2.5681384677365924
from numpy import log
# Augmented Dickey-Fuller test on the log-transformed adjusted close series.
# result[0] is the test statistic, result[1] the p-value and result[4] the
# dict of critical values keyed by significance level.
result = adfuller(log(dataset['Adj Close'].values), autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
# Print the header once, then one line per significance level.
print('Critical Values:')
for key, value in result[4].items():
    print(f' {key}, {value}')
ADF Statistic: -1.9329579891130544
p-value: 0.31671895485076934
Critial Values:
1%, -3.436064032324827
Critial Values:
5%, -2.864063122757945
Critial Values:
10%, -2.5681133731450605
KPSS Test
from statsmodels.tsa.stattools import kpss
# KPSS test on the adjusted close series (regression='c').
# result[0] is the test statistic, result[1] the p-value and result[3] the
# dict of critical values keyed by significance level.
result = kpss(dataset['Adj Close'].values, regression='c')
print('\nKPSS Statistic: %f' % result[0])
print('p-value: %f' % result[1])
# Print the header once, then one line per significance level.
print('Critical Values:')
for key, value in result[3].items():
    print(f' {key}, {value}')
KPSS Statistic: 1.091674
p-value: 0.010000
Critial Values:
10%, 0.347
Critial Values:
5%, 0.463
Critial Values:
2.5%, 0.574
Critial Values:
1%, 0.739
The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.
Autocorrelation Function (ACF)
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import pacf
plt.rc("figure", figsize=(10,5))
plot_acf(dataset['Adj Close'])
print()
Partial Autocorrelation Function (PACF)
plt.rc("figure", figsize=(10,5))
plot_pacf(dataset['Adj Close'])
print()
The default method 'yw' can produce PACF values outside of the [-1,1] interval. After 0.13,
the default will change tounadjusted Yule-Walker ('ywm'). You can use this method
now by setting method='ywm'.
Price Forecasting
Adj Close price
data = dataset['Adj Close'].values
print('Shape of data: ', data.shape)
Shape of data: (1149,)
Train and Test data
# train and test data
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', train_data.shape, test_data.shape)
Train length: 919
Shape of Train and Test data: (919,) (230,)
Change Shape
train_data = train_data.reshape(-1, 1)
test_data = test_data.reshape(-1, 1)
print('Shape of Train and Test data: ', train_data.shape, test_data.shape)
Shape of Train and Test data: (919, 1) (230, 1)
Split
def create_dataset(dataset, lookback):
    """Build sliding-window samples for one-step-ahead forecasting.

    Args:
        dataset: 2-D array; only column 0 is read.
        lookback: number of consecutive values in each input window.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays. ``X[i]`` holds
        ``dataset[i:i + lookback, 0]`` and ``y[i]`` is the value that
        immediately follows it, ``dataset[i + lookback, 0]``. Both are empty
        when ``dataset`` has ``lookback`` rows or fewer.
    """
    # Every start index whose target row (i + lookback) still exists is
    # valid, which gives len(dataset) - lookback samples. Subtracting a
    # further 1 here would silently discard the most recent observation.
    n_samples = len(dataset) - lookback
    dataX = [dataset[i:i + lookback, 0] for i in range(n_samples)]
    dataY = [dataset[i + lookback, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)
plot_pacf(data, lags=10)
plt.show()
pacf_value = pacf(data, nlags=20)
# Count how many leading PACF values exceed 0.1, stopping at the first one
# that does not; that count becomes the look-back window length.
# NOTE(review): the count starts at pacf_value[0]; if that entry is the
# lag-0 coefficient, the result is one larger than the number of significant
# lags - confirm this is intended.
lag = 0
while lag < len(pacf_value) and pacf_value[lag] > 0.1:
    lag += 1
print('Selected look_back (or lag = ): ', lag)
Selected look_back (or lag = ): 2
train_X, train_y = create_dataset(train_data, lag)
test_X, test_y = create_dataset(test_data, lag)
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
Shape of train_X and train_y: (916, 2) (916,)
Shape of test_X and test_y: (227, 2) (227,)
# Show the first 20 raw training values followed by the first 20
# (window, target) pairs built from them. The pairs come from the training
# arrays so they line up with the values printed above, and slicing both
# arrays keeps the loop safe when fewer than 20 samples exist.
print(train_data[:20])
for window, target in zip(train_X[:20], train_y[:20]):
    print(window, target)
[[0.951054]
[0.776819]
[0.882507]
[0.777832]
[0.661925]
[0.646651]
[0.690816]
[0.660728]
[0.681096]
[0.606969]
[0.538812]
[0.587659]
[0.691601]
[0.627457]
[0.634242]
[0.643329]
[0.635506]
[0.572372]
[0.661293]
[0.74584 ]]
[30.16884 31.224876] 30.164331
[31.224876 30.164331] 28.900799
[30.164331 28.900799] 28.085096
[28.900799 28.085096] 28.108677
[28.085096 28.108677] 28.013863
[28.108677 28.013863] 29.270071
[28.013863 29.270071] 28.310064
[29.270071 28.310064] 30.939976
[28.310064 30.939976] 31.284721
[30.939976 31.284721] 30.630625
[31.284721 30.630625] 32.111263
[30.630625 32.111263] 32.857128
[32.111263 32.857128] 32.965931
[32.857128 32.965931] 32.611038
[32.965931 32.611038] 32.248425
[32.611038 32.248425] 30.788076
[32.248425 30.788076] 30.84388
[30.788076 30.84388 ] 33.780853
[30.84388 33.780853] 36.765762
[33.780853 36.765762] 32.683582
# Seed both NumPy and TensorFlow: Keras layer weights are initialised by
# TensorFlow's generators, so seeding NumPy alone does not make runs repeatable.
np.random.seed(7)
tf.random.set_seed(7)

# Feed-forward network: `lag` past prices in, one ReLU hidden layer of 64
# units, and a single linear output for the next price.
model = Sequential()
model.add(Dense(64, input_dim=lag, activation='relu', name="1st_hidden"))
model.add(Dense(1, name='Output_layer', activation='linear'))
model.compile(loss="mean_squared_error", optimizer="adam")
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
1st_hidden (Dense) (None, 64) 192
Output_layer (Dense) (None, 1) 65
=================================================================
Total params: 257
Trainable params: 257
Non-trainable params: 0
_________________________________________________________________
epoch_number = 100
batches = 64
# shuffle=False keeps the samples in their original order during training.
# validation_split=0.1 holds out a tenth of train_X/train_y, so 'val_loss'
# below is measured on held-out training data, not on test_X.
history = model.fit(train_X, train_y, epochs=epoch_number, batch_size=batches, verbose=1,
                    shuffle=False,
                    validation_split=0.1)

# plt.figure() already starts a fresh figure, so no explicit clear is needed.
plt.figure(figsize=(10, 8))
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.xlabel('Number of Epochs')
plt.ylabel('Train and Test Loss')
plt.title('Train and Test loss per epochs [Univariate]')
plt.legend()
plt.show()
Prediction
# One forward pass over the test windows is enough; running the prediction
# twice only repeats the same computation and overwrites the first result.
testPredict = model.predict(test_X)
8/8 [==============================] - 0s 2ms/step
testPredict[:10]
array([[31.047565],
[30.675545],
[29.482199],
[28.52454 ],
[28.27652 ],
[28.220348],
[29.034996],
[28.795387],
[30.254524],
[31.337303]], dtype=float32)
Model evaluation
testScore = math.sqrt(mean_squared_error(test_y[:], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
Test Score: 1.46 RMSE
Actual Test data and Predicted Data
plt.figure(figsize=(16, 8))
plt.rcParams.update({'font.size': 12})
plt.plot(test_y[:], '#0077be',label = 'Actual')
plt.plot(testPredict[:,0], '#ff8841',label = 'Predicted')
plt.title('Solana Forecasting')
plt.ylabel('Solana Price [in Dollar]')
plt.xlabel('Time Steps [in Days] ')
plt.legend()
plt.show()
RNN - Recurrent Neural Network
"Adj Close" price
data = dataset['Adj Close'].values
print('Shape of data: ', data.shape)
Shape of data: (1149,)
# train and test data
train_length = int(len(data) * 0.8)
print('Train length: ', train_length)
train_data, test_data = data[:train_length], data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))
Train length: 919
Shape of Train and Test data: 919 230
from numpy import array
def split_sequence(sequence, n_steps):
    """Split a 1-D sequence into (window, next value) training pairs.

    Args:
        sequence: indexable, sliceable sequence of observations.
        n_steps: number of consecutive observations in each window.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays where ``X[i]`` is
        ``sequence[i:i + n_steps]`` and ``y[i]`` is ``sequence[i + n_steps]``.
    """
    total = len(sequence)
    # A window starting at `start` is usable only while its target index
    # start + n_steps is still inside the sequence.
    usable_starts = range(min(total, total - n_steps))
    windows = [sequence[start:start + n_steps] for start in usable_starts]
    targets = [sequence[start + n_steps] for start in usable_starts]
    return array(windows), array(targets)
lag = 2
n_features = 1
train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
Shape of train_X and train_y: (917, 2) (917,)
Shape of test_X and test_y: (228, 2) (228,)
Reshape train_X and test_X
train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], n_features))
test_X = test_X.reshape((test_X.shape[0], test_X.shape[1], n_features))
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)
Shape of train_X and train_y: (917, 2, 1) (917,)
Shape of test_X and test_y: (228, 2, 1) (228,)
model = Sequential()
model.add(SimpleRNN(64, activation='relu', return_sequences=False,
input_shape=(lag, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
simple_rnn (SimpleRNN) (None, 64) 4224
dense (Dense) (None, 1) 65
=================================================================
Total params: 4,289
Trainable params: 4,289
Non-trainable params: 0
_________________________________________________________________
tf.config.run_functions_eagerly(True)
history = model.fit(train_X, train_y, epochs = 50, batch_size=64, verbose=1,
validation_split= 0.1)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)
print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)
29/29 [==============================] - 0s 12ms/step
8/8 [==============================] - 0s 13ms/step
Shape of train and test predict: (917, 1) (228, 1)
def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between actual and predicted values."""
    mse = mean_squared_error(actual, predicted)
    return math.sqrt(mse)
train_score = measure_rmse(train_y, train_predict)
test_score = measure_rmse(test_y, test_predict)
print('Train and Test RMSE: ', train_score, test_score)
Train and Test RMSE: 5.650083629122014 1.6063078004117175
Predicted data
plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})
plt.plot(test_y, label = 'Actual')
plt.plot(test_predict, label = 'Predicted')
plt.xlabel('Time in days')
plt.ylabel('Adjusted Close price')
plt.title('Solana price prediction using Simple RNN - Test data')
plt.legend()
plt.show()
The full example is on my Kaggle account. This is the link.
https://www.kaggle.com/code/mixmore/solana-prediction
This is just an explanation of the example on Kaggle.