Ethereum price prediction
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
Exploratory Data Analysis (EDA)
# Load the ETH-USD price history; read_csv already returns a DataFrame.
dataset = pd.read_csv("ETH-USD.csv")
dataset.shape
(2032, 7)
# Preview the first rows of the frame.
dataset.head()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2017-11-09 | 308.644989 | 329.451996 | 307.056000 | 320.884003 | 320.884003 | 893249984 |
1 | 2017-11-10 | 320.670990 | 324.717987 | 294.541992 | 299.252991 | 299.252991 | 885985984 |
2 | 2017-11-11 | 298.585999 | 319.453003 | 298.191986 | 314.681000 | 314.681000 | 842300992 |
3 | 2017-11-12 | 314.690002 | 319.153015 | 298.513000 | 307.907990 | 307.907990 | 1613479936 |
4 | 2017-11-13 | 307.024994 | 328.415009 | 307.024994 | 316.716003 | 316.716003 | 1041889984 |
# Preview the last rows of the frame.
dataset.tail()
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
2027 | 2023-05-29 | 1909.297485 | 1926.421753 | 1879.077515 | 1893.078125 | 1893.078125 | 5884674572 |
2028 | 2023-05-30 | 1893.093140 | 1916.574951 | 1883.934692 | 1901.026611 | 1901.026611 | 5363439784 |
2029 | 2023-05-31 | 1901.098267 | 1907.035400 | 1852.094727 | 1874.130493 | 1874.130493 | 5984512548 |
2030 | 2023-06-01 | 1873.914673 | 1887.705322 | 1846.227417 | 1862.201416 | 1862.201416 | 5640027197 |
2031 | 2023-06-02 | 1862.266724 | 1909.625732 | 1851.964722 | 1906.706543 | 1906.706543 | 6151811584 |
# Count the missing values in each column.
dataset.isnull().sum()
Date 0
Open 0
High 0
Low 0
Close 0
Adj Close 0
Volume 0
dtype: int64
# Correlation heatmap of the numeric columns only. "Date" is still a string
# (object) column at this point, and pandas >= 2.0 raises on non-numeric
# columns in DataFrame.corr() instead of silently skipping them.
plt.figure(figsize=(10, 10))
sns.heatmap(
    dataset.select_dtypes(include="number").corr(),
    annot=True,
    fmt=".1g",
    vmin=-1,
    vmax=1,
    center=0,
    linewidths=3,  # documented heatmap parameter for the cell separator width
    linecolor="black",
    square=True,
)
plt.show()
# Print column dtypes, non-null counts and memory usage.
dataset.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2032 entries, 0 to 2031
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 2032 non-null object
1 Open 2032 non-null float64
2 High 2032 non-null float64
3 Low 2032 non-null float64
4 Close 2032 non-null float64
5 Adj Close 2032 non-null float64
6 Volume 2032 non-null int64
dtypes: float64(5), int64(1), object(1)
memory usage: 111.2+ KB
# Two stacked panels sharing the same x positions: prices on top, volume below.
plt.figure(figsize = (20, 12))
# x is the row index (0 .. number of rows - 1), used instead of the Date column.
x = np.arange(0, dataset.shape[0], 1)
# Top panel: open and close prices.
plt.subplot(2,1,1)
plt.plot(x, dataset.Open.values, color = "red", label = "Open Ethereum Price")
plt.plot(x, dataset.Close.values, color = "blue", label = "Close Ethereum Price")
plt.title("Ethereum Prices", fontsize = 18)
plt.xlabel("Days", fontsize = 18)
plt.ylabel("Ethereum Prices in US Dollar", fontsize = 18)
plt.legend(loc = "best")
plt.grid(which = "major", axis = "both")
# Bottom panel: the Volume column.
plt.subplot(2,1,2)
plt.plot(x, dataset.Volume.values, color = "green", label = "Ethereum Volume Available")
plt.title("Volume of Ethereum", fontsize = 18)
plt.xlabel("Days", fontsize = 18)
plt.ylabel("Volume", fontsize = 18)
plt.legend(loc = "best")
plt.grid(which = "major", axis = "both")
plt.show()
Hyperparameters
TIME_STEP = 7  # number of consecutive past values that form one input window
DAYS = 20  # number of days at the end of the series to predict (validation set)


def dataset_split(dataset):
    """Split a time-ordered series into training and validation parts.

    The last ``DAYS`` rows are held out for validation. The validation slice
    additionally keeps the ``TIME_STEP`` rows that precede them, so that the
    first held-out row still has a complete look-back window.

    Args:
        dataset: Sliceable, time-ordered sequence (list, NumPy array, ...).

    Returns:
        A ``(train, val)`` tuple where ``train`` is everything except the last
        ``DAYS`` rows and ``val`` is the last ``DAYS + TIME_STEP`` rows.

    Raises:
        ValueError: If the series has fewer than ``DAYS + TIME_STEP`` rows.
            Without this check the negative start index would wrap around
            and silently return a wrong validation slice.
    """
    total = len(dataset)
    if total < DAYS + TIME_STEP:
        raise ValueError(
            f"dataset needs at least {DAYS + TIME_STEP} rows, got {total}"
        )
    split_at = total - DAYS
    train = dataset[:split_at]
    val = dataset[split_at - TIME_STEP:]
    return train, val
# Discard every column except "Open" and continue with the raw NumPy values.
unused_columns = ["Date", "High", "Low", "Close", "Volume", "Adj Close"]
dataset = dataset.drop(columns=unused_columns).values
Scaling
import sklearn
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on the training rows only, so the min/max of the held-out
# days does not leak into training. The fitted scaler is then applied to the
# whole series; validation values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
train_raw = dataset_split(dataset)[0]
scaler.fit(train_raw)
dataset_scaled = scaler.transform(dataset)
train, val = dataset_split(dataset_scaled)
train.shape, val.shape
((2012, 1), (27, 1))
Dataset preparation
# Turn each split into supervised samples: every input is a window of
# TIME_STEP consecutive values and its target is the value right after it.
train_ends = range(TIME_STEP, train.shape[0])
train_x = np.array([train[end - TIME_STEP:end, 0] for end in train_ends])
train_y = np.array([train[end, 0] for end in train_ends])

val_ends = range(TIME_STEP, val.shape[0])
val_x = np.array([val[end - TIME_STEP:end, 0] for end in val_ends])
val_y = np.array([val[end, 0] for end in val_ends])

# Append a trailing axis of size 1: (samples, TIME_STEP) -> (samples, TIME_STEP, 1).
train_x = train_x.reshape(train_x.shape[0], train_x.shape[1], 1)
val_x = val_x.reshape(val_x.shape[0], val_x.shape[1], 1)

print("Reshaped train_x = ", train_x.shape)
print("Shape of train_y = ", train_y.shape)
print("Reshaped val_x = ", val_x.shape)
print("Shape of val_y = ", val_y.shape)
Reshaped train_x = (2005, 7, 1)
Shape of train_y = (2005,)
Reshaped val_x = (20, 7, 1)
Shape of val_y = (20,)
LSTM (Long Short-Term Memory):
import tensorflow as tf
# Restrict TensorFlow to CUDA device 0, then list the GPUs it can see.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpus = tf.config.list_physical_devices("GPU")
print(gpus)
# Exactly one visible GPU -> pin to it; any other count -> MirroredStrategy.
strategy = (
    tf.distribute.OneDeviceStrategy(device="/gpu:0")
    if len(gpus) == 1
    else tf.distribute.MirroredStrategy()
)
[]
# Switch on TensorFlow's "auto_mixed_precision" graph-optimizer option.
tf.config.optimizer.set_experimental_options({"auto_mixed_precision" : True})
print("Mixed precision enabled")
Mixed precision enabled
# Halve the learning rate after 10 epochs without improvement in the training
# loss, but never go below 1e-6.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="loss", factor=0.5, patience=10, min_lr=0.000001, verbose=1
)
# Stop after 25 epochs without improvement in the training loss. With
# restore_best_weights=False the model keeps the weights of the last epoch run.
monitor_es = tf.keras.callbacks.EarlyStopping(
    monitor="loss", patience=25, restore_best_weights=False, verbose=True
)

# Three stacked LSTM layers (each followed by dropout) and a small dense head
# that emits one value per input window.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.LSTM(
        units=128, return_sequences=True, input_shape=(train_x.shape[1], 1)
    )
)
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.LSTM(units=128, return_sequences=True))
model.add(tf.keras.layers.Dropout(0.4))
# The last recurrent layer returns only its final step for the dense head.
model.add(tf.keras.layers.LSTM(units=128, return_sequences=False))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(units=10, activation="relu"))
model.add(tf.keras.layers.Dense(units=1, activation="relu"))

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by current Keras releases.
model.compile(
    tf.keras.optimizers.Adam(learning_rate=0.001), loss="mean_squared_error"
)
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 7, 128) 66560
dropout (Dropout) (None, 7, 128) 0
lstm_1 (LSTM) (None, 7, 128) 131584
dropout_1 (Dropout) (None, 7, 128) 0
lstm_2 (LSTM) (None, 128) 131584
dropout_2 (Dropout) (None, 128) 0
dense (Dense) (None, 10) 1290
dense_1 (Dense) (None, 1) 11
=================================================================
Total params: 331,029
Trainable params: 331,029
Non-trainable params: 0
_________________________________________________________________
# Train on the first GPU when one is visible; otherwise use the CPU rather
# than requesting a GPU device that does not exist.
training_device = "/device:GPU:0" if gpus else "/device:CPU:0"
with tf.device(training_device):
    history = model.fit(
        train_x,
        train_y,
        epochs=300,
        batch_size=16,
        callbacks=[reduce_lr, monitor_es],
    )
# Training-loss curve, one point per completed epoch.
plt.figure(figsize=(12, 4))
plt.plot(history.history["loss"], label="Training loss")
plt.title("Loss analysis", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.ylabel("Loss", fontsize=18)
plt.legend(["Train"])
# The first positional argument of plt.grid is the on/off flag, so the grid
# is switched on explicitly and the axes are selected by keyword.
plt.grid(True, which="major", axis="both")

Save the model
# Persist the network in two files: the architecture as JSON text and the
# weights as an HDF5 file.
model_json = model.to_json()
with open("AMD_open_1.json", mode="w") as json_file:
    json_file.write(model_json)
model.save_weights("AMD_open_1.h5")
Load the model
from keras.models import model_from_json
# Read the saved architecture; the context manager closes the file even if
# reading raises.
with open('AMD_open_1.json', 'r') as json_file:
    loaded_model_json = json_file.read()
# Rebuild the network from its JSON description, then restore the weights.
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("AMD_open_1.h5")
print("Loaded model from disk")
# Compile the restored model with the same loss that was used for training.
loaded_model.compile(loss='mean_squared_error', optimizer='adam')
Loaded model from disk
# Actual values of the held-out days (the leading TIME_STEP rows of `val` are
# only look-back context), mapped back to the original price scale.
real_prices = scaler.inverse_transform(val[TIME_STEP:])
# Model output for the validation windows, mapped back the same way.
predicted_prices = scaler.inverse_transform(loaded_model.predict(val_x))
1/1 [==============================] - 2s 2s/step
# Real versus predicted open prices over the held-out days.
plt.figure(figsize= (16, 5))
plt.subplot(1,1,1)
# One x position per held-out day.
x = np.arange(0, DAYS, 1)
plt.plot(x, real_prices, color = "red", label = "Real Ethereum Prices")
plt.plot(x, predicted_prices, color = "blue", label = "Predicted Ethereum Prices")
plt.title("Ethereum Open Prices", fontsize = 18)
plt.xlabel("Time In Days", fontsize = 18)
plt.ylabel("Ethereum Prices in US Dollars", fontsize = 18)
plt.legend()
# Undo the scaling on the training rows to recover the original prices.
original_training_prices = scaler.inverse_transform(train)
original_training_prices
array([[ 308.644989],
[ 320.67099 ],
[ 298.585999],
...,
[1842.492065],
[1795.176636],
[1808.375 ]])
# x positions: the training rows first, the remaining rows right after them.
train_len, total_len = len(original_training_prices), len(dataset)
x1 = np.arange(train_len)
x2 = np.arange(train_len, total_len)
print(len(x1), len(x2))
2012 20
# Whole series in one figure: training prices in green, followed by the real
# and predicted prices of the remaining days.
plt.figure(figsize=(16, 8))
plt.subplot(1, 1, 1)
X = len(dataset)
# x1 covers the training rows; x2 continues from there up to the end.
x1 = np.arange(0, len(original_training_prices), 1)
x2 = np.arange(len(original_training_prices), X, 1)
plt.plot(x1, original_training_prices, color="green")
plt.plot(x2, real_prices, color="red", label="Real Ethereum Prices")
plt.plot(x2, predicted_prices, color="blue", label="Predicted Ethereum Prices")
plt.title("Ethereum Open Prices", fontsize=18)
plt.xlabel("Time In Days", fontsize=18)
plt.ylabel("Ethereum Prices in US Dollars", fontsize=18)
plt.legend()
# The first positional argument of plt.grid is the on/off flag, so the grid
# is switched on explicitly and the axes are selected by keyword.
plt.grid(True, which="major", axis="both")
The full example is available on my Kaggle account at the following link:
https://www.kaggle.com/code/mixmore/ethereum-price-prediction
This is just an explanation of the example on Kaggle.