Ethereum price prediction

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Exploratory Data Analysis (EDA)

# Load the ETH-USD price history. read_csv already returns a DataFrame, so
# the extra pd.DataFrame(...) wrapper was redundant.
dataset = pd.read_csv("ETH-USD.csv")

dataset.shape

(2032, 7)

dataset.head()

	Date	Open	High	Low	Close	Adj Close	Volume
0	2017-11-09	308.644989	329.451996	307.056000	320.884003	320.884003	893249984
1	2017-11-10	320.670990	324.717987	294.541992	299.252991	299.252991	885985984
2	2017-11-11	298.585999	319.453003	298.191986	314.681000	314.681000	842300992
3	2017-11-12	314.690002	319.153015	298.513000	307.907990	307.907990	1613479936
4	2017-11-13	307.024994	328.415009	307.024994	316.716003	316.716003	1041889984

  dataset.tail()
 

	Date	Open	High	Low	Close	Adj Close	Volume
2027	2023-05-29	1909.297485	1926.421753	1879.077515	1893.078125	1893.078125	5884674572
2028	2023-05-30	1893.093140	1916.574951	1883.934692	1901.026611	1901.026611	5363439784
2029	2023-05-31	1901.098267	1907.035400	1852.094727	1874.130493	1874.130493	5984512548
2030	2023-06-01	1873.914673	1887.705322	1846.227417	1862.201416	1862.201416	5640027197
2031	2023-06-02	1862.266724	1909.625732	1851.964722	1906.706543	1906.706543	6151811584

# find null values
dataset.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0

dtype: int64

# Correlation heatmap of the numeric columns. numeric_only=True keeps the
# string-typed Date column out of corr(); pandas >= 2.0 no longer drops
# non-numeric columns silently and raises on them instead.
plt.figure(figsize=(10, 10))
sns.heatmap(
    dataset.corr(numeric_only=True),
    annot=True,
    fmt=".1g",
    vmin=-1,
    vmax=1,
    center=0,
    linewidth=3,
    linecolor="black",
    square=True,
)
plt.show()

  
    dataset.info()

    
 <class 'pandas.core.frame.DataFrame'>
RangeIndex: 2032 entries, 0 to 2031
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       2032 non-null   object 
 1   Open       2032 non-null   float64
 2   High       2032 non-null   float64
 3   Low        2032 non-null   float64
 4   Close      2032 non-null   float64
 5   Adj Close  2032 non-null   float64
 6   Volume     2032 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 111.2+ KB

# Two stacked panels over a shared row index: open/close prices on top,
# traded volume underneath.
plt.figure(figsize=(20, 12))
x = np.arange(dataset.shape[0])

# Top panel: opening and closing price.
plt.subplot(2, 1, 1)
plt.plot(x, dataset["Open"].values, color="red", label="Open Ethereum Price")
plt.plot(x, dataset["Close"].values, color="blue", label="Close Ethereum Price")
plt.title("Ethereum Prices", fontsize=18)
plt.xlabel("Days", fontsize=18)
plt.ylabel("Ethereum Prices in US Dollar", fontsize=18)
plt.legend(loc="best")
plt.grid(which="major", axis="both")

# Bottom panel: volume.
plt.subplot(2, 1, 2)
plt.plot(x, dataset["Volume"].values, color="green", label="Ethereum Volume Available")
plt.title("Volume of Ethereum", fontsize=18)
plt.xlabel("Days", fontsize=18)
plt.ylabel("Volume", fontsize=18)
plt.legend(loc="best")
plt.grid(which="major", axis="both")
plt.show()

Hyperparameters

TIME_STEP = 7  # number of consecutive past values in each input window
DAYS = 20  # number of days at the end for which we have to predict; these form the validation set


def dataset_split(dataset, days=None, time_step=None):
    """Split a time-ordered series into training and validation parts.

    The last ``days`` rows are held out for validation. The validation slice
    additionally keeps the ``time_step`` rows that precede them, so a full
    look-back window exists for the first held-out row.

    Args:
        dataset: Sliceable sequence (list, NumPy array, ...) in time order.
        days: Number of trailing rows to hold out. Defaults to ``DAYS``.
        time_step: Look-back window length. Defaults to ``TIME_STEP``.

    Returns:
        A ``(train, val)`` tuple, where ``train`` is everything except the
        last ``days`` rows and ``val`` is the last ``days + time_step`` rows.

    Raises:
        ValueError: If ``days`` or ``time_step`` is negative, or if the
            series is shorter than ``days + time_step``.
    """
    # Resolve defaults at call time so later changes to the module-level
    # constants are still honoured.
    days = DAYS if days is None else days
    time_step = TIME_STEP if time_step is None else time_step
    if days < 0 or time_step < 0:
        raise ValueError("days and time_step must be non-negative")

    total = len(dataset)
    if total < days + time_step:
        # Without this check the start index goes negative and the slice
        # silently wraps around, returning a truncated validation set.
        raise ValueError(
            f"dataset has {total} rows but at least {days + time_step} are required"
        )

    split = total - days
    train = dataset[:split]
    val = dataset[split - time_step:]
    return train, val

# Discard every column except "Open", then continue with the raw NumPy values.
dataset.drop(
    columns=["Date", "High", "Low", "Close", "Volume", "Adj Close"],
    inplace=True,
)
dataset = dataset.to_numpy()

Scaling

import sklearn
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training rows only (everything except the last DAYS
# rows), so the min/max of the held-out period never leak into preprocessing.
# The whole series is then scaled with those training statistics.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[: len(dataset) - DAYS])
dataset_scaled = scaler.transform(dataset)

train, val = dataset_split(dataset_scaled)

train.shape, val.shape

((2012, 1), (27, 1))

Dataset preparation

# Turn each series into supervised samples: every run of TIME_STEP
# consecutive values is one input, and the value right after it is the target.
train_x = np.array(
    [train[end - TIME_STEP:end, 0] for end in range(TIME_STEP, train.shape[0])]
)
train_y = np.array([train[end, 0] for end in range(TIME_STEP, train.shape[0])])

val_x = np.array(
    [val[end - TIME_STEP:end, 0] for end in range(TIME_STEP, val.shape[0])]
)
val_y = np.array([val[end, 0] for end in range(TIME_STEP, val.shape[0])])

# Append a trailing axis of size 1: (samples, window) -> (samples, window, 1).
train_x = train_x.reshape((train_x.shape[0], train_x.shape[1], 1))
val_x = val_x.reshape((val_x.shape[0], val_x.shape[1], 1))

print("Reshaped train_x = ", train_x.shape)
print("Shape of train_y = ", train_y.shape)

print("Reshaped val_x = ", val_x.shape)
print("Shape of val_y = ", val_y.shape)

Reshaped train_x =  (2005, 7, 1)
Shape of train_y =  (2005,)
Reshaped val_x =  (20, 7, 1)
Shape of val_y =  (20,)

LSTM (Long Short-Term Memory)

import tensorflow as tf

# Expose only GPU 0 to TensorFlow, list the GPUs it can see, and choose a
# distribution strategy: pin to that GPU when exactly one is visible,
# otherwise use MirroredStrategy.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpus = tf.config.list_physical_devices("GPU")
print(gpus)
if len(gpus) == 1:
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
else:
    strategy = tf.distribute.MirroredStrategy()

[]

# Switch on the "auto_mixed_precision" option of TensorFlow's optimizer
# config; the print is only a confirmation message.
tf.config.optimizer.set_experimental_options({"auto_mixed_precision" : True})
print("Mixed precision enabled")

Mixed precision enabled

# Halve the learning rate after 10 epochs without improvement in training
# loss, never going below 1e-6.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="loss",
    factor=0.5,
    patience=10,
    min_lr=1e-6,
    verbose=1,
)
# Stop after 25 epochs without improvement in training loss; the weights of
# the last epoch are kept (restore_best_weights is off).
monitor_es = tf.keras.callbacks.EarlyStopping(
    monitor="loss",
    patience=25,
    restore_best_weights=False,
    verbose=True,
)

# Stacked LSTM regressor: three 128-unit LSTM layers, each followed by 40%
# dropout, then a 10-unit dense layer and a single-unit output. The input is
# a window of train_x.shape[1] steps with one feature per step.
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(units=128, return_sequences=True, input_shape=(train_x.shape[1], 1)))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.LSTM(units=128, return_sequences=True))
model.add(tf.keras.layers.Dropout(0.4))
# The last LSTM returns only its final state, which feeds the dense head.
model.add(tf.keras.layers.LSTM(units=128, return_sequences=False))
model.add(tf.keras.layers.Dropout(0.4))

model.add(tf.keras.layers.Dense(units=10, activation="relu"))
model.add(tf.keras.layers.Dense(units=1, activation="relu"))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(tf.keras.optimizers.Adam(learning_rate=0.001), loss="mean_squared_error")

model.summary()

Model: "sequential"

_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm (LSTM)                 (None, 7, 128)            66560     
                                                                 
 dropout (Dropout)           (None, 7, 128)            0         
                                                                 
 lstm_1 (LSTM)               (None, 7, 128)            131584    
                                                                 
 dropout_1 (Dropout)         (None, 7, 128)            0         
                                                                 
 lstm_2 (LSTM)               (None, 128)               131584    
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 dense (Dense)               (None, 10)                1290      
                                                                 
 dense_1 (Dense)             (None, 1)                 11        
                                                                 
=================================================================
Total params: 331,029
Trainable params: 331,029
Non-trainable params: 0
_________________________________________________________________

# Train for up to 300 epochs in batches of 16, with the reduce_lr and
# monitor_es callbacks attached.
with tf.device("/device:GPU:0"):
    history = model.fit(
        train_x,
        train_y,
        epochs=300,
        batch_size=16,
        callbacks=[reduce_lr, monitor_es],
    )

# Training-loss curve, one point per epoch.
plt.figure(figsize=(12, 4))
plt.plot(history.history["loss"], label="Training loss")
plt.title("Loss analysis", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.ylabel("Loss", fontsize=18)
plt.legend(["Train"])
# The first positional argument of plt.grid is the on/off flag, so "both" was
# only being read as a truthy value. State the intent explicitly.
plt.grid(True, which="major", axis="both")

Save the model

# Store the architecture as JSON and the weights as HDF5.
model_json = model.to_json()
with open("AMD_open_1.json", "w") as json_file:
    json_file.write(model_json)

model.save_weights("AMD_open_1.h5")

Load the model

from keras.models import model_from_json
# Rebuild the model from the saved JSON architecture, then load its weights.
# The context manager closes the file even if reading fails.
with open("AMD_open_1.json", "r") as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("AMD_open_1.h5")
print("Loaded model from disk")
loaded_model.compile(loss="mean_squared_error", optimizer="adam")

Loaded model from disk

# Actual values: skip the first TIME_STEP rows of the validation slice (they
# only serve as look-back context) and undo the scaling.
real_prices = scaler.inverse_transform(val[TIME_STEP:])

# Model output for the validation windows, mapped back to the original scale.
predicted_prices = scaler.inverse_transform(loaded_model.predict(val_x))

1/1 [==============================] - 2s 2s/step

# Real vs. predicted open price over the DAYS held-out days.
plt.figure(figsize=(16, 5))
plt.subplot(1, 1, 1)

x = np.arange(0, DAYS, 1)

plt.plot(x, real_prices, color="red", label="Real Ethereum Prices")
plt.plot(x, predicted_prices, color="blue", label="Predicted Ethereum Prices")
plt.title("Ethereum Open Prices", fontsize=18)
plt.xlabel("Time In Days", fontsize=18)
plt.ylabel("Ethereum Prices in US Dollars", fontsize=18)
plt.legend()

# Undo the scaling on the training slice. The stray leading indentation that
# made this statement an IndentationError has been removed.
original_training_prices = scaler.inverse_transform(train)
original_training_prices
    
 array([[ 308.644989],
       [ 320.67099 ],
       [ 298.585999],
       ...,
       [1842.492065],
       [1795.176636],
       [1808.375   ]]) 
    # x1 covers the indices of the training rows; x2 covers the indices from
    # the end of the training rows up to len(dataset). The print reports how
    # many points fall in each range.
    x1 = np.arange(0,len(original_training_prices),1)
x2 = np.arange(len(original_training_prices), len(dataset), 1)
print(len(x1), len(x2))
    

2012 20

# Whole series on one axis: training prices in green, followed by the real
# and predicted prices for the held-out days.
plt.figure(figsize=(16, 8))
plt.subplot(1, 1, 1)

X = len(dataset)  # not referenced again in this cell
x1 = np.arange(0, len(original_training_prices), 1)
x2 = np.arange(len(original_training_prices), len(dataset), 1)

plt.plot(x1, original_training_prices, color="green")
plt.plot(x2, real_prices, color="red", label="Real Ethereum Prices")
plt.plot(x2, predicted_prices, color="blue", label="Predicted Ethereum Prices")
plt.title("Ethereum Open Prices", fontsize=18)
plt.xlabel("Time In Days", fontsize=18)
plt.ylabel("Ethereum Prices in US Dollars", fontsize=18)
plt.legend()
# The first positional argument of plt.grid is the on/off flag, so "both" was
# only being read as a truthy value. State the intent explicitly.
plt.grid(True, which="major", axis="both")

The full example is on my Kaggle account. This is the link.

https://www.kaggle.com/code/mixmore/ethereum-price-prediction

This is just an explanation of the example on Kaggle.

Deep Side

07/03/2023

Ethereum price prediction

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Exploratory Data Analysis(EDA)

# Load the ETH-USD price history. read_csv already returns a DataFrame, so
# the extra pd.DataFrame(...) wrapper was redundant.
dataset = pd.read_csv("ETH-USD.csv")

dataset.shape

dataset.head()

dataset.tail()

# find null values
dataset.isnull().sum()

# Correlation heatmap of the numeric columns. numeric_only=True keeps
# non-numeric columns out of corr(), which pandas >= 2.0 raises on instead of
# dropping silently.
plt.figure(figsize=(10, 10))
sns.heatmap(
    dataset.corr(numeric_only=True),
    annot=True,
    fmt=".1g",
    vmin=-1,
    vmax=1,
    center=0,
    linewidth=3,
    linecolor="black",
    square=True,
)
plt.show()

dataset.info()

import sklearn
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training rows only (everything except the last DAYS
# rows), so the min/max of the held-out period never leak into preprocessing.
# The whole series is then scaled with those training statistics.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[: len(dataset) - DAYS])
dataset_scaled = scaler.transform(dataset)

train, val = dataset_split(dataset_scaled)

train.shape, val.shape

import tensorflow as tf

# Expose only GPU 0 to TensorFlow, list the GPUs it can see, and choose a
# distribution strategy: pin to that GPU when exactly one is visible,
# otherwise use MirroredStrategy.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpus = tf.config.list_physical_devices("GPU")
print(gpus)
if len(gpus) == 1:
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
else:
    strategy = tf.distribute.MirroredStrategy()

[]

# Switch on the "auto_mixed_precision" option of TensorFlow's optimizer
# config; the print is only a confirmation message.
tf.config.optimizer.set_experimental_options({"auto_mixed_precision" : True})
print("Mixed precision enabled")

Mixed precision enabled

# Halve the learning rate after 10 epochs without improvement in training
# loss (floor 1e-6); stop after 25 such epochs, keeping the final weights.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="loss", factor=0.5, patience=10, min_lr=0.000001, verbose=1
)
monitor_es = tf.keras.callbacks.EarlyStopping(
    monitor="loss", patience=25, restore_best_weights=False, verbose=True
)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(tf.keras.optimizers.Adam(learning_rate=0.001), loss="mean_squared_error")

model.summary()

Model: "sequential"

# Train for up to 300 epochs in batches of 16, with the reduce_lr and
# monitor_es callbacks attached.
with tf.device("/device:GPU:0"):
    history = model.fit(
        train_x,
        train_y,
        epochs=300,
        batch_size=16,
        callbacks=[reduce_lr, monitor_es],
    )

# Training-loss curve, one point per epoch.
plt.figure(figsize=(12, 4))
plt.plot(history.history["loss"], label="Training loss")
plt.title("Loss analysis", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.ylabel("Loss", fontsize=18)
plt.legend(["Train"])
# The first positional argument of plt.grid is the on/off flag, so "both" was
# only being read as a truthy value. State the intent explicitly.
plt.grid(True, which="major", axis="both")

# Store the architecture as JSON and the weights as HDF5.
model_json = model.to_json()
with open("AMD_open_1.json", "w") as json_file:
    json_file.write(model_json)

model.save_weights("AMD_open_1.h5")

# Actual values: skip the first TIME_STEP rows of the validation slice (they
# only serve as look-back context) and undo the scaling.
real_prices = scaler.inverse_transform(val[TIME_STEP:])

# Model output for the validation windows, mapped back to the original scale.
predicted_prices = scaler.inverse_transform(loaded_model.predict(val_x))

# Training slice in original units.
original_training_prices = scaler.inverse_transform(train)
original_training_prices

# Index ranges: training rows first, then the rows after them up to len(dataset).
x1 = np.arange(len(original_training_prices))
x2 = np.arange(len(original_training_prices), len(dataset))
print(len(x1), len(x2))

07/03/2023

Ethereum price prediction

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Exploratory Data Analysis(EDA)

# Load the ETH-USD price history. read_csv already returns a DataFrame, so
# the extra pd.DataFrame(...) wrapper was redundant.
dataset = pd.read_csv("ETH-USD.csv")

dataset.shape

dataset.head()

dataset.tail()

# find null values
dataset.isnull().sum()

# Correlation heatmap of the numeric columns. numeric_only=True keeps
# non-numeric columns out of corr(), which pandas >= 2.0 raises on instead of
# dropping silently.
plt.figure(figsize=(10, 10))
sns.heatmap(
    dataset.corr(numeric_only=True),
    annot=True,
    fmt=".1g",
    vmin=-1,
    vmax=1,
    center=0,
    linewidth=3,
    linecolor="black",
    square=True,
)
plt.show()

dataset.info()

import sklearn
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training rows only (everything except the last DAYS
# rows), so the min/max of the held-out period never leak into preprocessing.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[: len(dataset) - DAYS])
dataset_scaled = scaler.transform(dataset)
train, val = dataset_split(dataset_scaled)
train.shape, val.shape

import tensorflow as tf

# Expose only GPU 0 to TensorFlow, list the GPUs it can see, and choose a
# distribution strategy: pin to that GPU when exactly one is visible,
# otherwise use MirroredStrategy.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpus = tf.config.list_physical_devices("GPU")
print(gpus)
if len(gpus) == 1:
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
else:
    strategy = tf.distribute.MirroredStrategy()

[]

# Switch on the "auto_mixed_precision" option of TensorFlow's optimizer
# config; the print is only a confirmation message.
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
print("Mixed precision enabled")

Mixed precision enabled

# Halve the learning rate after 10 epochs without improvement in training
# loss (floor 1e-6); stop after 25 such epochs, keeping the final weights.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="loss", factor=0.5, patience=10, min_lr=0.000001, verbose=1
)
monitor_es = tf.keras.callbacks.EarlyStopping(
    monitor="loss", patience=25, restore_best_weights=False, verbose=True
)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(tf.keras.optimizers.Adam(learning_rate=0.001), loss="mean_squared_error")

model.summary()

Model: "sequential"

# Train for up to 300 epochs in batches of 16, with the reduce_lr and
# monitor_es callbacks attached.
with tf.device("/device:GPU:0"):
    history = model.fit(
        train_x,
        train_y,
        epochs=300,
        batch_size=16,
        callbacks=[reduce_lr, monitor_es],
    )

# Training-loss curve, one point per epoch.
plt.figure(figsize=(12, 4))
plt.plot(history.history["loss"], label="Training loss")
plt.title("Loss analysis", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.ylabel("Loss", fontsize=18)
plt.legend(["Train"])
# The first positional argument of plt.grid is the on/off flag, so "both" was
# only being read as a truthy value. State the intent explicitly.
plt.grid(True, which="major", axis="both")

# Store the architecture as JSON and the weights as HDF5.
model_json = model.to_json()
with open("AMD_open_1.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("AMD_open_1.h5")

# Real and predicted values for the held-out rows, back in original units.
real_prices = val[TIME_STEP:]
real_prices = scaler.inverse_transform(real_prices)
predicted_prices = loaded_model.predict(val_x)
predicted_prices = scaler.inverse_transform(predicted_prices)

original_training_prices = scaler.inverse_transform(train)
original_training_prices

# Index ranges: training rows first, then the rows after them up to len(dataset).
x1 = np.arange(0, len(original_training_prices), 1)
x2 = np.arange(len(original_training_prices), len(dataset), 1)
print(len(x1), len(x2))

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# find null values
dataset.isnull().sum()

# Correlation heatmap of the numeric columns. numeric_only=True keeps
# non-numeric columns out of corr(), which pandas >= 2.0 raises on instead of
# dropping silently.
plt.figure(figsize=(10, 10))
sns.heatmap(
    dataset.corr(numeric_only=True),
    annot=True,
    fmt=".1g",
    vmin=-1,
    vmax=1,
    center=0,
    linewidth=3,
    linecolor="black",
    square=True,
)
plt.show()

import sklearn
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training rows only (everything except the last DAYS
# rows), so the min/max of the held-out period never leak into preprocessing.
# The whole series is then scaled with those training statistics.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[: len(dataset) - DAYS])
dataset_scaled = scaler.transform(dataset)

train, val = dataset_split(dataset_scaled)

train.shape, val.shape

# Expose only GPU 0 to TensorFlow, list the GPUs it can see, and choose a
# distribution strategy: pin to that GPU when exactly one is visible,
# otherwise use MirroredStrategy.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpus = tf.config.list_physical_devices("GPU")
print(gpus)
if len(gpus) == 1:
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
else:
    strategy = tf.distribute.MirroredStrategy()

# Switch on the "auto_mixed_precision" option of TensorFlow's optimizer
# config; the print is only a confirmation message.
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
print("Mixed precision enabled")

# Halve the learning rate after 10 epochs without improvement in training
# loss, never going below 1e-6.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="loss",
    factor=0.5,
    patience=10,
    min_lr=1e-6,
    verbose=1,
)
# Stop after 25 epochs without improvement in training loss; the weights of
# the last epoch are kept (restore_best_weights is off).
monitor_es = tf.keras.callbacks.EarlyStopping(
    monitor="loss",
    patience=25,
    restore_best_weights=False,
    verbose=True,
)

# Train for up to 300 epochs in batches of 16, with the reduce_lr and
# monitor_es callbacks attached.
with tf.device("/device:GPU:0"):
    history = model.fit(
        train_x,
        train_y,
        epochs=300,
        batch_size=16,
        callbacks=[reduce_lr, monitor_es],
    )

# Training-loss curve, one point per epoch.
plt.figure(figsize=(12, 4))
plt.plot(history.history["loss"], label="Training loss")
plt.title("Loss analysis", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.ylabel("Loss", fontsize=18)
plt.legend(["Train"])
# The first positional argument of plt.grid is the on/off flag, so "both" was
# only being read as a truthy value. State the intent explicitly.
plt.grid(True, which="major", axis="both")

# Store the architecture as JSON and the weights as HDF5.
model_json = model.to_json()
with open("AMD_open_1.json", "w") as json_file:
    json_file.write(model_json)

model.save_weights("AMD_open_1.h5")

# Actual values: skip the first TIME_STEP rows of the validation slice (they
# only serve as look-back context) and undo the scaling.
real_prices = scaler.inverse_transform(val[TIME_STEP:])

# Model output for the validation windows, mapped back to the original scale.
predicted_prices = scaler.inverse_transform(loaded_model.predict(val_x))

# Training slice in original units.
original_training_prices = scaler.inverse_transform(train)
original_training_prices

# Index ranges: training rows first, then the rows after them up to len(dataset).
x1 = np.arange(len(original_training_prices))
x2 = np.arange(len(original_training_prices), len(dataset))
print(len(x1), len(x2))