Ethereum price prediction
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
    Exploratory Data Analysis (EDA)
# Load the daily ETH-USD history; read_csv already returns a DataFrame,
# so no extra pd.DataFrame(...) wrapper is needed.
dataset = pd.read_csv("ETH-USD.csv")
      dataset.shape
(2032, 7)
dataset.head()
| | Date | Open | High | Low | Close | Adj Close | Volume |
|---|---|---|---|---|---|---|---|
| 0 | 2017-11-09 | 308.644989 | 329.451996 | 307.056000 | 320.884003 | 320.884003 | 893249984 | 
| 1 | 2017-11-10 | 320.670990 | 324.717987 | 294.541992 | 299.252991 | 299.252991 | 885985984 | 
| 2 | 2017-11-11 | 298.585999 | 319.453003 | 298.191986 | 314.681000 | 314.681000 | 842300992 | 
| 3 | 2017-11-12 | 314.690002 | 319.153015 | 298.513000 | 307.907990 | 307.907990 | 1613479936 | 
| 4 | 2017-11-13 | 307.024994 | 328.415009 | 307.024994 | 316.716003 | 316.716003 | 1041889984 | 
  dataset.tail()
| | Date | Open | High | Low | Close | Adj Close | Volume |
|---|---|---|---|---|---|---|---|
| 2027 | 2023-05-29 | 1909.297485 | 1926.421753 | 1879.077515 | 1893.078125 | 1893.078125 | 5884674572 | 
| 2028 | 2023-05-30 | 1893.093140 | 1916.574951 | 1883.934692 | 1901.026611 | 1901.026611 | 5363439784 | 
| 2029 | 2023-05-31 | 1901.098267 | 1907.035400 | 1852.094727 | 1874.130493 | 1874.130493 | 5984512548 | 
| 2030 | 2023-06-01 | 1873.914673 | 1887.705322 | 1846.227417 | 1862.201416 | 1862.201416 | 5640027197 | 
| 2031 | 2023-06-02 | 1862.266724 | 1909.625732 | 1851.964722 | 1906.706543 | 1906.706543 | 6151811584 | 
  # find null values
dataset.isnull().sum()
Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64
plt.figure(figsize = (10,10))
# Correlation heatmap over the numeric columns only. The "Date" column holds
# strings (object dtype), and DataFrame.corr() raises on non-numeric columns
# in pandas >= 2.0, so they are filtered out explicitly first.
sns.heatmap(dataset.select_dtypes(include = "number").corr(), annot = True, fmt = ".1g",
            vmin = -1, vmax = 1, center = 0, linewidth = 3,
            linecolor = "black", square = True)
plt.show()
      dataset.info()
     <class 'pandas.core.frame.DataFrame'>
RangeIndex: 2032 entries, 0 to 2031
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       2032 non-null   object 
 1   Open       2032 non-null   float64
 2   High       2032 non-null   float64
 3   Low        2032 non-null   float64
 4   Close      2032 non-null   float64
 5   Adj Close  2032 non-null   float64
 6   Volume     2032 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 111.2+ KB
plt.figure(figsize = (20, 12))
# Two stacked panels sharing a plain row-number axis:
# open/close prices on top, traded volume below.
x = np.arange(dataset.shape[0])

# --- top panel: opening and closing price ---
plt.subplot(2, 1, 1)
plt.plot(x, dataset["Open"].values, label="Open Ethereum Price", color="red")
plt.plot(x, dataset["Close"].values, label="Close Ethereum Price", color="blue")
plt.xlabel("Days", fontsize=18)
plt.ylabel("Ethereum Prices in US Dollar", fontsize=18)
plt.title("Ethereum Prices", fontsize=18)
plt.grid(which="major", axis="both")
plt.legend(loc="best")

# --- bottom panel: volume ---
plt.subplot(2, 1, 2)
plt.plot(x, dataset["Volume"].values, label="Ethereum Volume Available", color="green")
plt.xlabel("Days", fontsize=18)
plt.ylabel("Volume", fontsize=18)
plt.title("Volume of Ethereum", fontsize=18)
plt.grid(which="major", axis="both")
plt.legend(loc="best")

plt.show()
     Hyperparameters
# Length of the look-back window: each model input holds this many consecutive
# values, and the value immediately after the window is the prediction target.
TIME_STEP = 7
DAYS = 20 # number of days at the end for which we have to predict. These will be in our validation set.
def dataset_split(dataset, days=None, time_step=None):
    """Split a time-ordered series into a training part and a validation part.

    The last ``days`` rows are held out for validation. The validation slice
    additionally starts ``time_step`` rows earlier, so that the first held-out
    row already has a full look-back window in front of it.

    Args:
        dataset: Sliceable, time-ordered sequence (e.g. a NumPy array).
        days: Number of trailing rows to hold out. Defaults to the
            module-level ``DAYS``.
        time_step: Look-back window length. Defaults to the module-level
            ``TIME_STEP``.

    Returns:
        A ``(train, val)`` tuple of slices of ``dataset``.

    Raises:
        ValueError: If ``days`` or ``time_step`` is negative, or the series
            has fewer than ``days + time_step`` rows.
    """
    if days is None:
        days = DAYS
    if time_step is None:
        time_step = TIME_STEP

    split_at = len(dataset) - days
    val_start = split_at - time_step
    # A negative start would be read by slicing as "count from the end" and
    # silently return the wrong rows, so reject it up front.
    if days < 0 or time_step < 0 or val_start < 0:
        raise ValueError(
            f"cannot hold out {days} rows with a look-back of {time_step} "
            f"from a series of length {len(dataset)}"
        )

    train = dataset[:split_at]
    val = dataset[val_start:]
    return train, val
# Discard every listed column and continue with the remaining data as a
# plain NumPy array instead of a DataFrame.
dataset = dataset.drop(
    columns=["Date", "High", "Low", "Close", "Volume", "Adj Close"]
).values
    Scaling
import sklearn
from sklearn.preprocessing import MinMaxScaler
# Fit the min-max scaler on the training rows only, then apply it to the whole
# series. Fitting on the full series would leak the range of the held-out
# validation rows into the training data.
scaler = MinMaxScaler(feature_range = (0,1))
scaler.fit(dataset_split(dataset)[0])
dataset_scaled = scaler.transform(dataset)
train, val = dataset_split(dataset_scaled)
train.shape, val.shape
    ((2012, 1), (27, 1))
Dataset preparation
def _make_windows(series, time_step):
    """Turn a 2-D column series into supervised (window, next value) pairs.

    Args:
        series: Array of shape ``(rows, columns)``; only column 0 is used.
        time_step: Number of consecutive values in each input window.

    Returns:
        ``(inputs, targets)`` where ``inputs`` has shape
        ``(samples, time_step, 1)`` and ``targets`` has shape ``(samples,)``.
        ``samples`` is 0 when the series has no more than ``time_step`` rows.
    """
    target_rows = range(time_step, series.shape[0])
    windows = [series[i - time_step : i, 0] for i in target_rows]
    targets = [series[i, 0] for i in target_rows]
    # Reshape with an explicit sample count so an empty result still gets the
    # (0, time_step, 1) shape instead of failing on a missing second axis.
    inputs = np.asarray(windows, dtype=series.dtype).reshape(len(windows), time_step, 1)
    return inputs, np.asarray(targets, dtype=series.dtype)


# Same windowing for both splits; the trailing axis of size 1 is the single
# feature per time step that the LSTM input layer expects.
train_x, train_y = _make_windows(train, TIME_STEP)
val_x, val_y = _make_windows(val, TIME_STEP)
print("Reshaped train_x = ", train_x.shape)
print("Shape of train_y = ", train_y.shape)
print("Reshaped val_x = ", val_x.shape)
print("Shape of val_y = ", val_y.shape)
    Reshaped train_x =  (2005, 7, 1)
Shape of train_y =  (2005,)
Reshaped val_x =  (20, 7, 1)
Shape of val_y =  (20,)
LSTM (Long Short-Term Memory):
import tensorflow as tf
# Restrict CUDA to device index 0 for this process.
# NOTE(review): this is set after `import tensorflow`; confirm the variable is
# still honoured at this point.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
gpus = tf.config.list_physical_devices("GPU")
print(gpus)
# Choose a distribution strategy from the number of GPUs detected: exactly one
# GPU pins everything to it, any other count uses MirroredStrategy.
# NOTE(review): `strategy` is not referenced again in the visible code; confirm
# whether the model was meant to be built inside `strategy.scope()`.
if len(gpus) == 1 : 
    strategy = tf.distribute.OneDeviceStrategy(device = "/gpu:0")
else:
    strategy = tf.distribute.MirroredStrategy()
    []
# Switch on the "auto_mixed_precision" option of TensorFlow's graph optimizer.
# NOTE(review): the message below is printed unconditionally; it does not
# verify that mixed precision is actually in effect on this hardware.
tf.config.optimizer.set_experimental_options({"auto_mixed_precision" : True})
print("Mixed precision enabled")
    Mixed precision enabled
# Halve the learning rate after 10 epochs without improvement of the training
# loss, but never go below 1e-6.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor= "loss", factor = 0.5, patience = 10,
                                                 min_lr = 0.000001, verbose = 1)
# Stop training after 25 epochs without improvement of the training loss.
# restore_best_weights is off, so the weights of the last epoch are kept.
monitor_es = tf.keras.callbacks.EarlyStopping(monitor= "loss", patience = 25, restore_best_weights= False, verbose = True)

# Three stacked LSTM layers (128 units each, dropout 0.4 after each one)
# followed by a small dense head producing one value per input window.
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(units = 128, return_sequences = True, input_shape = (train_x.shape[1], 1)))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.LSTM(units = 128, return_sequences = True))
model.add(tf.keras.layers.Dropout(0.4))
# The last LSTM returns only its final state, which feeds the dense layers.
model.add(tf.keras.layers.LSTM(units = 128, return_sequences = False))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(units = 10, activation = "relu"))
model.add(tf.keras.layers.Dense(units = 1, activation = "relu"))

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by recent Keras releases.
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001), loss = "mean_squared_error")
model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm (LSTM)                 (None, 7, 128)            66560     
                                                                 
 dropout (Dropout)           (None, 7, 128)            0         
                                                                 
 lstm_1 (LSTM)               (None, 7, 128)            131584    
                                                                 
 dropout_1 (Dropout)         (None, 7, 128)            0         
                                                                 
 lstm_2 (LSTM)               (None, 128)               131584    
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 dense (Dense)               (None, 10)                1290      
                                                                 
 dense_1 (Dense)             (None, 1)                 11        
                                                                 
=================================================================
Total params: 331,029
Trainable params: 331,029
Non-trainable params: 0
_________________________________________________________________
with tf.device("/device:GPU:0"):
    # Train for at most 300 epochs in mini-batches of 16. No validation data is
    # passed to fit(); both callbacks are configured to watch the training loss.
    history = model.fit(train_x, train_y, epochs = 300, batch_size = 16,  
callbacks = [reduce_lr, monitor_es])
     
# Training loss per epoch, taken from the history returned by model.fit().
plt.figure(figsize = (12, 4))
plt.plot(history.history["loss"], label = "Training loss")
plt.title("Loss analysis", fontsize = 18)
plt.xlabel("Epoch", fontsize = 18)
plt.ylabel("Loss", fontsize = 18)
plt.legend(["Train"])
# The first positional parameter of grid() is the on/off switch, not the axis
# name, so the axis is now passed by keyword (as in the price plot earlier).
plt.grid(True, which = "major", axis = "both")
    
 
Save the model
# Persist the network as two files: the architecture serialised to JSON and
# the weights written separately to "AMD_open_1.h5".
model_json = model.to_json()
with open("AMD_open_1.json", mode="w") as architecture_file:
    architecture_file.write(model_json)
model.save_weights("AMD_open_1.h5")
      Load the model
from keras.models import model_from_json
# Rebuild the network from the saved JSON architecture and load its weights.
# The `with` block closes the file even if reading raises.
with open('AMD_open_1.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("AMD_open_1.h5")
print("Loaded model from disk")
# Optimizer state is not restored from the two files, so the model is
# compiled afresh before use.
loaded_model.compile(loss='mean_squared_error', optimizer='adam')
    Loaded model from disk
 real_prices = val[TIME_STEP:]  # drop the leading TIME_STEP rows: they are only look-back context, not targets
# Undo the min-max scaling so the values are on the original scale again.
real_prices = scaler.inverse_transform(real_prices)
# Predict one value per validation window, then undo the scaling the same way.
predicted_prices = loaded_model.predict(val_x)
predicted_prices = scaler.inverse_transform(predicted_prices)
    1/1 [==============================] - 2s 2s/step
plt.figure(figsize= (16, 5))
# Real vs. predicted opening prices, one point per held-out day.
plt.subplot(1, 1, 1)
x = np.arange(DAYS)
plt.plot(x, real_prices, label="Real Ethereum Prices", color="red")
plt.plot(x, predicted_prices, label="Predicted Ethereum Prices", color="blue")
plt.xlabel("Time In Days", fontsize=18)
plt.ylabel("Ethereum Prices in US Dollars", fontsize=18)
plt.title("Ethereum Open Prices", fontsize=18)
plt.legend()
 
# Undo the scaling on the training split so it can be plotted on the same
# scale as the real and predicted validation prices.
original_training_prices = scaler.inverse_transform(train)
original_training_prices
     array([[ 308.644989],
       [ 320.67099 ],
       [ 298.585999],
       ...,
       [1842.492065],
       [1795.176636],
       [1808.375   ]])
x1 = np.arange(0,len(original_training_prices),1)
# x positions for the validation points: they continue right after the last
# training row and run to the end of the full series.
x2 = np.arange(len(original_training_prices), len(dataset), 1)
print(len(x1), len(x2))
    2012 20
plt.figure(figsize= (16,8))
# Whole series in one plot: training prices first, then the real and the
# predicted prices for the held-out days appended at the end.
plt.subplot(1,1,1)
# x1 covers the training rows; x2 continues from there to the end of the series.
x1 = np.arange(0,len(original_training_prices),1)
x2 = np.arange(len(original_training_prices), len(dataset), 1)
plt.plot(x1, original_training_prices, color = "green")
plt.plot(x2, real_prices, color = "red", label = "Real Ethereum Prices")
plt.plot(x2, predicted_prices, color = "blue", label = "Predicted Ethereum Prices")
plt.title("Ethereum Open Prices", fontsize = 18)
plt.xlabel("Time In Days", fontsize = 18)
plt.ylabel("Ethereum Prices in US Dollars", fontsize = 18)
plt.legend()
# The first positional parameter of grid() is the on/off switch, not the axis
# name, so the axis is passed by keyword.
plt.grid(True, which = "major", axis = "both")
    
 
The full example is available on my Kaggle account:
https://www.kaggle.com/code/mixmore/ethereum-price-prediction
This file is only a walkthrough of that Kaggle notebook.
 