Bitcoin Price Prediction by LSTM
Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import datetime as dt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM
from itertools import cycle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import datetime as dt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM
from itertools import cycle
Load data
# Read the daily BTC-USD price history and index the rows by the 'Date' column.
df = pd.read_csv('BTC-USD.csv').set_index('Date')
# Preview the first five rows.
df.head()
Date | Open | High | Low | Close | Adj Close | Volume |
---|---|---|---|---|---|---|
2014-09-17 | 465.864014 | 468.174011 | 452.421997 | 457.334015 | 457.334015 | 21056800.0 |
2014-09-18 | 456.859985 | 456.859985 | 413.104004 | 424.440002 | 424.440002 | 34483200.0 |
2014-09-19 | 424.102997 | 427.834991 | 384.532013 | 394.795990 | 394.795990 | 37919700.0 |
2014-09-20 | 394.673004 | 423.295990 | 389.882996 | 408.903992 | 408.903992 | 36863600.0 |
2014-09-21 | 408.084991 | 412.425995 | 393.181000 | 398.821014 | 398.821014 | 26580100.0 |
# Show the last five rows of the data.
df.tail(5)
Date | Open | High | Low | Close | Adj Close | Volume |
---|---|---|---|---|---|---|
2023-05-28 | 26871.158203 | 28193.449219 | 26802.751953 | 28085.646484 | 28085.646484 | 1.454523e+10 |
2023-05-29 | 28075.591797 | 28432.039063 | 27563.876953 | 27745.884766 | 27745.884766 | 1.518131e+10 |
2023-05-30 | 27745.123047 | 28044.759766 | 27588.501953 | 27702.349609 | 27702.349609 | 1.325108e+10 |
2023-05-31 | NaN | NaN | NaN | NaN | NaN | NaN |
2023-06-01 | 27236.576172 | 27326.533203 | 27235.039063 | 27301.021484 | 27301.021484 | 1.596423e+10 |
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
count | 3179.000000 | 3179.000000 | 3179.000000 | 3179.000000 | 3179.000000 | 3.179000e+03 |
mean | 13432.217640 | 13763.078443 | 13068.545810 | 13439.410714 | 13439.410714 | 1.659098e+10 |
std | 16028.349128 | 16432.296940 | 15565.365494 | 16025.439960 | 16025.439960 | 1.962819e+10 |
min | 176.897003 | 211.731003 | 171.509995 | 178.102997 | 178.102997 | 5.914570e+06 |
25% | 741.108002 | 750.089508 | 733.108490 | 741.312989 | 741.312989 | 1.219535e+08 |
50% | 7500.700195 | 7680.430176 | 7349.120117 | 7514.470215 | 7514.470215 | 9.744636e+09 |
75% | 19510.696289 | 20034.084961 | 19129.965821 | 19545.489258 | 19545.489258 | 2.776349e+10 |
max | 67549.734375 | 68789.625000 | 66382.062500 | 67566.828125 | 67566.828125 | 3.509679e+11 |
# Print the index range, column dtypes and non-null counts.
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 3180 entries, 2014-09-17 to 2023-06-01
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Open 3179 non-null float64
1 High 3179 non-null float64
2 Low 3179 non-null float64
3 Close 3179 non-null float64
4 Adj Close 3179 non-null float64
5 Volume 3179 non-null float64
dtypes: float64(6)
memory usage: 173.9+ KB
# Count the missing values in each column.
df.isnull().sum()
Open 1
High 1
Low 1
Close 1
Adj Close 1
Volume 1
dtype: int64
# Count the rows that are exact duplicates of an earlier row.
df.duplicated().sum()
0
# Drop every row that contains a missing value.
df = df.dropna()
Split the data into training and test sets
# Model only the closing price. The first 2000 rows are the training set,
# everything after them is the test set.
new_df = df['Close']
final_df = new_df.to_numpy()
train_data, test_data = final_df[:2000], final_df[2000:]

# Wrap both parts in single-column frames that keep their original date index.
train_df = pd.DataFrame({'Close': train_data}, index=new_df.index[:2000])
test_df = pd.DataFrame({'Close': test_data}, index=new_df.index[2000:])

print("train_data: ", train_df.shape)
print("test_data: ", test_df.shape)
train_data: (2000, 1) test_data: (1179, 1)
Using MinMaxScaler
# Scale the prices with a Min-Max scaler.
# The scaler is fitted on the training prices only, so the minimum and maximum
# of the test period do not leak into training. As a consequence, scaled
# test-period values may fall outside the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(final_df[:len(train_df)].reshape(-1, 1))
scaled_data = scaler.transform(final_df.reshape(-1, 1))

# Build the supervised training samples: each input is the 60 scaled closes
# preceding position i, and the target is the scaled close at position i.
X_train_data, y_train_data = [], []
for i in range(60, len(train_df)):
    X_train_data.append(scaled_data[i - 60:i, 0])
    y_train_data.append(scaled_data[i, 0])
X_train_data, y_train_data = np.array(X_train_data), np.array(y_train_data)

# Add a trailing feature axis: (samples, 60) -> (samples, 60, 1).
X_train_data = np.reshape(X_train_data, (X_train_data.shape[0], X_train_data.shape[1], 1))
Building LSTM
# Stacked LSTM regressor: four 50-unit LSTM layers, each followed by 20%
# dropout, feeding a single-unit dense output.
model = Sequential([
    LSTM(units = 50, return_sequences = True, input_shape = (X_train_data.shape[1], 1)),
    Dropout(0.2),
    LSTM(units = 50, return_sequences = True),
    Dropout(0.2),
    LSTM(units = 50, return_sequences = True),
    Dropout(0.2),
    # The last recurrent layer does not return the full sequence.
    LSTM(units = 50),
    Dropout(0.2),
    Dense(units = 1),
])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 60, 50) 10400
dropout (Dropout) (None, 60, 50) 0
lstm_1 (LSTM) (None, 60, 50) 20200
dropout_1 (Dropout) (None, 60, 50) 0
lstm_2 (LSTM) (None, 60, 50) 20200
dropout_2 (Dropout) (None, 60, 50) 0
lstm_3 (LSTM) (None, 50) 20200
dropout_3 (Dropout) (None, 50) 0
dense (Dense) (None, 1) 51
=================================================================
Total params: 71,051
Trainable params: 71,051
Non-trainable params: 0
_________________________________________________________________
# Train with the Adam optimizer, minimising mean squared error.
model.compile(loss = 'mean_squared_error', optimizer = 'adam')
# The trailing semicolon keeps the notebook from echoing the returned object.
model.fit(
    x = X_train_data,
    y = y_train_data,
    batch_size = 32,
    epochs = 150,
);
Price prediction
# Take the test period plus the 60 closes before it, so that the first test
# row also has a full 60-step look-back window, and scale it with the fitted scaler.
first_row = len(new_df) - len(test_df) - 60
input_data = scaler.transform(new_df[first_row:].values.reshape(-1, 1))

# One 60-step window per test row, with a trailing feature axis.
X_test = np.array([input_data[end - 60:end, 0] for end in range(60, input_data.shape[0])])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Predict in scaled space, then undo the scaling.
predicted = scaler.inverse_transform(model.predict(X_test))
37/37 [==============================] - 4s 42ms/step
# Store the predictions next to the actual test closes.
test_df['Predictions'] = predicted

# Draw training closes, test closes and predictions on one large chart.
ax = plt.figure(figsize=(50, 20)).add_subplot()
for series, label in (
    (train_df['Close'], 'Training Data'),
    (test_df['Close'], 'Test Data'),
    (test_df['Predictions'], 'Prediction'),
):
    ax.plot(series, label=label)
ax.legend(fontsize="40", loc="upper right")
plt.show()
# Interactive Plotly chart of the training, test and predicted closing prices.
fig = go.Figure()
fig.add_trace(go.Scatter(x=train_df.index, y=train_df['Close'], mode='lines', name='Training Data'))
fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Close'], mode='lines', name='Test Data'))
# Keep this call last: add_trace returns the figure, so a notebook cell that
# ends on it renders the chart.
fig.add_trace(go.Scatter(x=test_df.index, y=test_df['Predictions'], mode='lines', name='Prediction'))
# Error of the predictions against the actual test closes.
actual = test_df['Close'].values
forecast = test_df['Predictions'].values
mse = mean_squared_error(actual, forecast)
print('The Mean Squared Error is', mse)
print('The Mean Absolute Error is', mean_absolute_error(actual, forecast))
# RMSE is the square root of the MSE computed above.
print('The Root Mean Squared Error is', np.sqrt(mse))
The Mean Squared Error is 25574670.79478714
The Mean Absolute Error is 3215.975236052825
The Root Mean Squared Error is 5057.140574948173
The full example is available on my Kaggle account:
https://www.kaggle.com/code/mixmore/bitcoin-price-prediction-by-lstm
This page is only an explanation of that Kaggle example.