Bitcoin price prediction by machine learning
Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20,10
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
from math import floor,ceil,sqrt
import sys
import warnings
import datetime as dt
if not sys.warnoptions:
warnings.simplefilter("ignore")
from sklearn.linear_model import LinearRegression
from pmdarima.arima import auto_arima
from sklearn import neighbors
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from prophet import Prophet
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
df = pd.read_csv('BTC-USD.csv')
df.head(5)
Date | Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2014-09-17 | 465.864014 | 468.174011 | 452.421997 | 457.334015 | 457.334015 | 21056800.0 |
1 | 2014-09-18 | 456.859985 | 456.859985 | 413.104004 | 424.440002 | 424.440002 | 34483200.0 |
2 | 2014-09-19 | 424.102997 | 427.834991 | 384.532013 | 394.795990 | 394.795990 | 37919700.0 |
3 | 2014-09-20 | 394.673004 | 423.295990 | 389.882996 | 408.903992 | 408.903992 | 36863600.0 |
4 | 2014-09-21 | 408.084991 | 412.425995 | 393.181000 | 398.821014 | 398.821014 | 26580100.0 |
# find null values
df.isnull().sum()
Date 0
Open 1
High 1
Low 1
Close 1
Adj Close 1
Volume 1
dtype: int64
df = df.dropna()
df = df[['Date', 'Open', 'High', 'Low', 'Close','Volume']]
df['Date'] = pd.to_datetime(df.Date,format='%Y/%m/%d')
df.index = df['Date']
plt.figure(figsize=(16,8))
plt.plot(df['Close'], label='Close Price',color='g')
plt.xlabel('Date',size=20)
plt.ylabel('Bitcoin Price',size=20)
plt.title('Bitcoin Price',size=25)
plt.show()
Moving Average
def moving_avg_prediction(df):
shape=df.shape[0]
df_new=df[['Close']]
df_new.head()
train_set=df_new.iloc[:ceil(shape*0.75)]
valid_set=df_new.iloc[ceil(shape*0.75):]
print('-----------------------------------------------------------')
print('-----------Bitcoin price prediction by Moving Averages--------')
print('-----------------------------------------------------------')
print('Shape of Training Set',train_set.shape)
print('Shape of Validation Set',valid_set.shape)
preds = []
for i in range(0,valid_set.shape[0]):
a = train_set['Close'][len(train_set)-valid_set.shape[0]+i:].sum() + sum(preds)
b = a/(valid_set.shape[0])
preds.append(b)
rms=np.sqrt(np.mean(np.power((np.array(valid_set['Close'])-preds),2)))
print('RMSE value on validation set:',rms)
print('-----------------------------------------------------------')
print('-----------------------------------------------------------')
valid_set['Predictions'] = preds
plt.plot(train_set['Close'])
plt.plot(valid_set[['Close', 'Predictions']])
plt.xlabel('Date',size=20)
plt.ylabel('Bitcoin Price',size=20)
plt.title('Bitcoin Price Prediction by Moving Averages',size=20)
plt.legend(['Model Training Data','Actual Data','Predicted Data'])
moving_avg_prediction(df)
-----------------------------------------------------------
-----------Bitcoin price prediction by Moving Averages--------
-----------------------------------------------------------
Shape of Training Set (2385, 1)
Shape of Validation Set (794, 1)
RMSE value on validation set: 23322.77369822261
-----------------------------------------------------------
-----------------------------------------------------------
Linear Regression
def linear_regression_prediction(df):
shape=df.shape[0]
df_new=df[['Close']]
df_new.head()
train_set=df_new.iloc[:ceil(shape*0.75)]
valid_set=df_new.iloc[ceil(shape*0.75):]
print('-----------------------------------------------------------------')
print('-----------Bitcoin price prediction by LINEAR REGRESSION-----------')
print('-----------------------------------------------------------------')
print('Shape of Training Set',train_set.shape)
print('Shape of Validation Set',valid_set.shape)
train=train_set.reset_index()
valid=valid_set.reset_index()
x_train = train['Date'].map(dt.datetime.toordinal)
y_train = train[['Close']]
x_valid = valid['Date'].map(dt.datetime.toordinal)
y_valid = valid[['Close']]
#implement linear regression
model = LinearRegression()
model.fit(np.array(x_train).reshape(-1,1),y_train)
preds = model.predict(np.array(x_valid).reshape(-1,1))
rms=np.sqrt(np.mean(np.power((np.array(valid_set['Close'])-preds),2)))
print('RMSE value on validation set:',rms)
print('-----------------------------------------------------------')
print('-----------------------------------------------------------')
valid_set['Predictions'] = preds
plt.plot(train_set['Close'])
plt.plot(valid_set[['Close', 'Predictions']])
plt.xlabel('Date',size=20)
plt.ylabel('Bitcoin Price',size=20)
plt.title('Bitcoin Price Prediction by Linear Regression',size=20)
plt.legend(['Model Training Data','Actual Data','Predicted Data'])
linear_regression_prediction(df)
-----------------------------------------------------------------
-----------Bitcoin price prediction by LINEAR REGRESSION-----------
-----------------------------------------------------------------
Shape of Training Set (2385, 1)
Shape of Validation Set (794, 1)
RMSE value on validation set: 19696.932199208448
-----------------------------------------------------------
-----------------------------------------------------------
K-Nearest Neighbours
def k_nearest_neighbours_predict(df):
shape=df.shape[0]
df_new=df[['Close']]
df_new.head()
train_set=df_new.iloc[:ceil(shape*0.75)]
valid_set=df_new.iloc[ceil(shape*0.75):]
print('-------------------------------------------------------------------')
print('-----------Bitcoin Price Prediction by K-Nearest Neighbors-----------')
print('-------------------------------------------------------------------')
print('Shape of Training Set',train_set.shape)
print('Shape of Validation Set',valid_set.shape)
train=train_set.reset_index()
valid=valid_set.reset_index()
x_train = train['Date'].map(dt.datetime.toordinal)
y_train = train[['Close']]
x_valid = valid['Date'].map(dt.datetime.toordinal)
y_valid = valid[['Close']]
x_train_scaled = scaler.fit_transform(np.array(x_train).reshape(-1, 1))
x_train = pd.DataFrame(x_train_scaled)
x_valid_scaled = scaler.fit_transform(np.array(x_valid).reshape(-1, 1))
x_valid = pd.DataFrame(x_valid_scaled)
params = {'n_neighbors':[2,3,4,5,6,7,8,9]}
knn = neighbors.KNeighborsRegressor()
model = GridSearchCV(knn, params, cv=5)
model.fit(x_train,y_train)
preds = model.predict(x_valid)
rms=np.sqrt(np.mean(np.power((np.array(y_valid)-np.array(preds)),2)))
print('RMSE value on validation set:',rms)
print('-----------------------------------------------------------')
print('-----------------------------------------------------------')
valid_set['Predictions'] = preds
plt.plot(train_set['Close'])
plt.plot(valid_set[['Close', 'Predictions']])
plt.xlabel('Date',size=20)
plt.ylabel('Bitcoin Price',size=20)
plt.title('Bitcoin Price Prediction by K-Nearest Neighbors',size=20)
plt.legend(['Model Training Data','Actual Data','Predicted Data'])
k_nearest_neighbours_predict(df)
-------------------------------------------------------------------
-----------Bitcoin Price Prediction by K-Nearest Neighbors-----------
-------------------------------------------------------------------
Shape of Training Set (2385, 1)
Shape of Validation Set (794, 1)
RMSE value on validation set: 34103.44039453257
-----------------------------------------------------------
-----------------------------------------------------------
ARIMA
def auto_arima_prediction(df):
shape=df.shape[0]
df_new=df
data = df_new.sort_index(ascending=True, axis=0)
train_set=data[:ceil(shape*0.75)]
valid_set=data[ceil(shape*0.75):]
print('----------------------------------------------------------')
print('-----------Bitcoin Price Prediction by Auto ARIMA-----------')
print('----------------------------------------------------------')
print('Shape of Training Set',train_set.shape)
print('Shape of Validation Set',valid_set.shape)
training = train_set['Close']
validation = valid_set['Close']
model = auto_arima(training, start_p=1, start_q=1,max_p=3, max_q=3, m=12,start_P=0, seasonal=True,d=1, D=1, trace=True,error_action='ignore',suppress_warnings=True)
model.fit(training)
forecast = model.predict(n_periods=ceil(floor(df.shape[0]*0.25)))
forecast = pd.DataFrame(forecast,index = valid_set.index,columns=['Prediction'])
rms=np.sqrt(np.mean(np.power((np.array(valid_set['Close'])-np.array(forecast['Prediction'])),2)))
print('RMSE value on validation set:',rms)
print('-----------------------------------------------------------')
print('-----------------------------------------------------------')
plt.plot(train_set['Close'])
plt.plot(valid_set['Close'])
plt.plot(forecast['Prediction'])
plt.xlabel('Date',size=20)
plt.ylabel('Bitcoin Price',size=20)
plt.title('Bitcoin Price Prediction by Auto ARIMA',size=20)
plt.legend(['Model Training Data','Actual Data','Predicted Data'])
auto_arima_prediction(df)
----------------------------------------------------------
-----------Bitcoin Price Prediction by Auto ARIMA-----------
----------------------------------------------------------
Shape of Training Set (2385, 6)
Shape of Validation Set (794, 6)
Performing stepwise search to minimize aic
ARIMA(1,1,1)(0,1,1)[12] : AIC=inf, Time=15.10 sec
ARIMA(0,1,0)(0,1,0)[12] : AIC=37845.842, Time=0.23 sec
ARIMA(1,1,0)(1,1,0)[12] : AIC=37280.396, Time=3.16 sec
ARIMA(0,1,1)(0,1,1)[12] : AIC=inf, Time=9.31 sec
ARIMA(1,1,0)(0,1,0)[12] : AIC=37844.346, Time=0.22 sec
ARIMA(1,1,0)(2,1,0)[12] : AIC=36869.873, Time=24.01 sec
ARIMA(1,1,0)(2,1,1)[12] : AIC=inf, Time=152.63 sec
ARIMA(1,1,0)(1,1,1)[12] : AIC=inf, Time=15.55 sec
ARIMA(0,1,0)(2,1,0)[12] : AIC=36868.062, Time=4.08 sec
ARIMA(0,1,0)(1,1,0)[12] : AIC=37278.994, Time=0.81 sec
ARIMA(0,1,0)(2,1,1)[12] : AIC=inf, Time=97.14 sec
ARIMA(0,1,0)(1,1,1)[12] : AIC=inf, Time=10.28 sec
ARIMA(0,1,1)(2,1,0)[12] : AIC=36869.869, Time=33.30 sec
ARIMA(1,1,1)(2,1,0)[12] : AIC=inf, Time=81.31 sec
ARIMA(0,1,0)(2,1,0)[12] intercept : AIC=36869.951, Time=49.47 sec
Best model: ARIMA(0,1,0)(2,1,0)[12]
Total fit time: 496.644 seconds
RMSE value on validation set: nan
-----------------------------------------------------------
-----------------------------------------------------------
Prophet
def fb_prophet_prediction(df):
shape=df.shape[0]
df_new=df[['Close']]
df_new.reset_index(inplace=True)
df_new['Date'] = pd.to_datetime(df_new.Date,format='%Y-%m-%d')
df_new.index = df_new['Date']
df_new.rename(columns={'Close': 'y', 'Date': 'ds'}, inplace=True)
train_set=df_new.iloc[:ceil(shape*0.75)]
valid_set=df_new.iloc[ceil(shape*0.75):]
print('-------------------------------------------------------')
print('-----------Bitcoin Price Prediction by FB Prophet-----------')
print('-------------------------------------------------------')
print('Shape of Training Set',train_set.shape)
print('Shape of Validation Set',valid_set.shape)
model = Prophet()
model.fit(train_set)
close_prices = model.make_future_dataframe(periods=len(valid_set))
forecast = model.predict(close_prices)
forecast_valid = forecast['yhat'][ceil(shape*0.75):]
rms=np.sqrt(np.mean(np.power((np.array(valid_set['y'])-np.array(forecast_valid)),2)))
print('RMSE value on validation set:',rms)
print('-----------------------------------------------------------')
print('-----------------------------------------------------------')
valid_set['Predictions'] = forecast_valid.values
plt.plot(train_set['y'])
plt.plot(valid_set[['y', 'Predictions']])
plt.xlabel('Date',size=20)
plt.ylabel('Bitcoin Price',size=20)
plt.title('Bitcoin Price Prediction by FB Prophet',size=20)
plt.legend(['Model Training Data','Actual Data','Predicted Data'])
fb_prophet_prediction(df)
-------------------------------------------------------
-----------Bitcoin Price Prediction by FB Prophet-----------
-------------------------------------------------------
Shape of Training Set (2385, 2)
Shape of Validation Set (794, 2)
00:24:58 - cmdstanpy - INFO - Chain [1] start processing
00:24:59 - cmdstanpy - INFO - Chain [1] done processing
RMSE value on validation set: 35994.098874668
-----------------------------------------------------------
-----------------------------------------------------------
Long Short Term Memory (LSTM)
def lstm_prediction(df):
shape=df.shape[0]
df_new=df[['Close']]
df_new.head()
dataset = df_new.values
train=df_new[:ceil(shape*0.75)]
valid=df_new[ceil(shape*0.75):]
print('-----------------------------------------------------------------------------')
print('-----------Bitcoin Price Prediction by Long Short Term Memory (LSTM)-----------')
print('-----------------------------------------------------------------------------')
print('Shape of Training Set',train.shape)
print('Shape of Validation Set',valid.shape)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)
x_train, y_train = [], []
for i in range(40,len(train)):
x_train.append(scaled_data[i-40:i,0])
y_train.append(scaled_data[i,0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1))
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1],1)))
model.add(LSTM(units=50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=1, batch_size=1, verbose=2)
inputs = df_new[len(df_new) - len(valid) - 40:].values
inputs = inputs.reshape(-1,1)
inputs = scaler.transform(inputs)
X_test = []
for i in range(40,inputs.shape[0]):
X_test.append(inputs[i-40:i,0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))
closing_price = model.predict(X_test)
closing_price = scaler.inverse_transform(closing_price)
rms=np.sqrt(np.mean(np.power((valid-closing_price),2)))
print('RMSE value on validation set:',rms)
print('-----------------------------------------------------------')
print('-----------------------------------------------------------')
valid['Predictions'] = closing_price
plt.plot(train['Close'])
plt.plot(valid[['Close','Predictions']])
plt.xlabel('Date',size=20)
plt.ylabel('Bitcoin Price',size=20)
plt.title('Bitcoin Price Prediction by Long Short Term Memory (LSTM)',size=20)
plt.legend(['Model Training Data','Actual Data','Predicted Data'])
lstm_prediction(df)
-----------------------------------------------------------------------------
-----------Bitcoin Price Prediction by Long Short Term Memory (LSTM)-----------
-----------------------------------------------------------------------------
Shape of Training Set (2385, 1)
Shape of Validation Set (794, 1)
2345/2345 - 45s - loss: 9.6017e-04 - 45s/epoch - 19ms/step
25/25 [==============================] - 2s 17ms/step
RMSE value on validation set: Close 2615.411663
dtype: float64
-----------------------------------------------------------
-----------------------------------------------------------
The full example is on my Kaggle account.
This is the link.
https://www.kaggle.com/code/mixmore/bitcoin-price-prediction-by-machine-learning
This is just an explanation of the example on Kaggle.