from google.colab import drive
drive.mount('/content/drive')
import numpy as np
import pandas as pd
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn import metrics
%matplotlib inline
from matplotlib.pylab import rcParams
import matplotlib.dates as mdates
import seaborn as sns
import warnings
import itertools
warnings.filterwarnings("ignore") # specify to ignore warning messages
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
# Load the input data.
# Read the raw avocado observations from the CSV file into a DataFrame.
data = pd.read_csv('/content/avocado.csv')
# Peek at the first rows (head() defaults to five) and the (rows, columns) shape.
data.head()
data.shape
# Some relevant variables from the dataset:
# - Date: the date of the observation
# - AveragePrice: the average price of a single avocado
# - type: conventional or organic
# - year: the year of the observation
# - Region: the city or region of the observation
# - Total Volume: the total number of avocados sold
# Count missing values per column to see whether the dataset has any NULLs.
data.isnull().sum()
# Summary statistics of the numeric columns, rounded to two decimals.
data.describe().round(2)

# Parse the Date column into datetimes and order the rows chronologically.
data['Date'] = pd.to_datetime(data['Date'])
data.sort_values(by=['Date'], inplace=True, ascending=True)
data.head()

# TIME SERIES ANALYSIS
# Collapse the data to one series: the mean AveragePrice over all rows
# falling in each calendar week.
df = data[['Date', 'AveragePrice']]
df = df.set_index('Date')
weekly_df = df.resample('W').mean()
# Weeks with no observations resample to NaN; drop them.
w_df = weekly_df.reset_index().dropna()
# sort_values() returns a new frame rather than sorting in place, so the
# result must be assigned for the ordering to take effect.
w_df = w_df.sort_values(by=['Date'])
w_df.head()
# Line chart of the weekly average price with one labelled tick per month.
fig = plt.figure(figsize=(27, 10))
ax = plt.axes()

# Major ticks at every month, labelled with the abbreviated month name.
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))

ax.plot(w_df['Date'], w_df['AveragePrice'], color='b', linewidth=1)
ax.set_xlabel("Date")
ax.set_ylabel("Avocado Price USD")
plt.show()
import logging
logging.getLogger('fbprophet').setLevel(logging.ERROR)
import fbprophet
Prophet = fbprophet.Prophet
# Prophet requires the time-series columns to be named:
# - y: the target value
# - ds: the datetime stamp
# Time Series Forecasts using Facebook's Prophet()

# Rename to the column names Prophet expects, THEN snapshot the frame.
# w_df1 is used later by the XGBoost/SVR sections and needs the 'ds'/'y'
# names; taking an explicit copy after the rename gives it those names
# without relying on w_df1 and w_df being the same object.
w_df.columns = ['ds', 'y']
w_df1 = w_df.copy()

# Hold out the last TEST_SIZE weekly observations for evaluation.
TEST_SIZE = 12
train, test = w_df.iloc[:-TEST_SIZE], w_df.iloc[-TEST_SIZE:]

model_P = Prophet(
    interval_width=0.95,       # width of the yhat_lower / yhat_upper uncertainty band
    yearly_seasonality=True,
    weekly_seasonality=False,
    # The series has one observation per week, so a within-day cycle cannot
    # be estimated from it; the daily component is therefore disabled.
    daily_seasonality=False,
    changepoint_range=1,       # allow trend changepoints over the whole history
)
# Extra seasonality with a ~monthly (30.5 day) period.
model_P.add_seasonality(name='monthly', period=30.5, fourier_order=5, prior_scale=0.02)
model_P.fit(train)

# Extend the training dates by exactly the hold-out horizon so the forecast
# covers the test weeks even if TEST_SIZE is changed.
future = model_P.make_future_dataframe(freq='W', periods=TEST_SIZE)
forecast = model_P.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
from fbprophet.plot import add_changepoints_to_plot

# Forecast plot with the model's changepoints drawn on top of it.
fig = model_P.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), model_P, forecast)

# Component plots, drawn with a larger font.
plt.rcParams['font.size'] = 12
f = model_P.plot_components(forecast)
# savefig() writes whichever figure is current at this point
# (presumably the components figure just created - verify if it matters).
plt.savefig('forecast.png')
plt.show()
# Evaluate the Prophet forecast on the hold-out weeks.
# The evaluation window is taken from the hold-out set itself rather than
# from hard-coded date strings, so it stays aligned with `test` when the
# data or TEST_SIZE changes.
strt = test['ds'].min()
end = test['ds'].max()
predic = forecast[(forecast['ds'] >= strt) & (forecast['ds'] <= end)]

# NOTE(review): abs() is kept from the original code; it would hide the sign
# of any negative forecast, so confirm it is really wanted.
prophet_pred = abs(predic['yhat'])
prophet_mse = metrics.mean_squared_error(test['y'], prophet_pred)
print('MAE:', metrics.mean_absolute_error(test['y'], prophet_pred))
print('MSE:', prophet_mse)
print('RMSE:', np.sqrt(prophet_mse))
import xgboost as xgb
from xgboost import plot_importance, plot_tree
# Same hold-out split as before: the last TEST_SIZE weeks form the test set.
TEST_SIZE = 12
train, test = w_df1.iloc[:-TEST_SIZE], w_df1.iloc[-TEST_SIZE:]

# The single feature is the integer position of each week in the series.
n_train, n_test = train.shape[0], test.shape[0]
X_train = np.arange(n_train).reshape((-1, 1))
y_train = train['y'].values.reshape((-1, 1))
# Test positions must continue where training stopped. Restarting at 0 would
# ask the model about the first weeks of the training period instead of the
# weeks that follow it.
X_test = np.arange(n_train, n_train + n_test).reshape((-1, 1))
y_test = test['y'].values.reshape((-1, 1))
X_train.shape, y_test.shape

reg = xgb.XGBRegressor(max_depth=4, n_estimators=100)
# NOTE(review): the hold-out set is also used as the early-stopping
# validation set, so the test error reported later is optimistic.
reg.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    early_stopping_rounds=20,
    verbose=False,
)
# Forecast on the test set
# XGBoost predictions for the hold-out inputs.
pred = reg.predict(X_test)

# Actual vs. predicted prices over the hold-out weeks.
rcParams['figure.figsize'] = 7, 5
plt.plot(y_test, color='blue', label='real')
plt.plot(pred, color='red', ls='-.', label='predictions')
plt.ylabel("Avocado Price USD")
plt.legend(loc=4)
plt.show()

# Error metrics; the MSE is computed once and reused for the RMSE.
xgb_mse = metrics.mean_squared_error(y_test, pred)
for metric_label, metric_value in (
    ('MAE:', metrics.mean_absolute_error(y_test, pred)),
    ('MSE:', xgb_mse),
    ('RMSE:', np.sqrt(xgb_mse)),
):
    print(metric_label, metric_value)
# Fit an SVR with each kernel and print its score() on the training data.
# Note that this is a fit-quality check, not a hold-out evaluation.
for k in ['linear', 'poly', 'rbf', 'sigmoid']:
    clf = SVR(kernel=k)
    # SVR expects a 1-D target; y_train is stored as a column vector.
    clf.fit(X_train, y_train.ravel())
    confidence = clf.score(X_train, y_train)
    print(k, confidence)

# RBF model with hand-picked hyper-parameters (no search is run here).
model_svr = SVR(kernel='rbf', C=1, gamma=0.5)
model_svr.fit(X_train, y_train.ravel())
print(model_svr.score(X_train, y_train))
# Forecast on the test set
# SVR predictions for the hold-out inputs and their error metrics.
pred1 = model_svr.predict(X_test)
svr_mse = metrics.mean_squared_error(y_test, pred1)
print('MAE:', metrics.mean_absolute_error(y_test, pred1))
print('MSE:', svr_mse)
print('RMSE:', np.sqrt(svr_mse))

# Actual prices against both models' predictions.
rcParams['figure.figsize'] = 7, 5
plt.plot(y_test, color='blue', label='real')
for series, colour, dash, tag in (
    (pred1, 'red', '-.', 'predictions SVR'),
    (pred, 'orange', '--', 'predictions XGBoost'),
):
    plt.plot(series, color=colour, ls=dash, label=tag)
plt.ylabel("Avocado Price USD")
plt.legend(loc=4)
plt.show()

# Side-by-side RMSE of the three models on the hold-out weeks.
prophet_rmse = np.sqrt(metrics.mean_squared_error(test['y'], abs(predic['yhat'])))
xgboost_rmse = np.sqrt(metrics.mean_squared_error(y_test, pred))
print('RMSE Facebook Prophet:', prophet_rmse)
print('RMSE XGBoost:', xgboost_rmse)
print('RMSE SVR:', np.sqrt(svr_mse))