import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima.model import ARIMA


# define a function to convert a 'Month' value from a string to a datetime
def dateparse(x):
    """Parse a 'YYYY-MM' string into a datetime.

    Args:
        x: text in year-month form, e.g. '2019-01'.

    Returns:
        The datetime produced by ``datetime.strptime`` for that month
        (day and time fields take strptime's defaults).

    Raises:
        ValueError: if ``x`` does not match the '%Y-%m' format.
    """
    month_format = '%Y-%m'
    parsed = datetime.strptime(x, month_format)
    return parsed

# import data from csv; the first column is used as the index
price = pd.read_csv('arima-sample-data-csv.csv', index_col=[0])
# Parse the index explicitly rather than passing `date_parser=` to read_csv:
# that keyword is deprecated since pandas 2.0, whereas to_datetime with an
# explicit '%Y-%m' format works on both old and new pandas versions.
price.index = pd.to_datetime(price.index, format='%Y-%m')
# preview the first ten rows (only displayed when run interactively)
price.head(10)


# plot the series with its rolling mean and rolling standard deviation using matplotlib
def test_stationarity(data, window=12):
    """Plot a series together with its rolling mean and rolling std.

    The plot is a visual stationarity check: the original values are drawn
    alongside the rolling statistics so drift in level or spread is visible.

    Args:
        data: object exposing ``.rolling(window=...)`` (the script passes the
            DataFrame loaded from the CSV).
        window: number of observations in each rolling window. Defaults to
            12, the value that was previously hard-coded.

    Returns:
        None. The figure is shown with ``plt.show(block=False)``.
    """
    # Determine rolling statistics
    rolling_mean = data.rolling(window=window).mean()
    rolling_std = data.rolling(window=window).std()

    # Plot rolling statistics; the plot handles are not needed afterwards,
    # so they are not bound to names
    plt.plot(data, label='Original')
    plt.plot(rolling_mean, color='red', label='Rolling Mean')
    plt.plot(rolling_std, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.ylabel('Purchase Price', fontsize=11)
    plt.title('Concrete Purchase Price 2019-2021')
    plt.xticks(fontsize=8)
    # non-blocking so that execution continues past this figure
    plt.show(block=False)


# visual stationarity check: plot the price data with its rolling mean and rolling std
test_stationarity(price)


# Make the non-stationary series stationary by first-order differencing
# (each value minus the previous one), then plot the differenced series.
price_diff = price.diff(periods=1)
diff_axes = price_diff.plot()
diff_axes.set_ylabel('Purchase Price after Differencing', fontsize=11)
diff_axes.set_title('Concrete Purchase Price after Differencing 2019-2021')
leg = diff_axes.legend(loc='upper left')
plt.show()
# After differencing, the mean and other statistical properties return to constant
# Therefore, the parameter d in the ARIMA model should be 1


# split the testing and training data chronologically
# (first 80% = train, last 20% = test)
# shuffle=False is essential for a time series: ARIMA models the order of the
# observations, and the default shuffled split would scramble that order and
# leak future values into the training set. Without shuffling, random_state
# has no effect, so it is no longer passed.
x = price.values
x_train, x_test = train_test_split(x, test_size=0.2, shuffle=False)


# pick the ARIMA model with lowest AIC - choose the parameter p, d, q
import itertools
import warnings
# suppress all warnings from here on (kept from the original script)
warnings.filterwarnings('ignore')
p = range(10, 13)
q = range(0, 2)
d = range(1, 2)  # only d = 1 is searched
pdq = list(itertools.product(p, d, q))
# start from +inf so any finite AIC is accepted, whatever its magnitude
smallest_aic = float('inf')
result_pdq = None
for parameter in pdq:
    try:
        model_arima = ARIMA(x_train, order=parameter)
        model_arima_fit = model_arima.fit()
    except Exception as exc:
        # best-effort search: a candidate that cannot be fitted is skipped,
        # but reported instead of being silently swallowed
        print('Skipping ARIMA order {}: {}'.format(parameter, exc))
        continue
    if model_arima_fit.aic < smallest_aic:
        smallest_aic = model_arima_fit.aic
        result_pdq = parameter
if result_pdq is None:
    # fail here with a clear message rather than later with an invalid order
    raise RuntimeError('No ARIMA candidate could be fitted; cannot select p, d, q.')
print('These are parameters p,d,q with the lowest AIC:{}.'.format(result_pdq))

These are parameters p,d,q with the lowest AIC:(11, 1, 1).


# refit ARIMA on the training data with the selected order, forecast ten
# steps ahead, and plot the forecast against the held-out test values
model_arima = ARIMA(x_train, order=result_pdq)
model_arima_fit = model_arima.fit()
x_pred = model_arima_fit.forecast(steps=10)

# draw both curves first, then decorate the same (current) axes
plt.plot(x_test, label='Actual Price')
plt.plot(x_pred, label='Predicted Price', color='red')
plt.title('Price Forecasting for Next Ten Time Periods')
plt.xlabel('Next Ten Time Periods', fontsize=11)
plt.ylabel('Price', fontsize=11)
plt.ylim(0, 1000)
leg = plt.legend(loc='lower left')
plt.show()


# print the summary of the selected ARIMA model: its AIC and the forecast
summary_rows = (
    ('Function Related Information',),
    ('AIC Number with Most Fitted ARIMA Model', model_arima_fit.aic),
    ('-------------------------------------------------------------',),
    ('Predicted Price for the Next Ten Time Period', x_pred),
)
for row in summary_rows:
    # each row is printed as one line, fields separated by a single space
    print(*row)

Function Related Information
AIC Number with Most Fitted ARIMA Model 338.47178354139584
-------------------------------------------------------------
Predicted Price for the Next Ten Time Period [357.03912331 539.09632475 320.97851364 369.61425201 265.9263721
 152.21175181 426.33373751 359.75924127 363.63837398 431.91655858]

	Price
Month
2019-01-01	270
2019-02-01	150
2019-03-01	182
2019-04-01	120
2019-05-01	177
2019-06-01	165
2019-07-01	250
2019-08-01	244
2019-09-01	194
2019-10-01	133

Case Introduction

Data and Model Design

Import Libraries

Data Visualization and Descriptive Analysis

Results from Data Visualization

Differencing Method

Training and Testing Sets

ARIMA Model

Conclusion