# Source code for fsds_100719.ds.tsa

## Lab Function
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import numpy as np

from IPython.display import display

## Lab Function
def stationarity_check(TS, plot=True, col=None, rollwindow=8):
    """Run the Augmented Dickey-Fuller (ADF) unit-root test on a time series.

    - The null hypothesis of the ADF test is that the series has a unit
      root; the alternative is that it has no unit root.
    - A unit root (also called a unit root process or a difference
      stationary process) is a stochastic trend in a time series,
      sometimes called a "random walk with drift".
    - A series with a unit root shows a systematic pattern that is
      unpredictable, i.e. it is non-stationary.

    Consequently a p-value below .05 rejects the null hypothesis and
    indicates that the series IS stationary.

    From: https://learn.co/tracks/data-science-career-v2/module-4-a-complete-data-science-project-using-multiple-regression/working-with-time-series-data/time-series-decomposition

    Args:
        TS: Time series to test. Indexed with ``TS[col]`` when ``col`` is
            given and must support ``.rolling`` when ``plot`` is true
            (e.g. a pandas Series or DataFrame).
        plot: If true, plot the series with its rolling mean and rolling
            standard deviation on a new matplotlib figure.
        col: Optional column of ``TS`` to test. When given, both the test
            and the plot use ``TS[col]``.
        rollwindow: Window size for the rolling mean / std in the plot.

    Returns:
        pandas.Series holding the test statistic, p-value, number of lags
        used, number of observations used, and one
        ``'Critical Value (<level>)'`` entry per critical value reported
        by ``adfuller``.
    """
    # Imported here so the module can be imported without statsmodels.
    from statsmodels.tsa.stattools import adfuller

    # Select the data once so the test and the plot describe the same series.
    series = TS[col] if col is not None else TS
    dftest = adfuller(series)

    if plot:
        rolmean = series.rolling(window=rollwindow, center=False).mean()
        rolstd = series.rolling(window=rollwindow, center=False).std()

        plt.figure(figsize=(12, 6))
        plt.plot(series, color='blue', label='Original')
        plt.plot(rolmean, color='red', label='Rolling Mean')
        plt.plot(rolstd, color='black', label='Rolling Std')
        plt.legend(loc='best')
        plt.title('Rolling Mean & Standard Deviation')

    print('[i] Results of Dickey-Fuller Test:')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['Test Statistic', 'p-value', '# of Lags Used',
               'Number of Observations Used'],
    )
    # dftest[4] maps significance levels (e.g. '1%') to critical values.
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value

    p_value = dfoutput['p-value']
    sig = p_value < .05

    print(dfoutput)
    print()
    if sig:
        # Rejecting "has a unit root" means the series is stationary.
        print(f"[i] p-val {p_value.round(4)} is <.05, so we reject the null hypothesis.")
        print("\tThe time series IS stationary.")
    else:
        print(f"[i] p-val {p_value.round(4)} is >.05, therefore we fail to reject the null hypothesis.")
        print('\tThe time series is NOT stationary.')

    return dfoutput
def calc_bollinger_bands(ts, window=20, col=None, num_std=2):
    """Compute Bollinger Bands for a time series.

    Normally used for financial/stock market data, where 20 days is the
    usual rolling window.

    Args:
        ts: pandas Series, or a DataFrame. For a DataFrame either pass
            ``col`` or supply a frame with exactly one column.
        window: Rolling window passed to ``ts.rolling`` for both the
            moving average and the moving standard deviation.
        col: Column of ``ts`` holding the data when ``ts`` is a DataFrame.
        num_std: Number of rolling standard deviations between the rolling
            mean and each band. Defaults to 2.

    Returns:
        pandas.DataFrame indexed like the input data with the columns
        'Raw Data', 'Rolling Mean', 'Lower Band' and 'Upper Band'.

    Raises:
        ValueError: If the selected data is a DataFrame that does not have
            exactly one column.
    """
    if col is not None:
        ts = ts[col]

    # Reduce a one-column frame to a Series; reject ambiguous input early
    # with a clear message instead of failing during column assignment.
    if isinstance(ts, pd.DataFrame):
        if ts.shape[1] != 1:
            raise ValueError(
                "ts has %d columns; pass `col` to select the column to use."
                % ts.shape[1]
            )
        ts = ts.iloc[:, 0]

    rolling = ts.rolling(window)
    mean = rolling.mean()
    offset = num_std * rolling.std()

    return pd.DataFrame({
        'Raw Data': ts,
        'Rolling Mean': mean,
        'Lower Band': mean - offset,
        'Upper Band': mean + offset,
    })
def calc_bollinger_bands_plot(ts, window=20, col=None,
                              figsize=(10, 6),
                              set_kws=dict(ylabel='House Price ($)',
                                           title="Bollinger Bands")):
    """Plot a time series together with its Bollinger Bands.

    The bands are computed with ``calc_bollinger_bands``; each resulting
    column is drawn as one line on a newly created figure.

    Args:
        ts: Time series data forwarded to ``calc_bollinger_bands``. If it
            is a DataFrame, ``col`` specifies the data column.
        window: Rolling window forwarded to ``calc_bollinger_bands``
            (20 days is the usual choice for financial data).
        col: Column of ``ts`` to use when ``ts`` is a DataFrame.
        figsize: Figure size passed to ``plt.subplots``.
        set_kws: Keyword arguments passed to ``ax.set`` (axis labels,
            title, ...). The default dict is only read, never mutated.

    Returns:
        Tuple ``(fig, ax)`` of the matplotlib figure and axes drawn on.
    """
    plot_df = calc_bollinger_bands(ts=ts, window=window, col=col)

    # Line style per column produced by calc_bollinger_bands.
    plot_styles = {
        'Raw Data': dict(lw=1, ls='-', c='black'),
        'Rolling Mean': dict(lw=3, alpha=0.6, c='green'),
        'Lower Band': dict(lw=2, ls=':', c='blue'),
        'Upper Band': dict(lw=2, ls=':', c='red'),
    }

    fig, ax = plt.subplots(figsize=figsize)
    for name in plot_df.columns:
        # Draw on the axes we return rather than whatever axes is current.
        plot_df[name].plot(ax=ax, **plot_styles[name])

    ax.legend()
    ax.set(**set_kws)
    return fig, ax