# Source code for fsds_100719.ds.tsa

## Lab Function
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import numpy as np

from IPython.display import display

## Lab Function
def stationarity_check(TS,plot=True,col=None,rollwindow=8):
    """
    Perform the Augmented Dickey-Fuller (ADF) unit root test on a time series.

    - The null hypothesis of the ADF test is that there is a unit root (the
      series is non-stationary), with the alternative that there is no unit
      root (the series is stationary).
        - A unit root (also called a unit root process or a difference
          stationary process) is a stochastic trend in a time series,
          sometimes called a "random walk with drift".
        - If a time series has a unit root, it shows a systematic pattern
          that is unpredictable, and it is non-stationary.
    - A p-value below .05 rejects the null hypothesis, i.e. the series is
      reported as stationary; otherwise we fail to reject the null and the
      series is reported as NOT stationary.

    From: https://learn.co/tracks/data-science-career-v2/module-4-a-complete-data-science-project-using-multiple-regression/working-with-time-series-data/time-series-decomposition

    Parameters
    ----------
    TS : pandas Series or DataFrame
        The time series. Must support ``.rolling(...)`` when ``plot`` is True.
    plot : bool, default True
        If True, draw the series with its rolling mean and rolling standard
        deviation on a new matplotlib figure (the figure is not shown
        explicitly; no ``plt.show()`` is called).
    col : hashable, optional
        If given, ``TS[col]`` is the data that is tested and plotted.
    rollwindow : int, default 8
        Window size passed to ``.rolling`` for the rolling statistics.

    Returns
    -------
    pandas.Series
        Entries 'Test Statistic', 'p-value', '# of Lags Used',
        'Number of Observations Used', plus one 'Critical Value (<level>)'
        entry per critical-value level returned by ``adfuller``.

    Side effects
    ------------
    Prints the test results and a one-line interpretation to stdout.
    """
    # Imported locally so the module can be imported without statsmodels.
    from statsmodels.tsa.stattools import adfuller

    # Test and plot the same data: the selected column when `col` is given.
    series = TS[col] if col is not None else TS

    # Perform the Dickey-Fuller test
    dftest = adfuller(series)

    if plot:
        # Calculate rolling statistics
        rolmean = series.rolling(window=rollwindow, center=False).mean()
        rolstd = series.rolling(window=rollwindow, center=False).std()

        # Plot rolling statistics
        plt.figure(figsize=(12,6))
        plt.plot(series, color='blue', label='Original')
        plt.plot(rolmean, color='red', label='Rolling Mean')
        plt.plot(rolstd, color='black', label='Rolling Std')
        plt.legend(loc='best')
        plt.title('Rolling Mean & Standard Deviation')

    # Print Dickey-Fuller test results
    print ('[i] Results of Dickey-Fuller Test:')

    # The first four items of the adfuller result are the scalar statistics;
    # item 4 is the dict of critical values keyed by significance level.
    dfoutput = pd.Series(dftest[0:4],
                         index=['Test Statistic','p-value','# of Lags Used','Number of Observations Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value

    p_value = dfoutput['p-value']
    print (dfoutput)
    print()
    if p_value < .05:
        # Rejecting "has a unit root" means the series is stationary.
        print(f"[i] p-val {p_value.round(4)} is <.05, so we reject the null hypothesis.")
        print("\tThe time series IS stationary.")
    else:
        # A large p-value never "supports" the null; we only fail to reject it.
        print(f"[i] p-val {p_value.round(4)} is >=.05, therefore we fail to reject the null hypothesis.")
        print('\tThe time series is NOT stationary.')

    return dfoutput


def calc_bollinger_bands(ts,window=20,col=None,num_std=2):
    """Calculate Bollinger Bands for a time series.

    Normally used for financial/stock market data, with 20 periods for the
    rolling calculations and bands 2 standard deviations from the mean.

    Parameters
    ----------
    ts : pandas Series or DataFrame
        The time series. If it is a DataFrame, ``col`` specifies the data.
    window : int, default 20
        Window passed to ``.rolling`` for the moving mean and std.
    col : hashable, optional
        If given, ``ts[col]`` is used as the data.
    num_std : int or float, default 2
        Number of rolling standard deviations between the rolling mean and
        each band.

    Returns
    -------
    pandas.DataFrame
        Columns 'Raw Data', 'Rolling Mean', 'Lower Band', 'Upper Band', in
        that order. Rows before a full window is available are NaN in the
        rolling columns.
    """
    if col is not None:
        ts = ts[col]

    ## Calc rolling moving average and rolling standard deviation
    rolling = ts.rolling(window)
    mean = rolling.mean()
    std = rolling.std()

    ## Bands sit num_std rolling standard deviations either side of the mean
    offset = num_std * std
    upper = mean + offset
    lower = mean - offset

    ## COMBINE DATA INTO 1 DF
    ## (column-by-column assignment so the result takes its index from ts)
    bands_df = pd.DataFrame()
    bands_df['Raw Data'] = ts
    bands_df['Rolling Mean'] = mean
    bands_df['Lower Band'] = lower
    bands_df['Upper Band'] = upper

    return bands_df

def calc_bollinger_bands_plot(ts,window=20,col=None,
                              figsize=(10,6),
                              set_kws=None):
    """Calculate Bollinger Bands for a time series and plot them.

    The bands are computed with ``calc_bollinger_bands`` and each resulting
    column is drawn on a single new matplotlib axes with its own line style.

    Parameters
    ----------
    ts : pandas Series or DataFrame
        The time series. If it is a DataFrame, ``col`` specifies the data.
    window : int, default 20
        Rolling window forwarded to ``calc_bollinger_bands``.
    col : hashable, optional
        Column of ``ts`` to use, forwarded to ``calc_bollinger_bands``.
    figsize : tuple, default (10, 6)
        Figure size passed to ``plt.subplots``.
    set_kws : dict, optional
        Keyword arguments passed to ``ax.set``. When omitted, defaults to
        ``dict(ylabel='House Price ($)', title="Bollinger Bands")``.

    Returns
    -------
    (fig, ax)
        The matplotlib figure and axes the bands were drawn on.
    """
    # Resolve the default here rather than in the signature so that no
    # mutable dict is shared between calls.
    if set_kws is None:
        set_kws = dict(ylabel='House Price ($)',
                       title="Bollinger Bands")

    plot_df = calc_bollinger_bands(ts=ts,window=window,col=col)

    ## SPECIFY STYLE PER COLUMN
    plot_styles = {
        'Raw Data': dict(lw=1,ls='-',c='black'),
        'Rolling Mean': dict(lw=3,alpha=0.6, c='green'),
        'Lower Band': dict(lw=2,ls=':',c='blue'),
        'Upper Band': dict(lw=2,ls=':',c='red'),
    }

    ## Make figure and loop through columns
    fig, ax = plt.subplots(figsize=figsize)
    for name in plot_df.columns:
        # Draw on the axes we return instead of relying on the implicit
        # "current axes" state of pyplot.
        plot_df[name].plot(ax=ax, **plot_styles[name])

    ax.legend()
    ax.set(**set_kws)
    return fig, ax