# 1. import relevant libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as pdr
%matplotlib inline
# 2. Download daily AMZN prices and store them in a DataFrame.
#    The original call, pdr.get_data_google, is not available in current
#    pandas-datareader releases (Google Finance stopped serving this data),
#    so the prices are read from Stooq through the generic DataReader entry point.
#    NOTE(review): Stooq normally returns rows newest-first; sort_index() puts them
#    in ascending date order so the later shift(1) refers to the previous trading day.
#    Stooq prices may be adjusted differently from the old Google feed - confirm
#    that is acceptable for this analysis.
amzn = pdr.DataReader('AMZN', 'stooq', start='2016-08-03').sort_index()
# 3. Preview the first rows of the downloaded frame
amzn.head()
# 4. Keep only the column of interest: the closing price
amzn_close = amzn.loc[:, 'Close']
# 5. Check what kind of object the closing prices are
type(amzn_close.head())
# 6. Turn close-to-close prices into instantaneous (log) rates of return, in percent:
#    100 * ln(P_t / P_{t-1}). The first entry is NaN because it has no prior close.
previous_close = amzn_close.shift(1)
amzn_percent_change = 100 * np.log(amzn_close / previous_close)
amzn_percent_change.head()
# 7. pandas describe() gives descriptive statistics; by default it treats the data
#    as a sample, and it handles missing values by omitting them
amzn_percent_change.describe()
# 8. scipy.stats.describe produces an alternative, more comprehensive table of
#    descriptives (also assuming a sample); the leading NaN is skipped by position
from scipy import stats
stats.describe(amzn_percent_change.iloc[1:])
# 9. For comparison, generate random numbers that follow a normal distribution
#    with the same mean, standard deviation and number of observations as the
#    AMZN returns. These were previously hard-coded (0.1064, 1.1948, 252), which
#    only matched one snapshot of the data: the download has no end date, so the
#    series keeps growing and fixed constants drift away from the real sample.
amzn_returns = amzn_percent_change.dropna()  # drop the leading NaN (no prior close)
x = np.random.normal(
    loc=amzn_returns.mean(),
    scale=amzn_returns.std(ddof=1),  # sample standard deviation
    size=len(amzn_returns),
)
stats.describe(x)
# 10. Test the kurtosis with scipy.stats. The null hypothesis is that the sample
#     is drawn from a population whose kurtosis is that of a normally
#     distributed variable.
x_kurtosis = stats.kurtosistest(x)
print("x: ", x_kurtosis)
# The first return is NaN, so it is skipped by position
amzn_kurtosis = stats.kurtosistest(amzn_percent_change.iloc[1:])
print("amzn: ", amzn_kurtosis)
# 11. plot histogram of price changes with normal curve overlay
import matplotlib.mlab as mlab
# Histogram of the AMZN returns, scaled to a probability density so a normal
# curve can be drawn on top. `density=True` replaces the `normed=True` keyword,
# which matplotlib no longer accepts.
amzn_plot_data = amzn_percent_change.dropna()  # hist cannot bin NaN values
plt.hist(amzn_plot_data, edgecolor='black', density=True)
# overlay normal curve using the sample mean and sample (ddof=1) standard deviation;
# scipy's norm.pdf stands in for mlab.normpdf, which was removed from matplotlib
mean = float(amzn_plot_data.mean())
sd = float(amzn_plot_data.std(ddof=1))
min_chng = float(amzn_plot_data.min())
max_chng = float(amzn_plot_data.max())
a = np.linspace(min_chng, max_chng, 100)
plt.plot(a, stats.norm.pdf(a, loc=mean, scale=sd))
# 12. compare to sample drawn from a normally distributed population with same attributes as amzn price change
# Start a new figure so this histogram is not painted over the AMZN one
plt.figure()
plt.hist(x, density=True)
mean = float(np.mean(x))
sd = float(np.std(x, ddof=1))  # ddof=1 to match the AMZN overlay above
b = np.linspace(np.min(x), np.max(x), 100)
plt.plot(b, stats.norm.pdf(b, loc=mean, scale=sd))
plt.show()