Introduction to Monto Carlo Simulation of a S&P 500-like investment¶
Starting with 10,000 and investing an additional 10,000 annually, what is the probability that you will have at least 1,000,000 after 30 years of investing in the S&P 500 etf?¶
You can download the Anaconda platform from http://continuum.io¶
In [2]:
# 1. import needed libraries, set plots to display in notebook
import numpy as np
from pandas import Series, DataFrame
%matplotlib inline
import matplotlib.pyplot as plots
# allows currency formatting
import locale
locale.setlocale(locale.LC_ALL, '')
Out[2]:
In [4]:
# 2. A traditional savings calculator approach
pv = 10000
time_horizon = 30
i =.07
additions = 10000
for year in range(time_horizon):
ending = pv * (1+i) + additions
print(locale.currency(ending, grouping=True))
pv = ending
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [7]:
# 3. Generating one possible future value based on market history; I will use 9% expected return with 18% volatility
pv = 10000
expected_return = .09
volatility = .18
time_horizon = 30
annual_addition = 10000
print("\tReturn", "\t\tEnding Value".rjust(18))
for year in range(time_horizon):
market_return = np.random.normal(expected_return, volatility)
fv = pv * (1 + market_return) + annual_addition
print("\t{}".ljust(10).format(round(market_return,4)), "\t{}".rjust(10).format(locale.currency(fv, grouping=True)))
pv = fv
In [8]:
# 4. Simulate portfolio ending market values
sim = DataFrame()
iterations = 5000
for x in range(iterations):
expected_return = .09
volatility = .18
time_horizon = 30
pv = 10000
annual_investment = 10000
stream = []
for i in range(time_horizon):
end = round(pv * (1 + np.random.normal(expected_return,volatility)) + annual_investment,2)
stream.append(end)
pv = end
sim[x] = stream
In [ ]:
In [11]:
# 5. Sample first five streams of annual ending values
first_five = list(range(5))
sim[first_five]
Out[11]:
In [12]:
# 6. Plot first five simulated portfolios
plots.plot(sim[first_five])
Out[12]:
In [ ]:
In [13]:
# 7. Generate summary statistics with numpy functions
print("Count:", len(sim.loc[29]))
print("Mean: ", locale.currency(np.mean(sim.loc[29]),grouping=True))
print("SD: ",locale.currency(np.std(sim.loc[29]),grouping=True))
print("Max: ",locale.currency(np.max(sim.loc[29]), grouping=True))
print("Min: ", locale.currency(np.min(sim.loc[29]), grouping=True))
In [14]:
# 8. Generating more comprehensive summary statistics with pandas describe function
ending_values = sim.loc[29]
ending_values.describe()
Out[14]:
In [17]:
# 9. Get a visualization of the distribution of ending values
plots.hist(ending_values, bins=100)
Out[17]:
In [26]:
# 10. Calculate probability of seeing a specific ending_value or less,
# for example get close to the 75%ile, or $1,000,000
len(ending_values[ending_values<1000000]) / len(ending_values)
Out[26]:
In [22]:
# 11. You can't really get a point estimate, but you can get a range ending values
len(ending_values[(ending_values> 800000) & (ending_values< 1100000)]) /len(ending_values)
Out[22]:
In [23]:
# 12. You can get a more comprehensive table of percentiles easily using numpy's percentile function
p_tiles = np.percentile(ending_values,[5,10,15,25,75,85,90, 95])
for p in range(len(p_tiles)):
l = [5,10,15,25,75,85,90,95]
print( "{}%-ile: ".format(l[p]).rjust(15),"{}".format(locale.currency(p_tiles[p], grouping=True)))
In [ ]: