Introduction to Monto Carlo Simulation of a S&P 500-like investment

Starting with 10,000 and investing an additional 10,000 annually, what is the probability that you will have at least 1,000,000 after 30 years of investing in the S&P 500 etf?

You can download the Anaconda platform from http://continuum.io

In [2]:
# 1. import needed libraries, set plots to display in notebook

import numpy as np
from pandas import Series, DataFrame
%matplotlib inline
import matplotlib.pyplot as plots
# allows currency formatting
import locale
locale.setlocale(locale.LC_ALL, '')
Out[2]:
'en_US.UTF-8'
In [4]:
# 2. A traditional savings calculator approach

pv = 10000
time_horizon = 30
i =.07
additions = 10000

for year in range(time_horizon):
    ending = pv * (1+i) + additions
    print(locale.currency(ending, grouping=True))
    pv = ending
$20,700.00
$32,149.00
$44,399.43
$57,507.39
$71,532.91
$86,540.21
$102,598.03
$119,779.89
$138,164.48
$157,835.99
$178,884.51
$201,406.43
$225,504.88
$251,290.22
$278,880.54
$308,402.17
$339,990.33
$373,789.65
$409,954.92
$448,651.77
$490,057.39
$534,361.41
$581,766.71
$632,490.38
$686,764.70
$744,838.23
$806,976.91
$873,465.29
$944,607.86
$1,020,730.41
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [7]:
# 3. Generating one possible future value based on market history; I will use 9% expected return with 18% volatility

pv = 10000
expected_return = .09
volatility = .18
time_horizon = 30
annual_addition = 10000

print("\tReturn", "\t\tEnding Value".rjust(18))
for year in range(time_horizon):
    market_return = np.random.normal(expected_return, volatility)
    fv = pv * (1 + market_return) + annual_addition
    print("\t{}".ljust(10).format(round(market_return,4)), "\t{}".rjust(10).format(locale.currency(fv, grouping=True)))
    pv = fv








    
    
    
    
    
    
    
    
    
	Return     		Ending Value
	-0.2048               	$17,952.35
	0.1713               	$31,027.71
	0.025               	$41,803.57
	0.3934               	$68,247.53
	0.1311               	$87,193.78
	0.059               	$102,338.96
	0.0492               	$117,370.59
	0.1776               	$148,211.52
	0.1447               	$179,660.96
	0.3233               	$247,744.05
	0.0131               	$260,984.80
	0.0031               	$271,804.67
	0.101               	$309,265.20
	0.059               	$337,521.01
	0.13               	$391,406.15
	-0.1634               	$337,440.21
	0.1988               	$414,529.00
	0.1478               	$485,804.51
	-0.0644               	$464,501.76
	0.3089               	$617,969.40
	0.1163               	$699,851.96
	0.0967               	$777,561.94
	0.1271               	$886,361.09
	0.2286               	$1,099,006.92
	-0.1747               	$917,010.86
	0.1719               	$1,084,648.77
	0.1578               	$1,265,833.32
	0.1391               	$1,451,968.19
	-0.0102               	$1,447,163.05
	-0.0651               	$1,362,992.89
In [8]:
# 4. Simulate portfolio ending market values 

sim = DataFrame()
iterations = 5000


for x in range(iterations):
    expected_return = .09
    volatility = .18
    time_horizon = 30
    pv = 10000
    annual_investment = 10000
    stream = []
    for i in range(time_horizon):
        end = round(pv * (1 + np.random.normal(expected_return,volatility)) + annual_investment,2)
        
        stream.append(end)

        pv = end

    
    sim[x] = stream
In [ ]:
 
In [11]:
# 5. Sample first five streams of annual ending values
first_five = list(range(5))
sim[first_five]
Out[11]:
0 1 2 3 4
0 20608.84 25270.36 22524.66 19888.64 24937.49
1 33740.72 35372.69 32873.72 36562.31 41552.59
2 45225.73 44210.06 45421.19 46184.55 51021.79
3 58379.60 61796.20 80086.09 55155.90 86519.77
4 85781.28 70098.26 68538.91 63000.86 108460.91
5 87940.43 82285.84 60522.85 71575.43 110073.63
6 82747.60 102489.51 96294.97 89848.70 133817.78
7 82756.72 123217.12 109410.25 135515.92 182665.42
8 144639.26 136355.22 142602.06 127311.36 214785.48
9 184432.55 169326.84 148948.71 136936.95 286522.75
10 209482.38 174629.00 183539.10 146726.65 354484.07
11 249300.45 176012.46 230442.52 139957.36 370285.78
12 312886.35 173168.94 284435.34 162090.12 530016.35
13 268715.01 167641.51 327392.49 180188.15 684339.57
14 226320.18 158084.61 338060.78 171399.13 719974.57
15 269760.74 165789.84 435316.26 196710.71 791131.04
16 378558.20 187355.21 609670.59 259472.33 670071.12
17 513870.43 193416.70 631204.26 265128.09 301802.36
18 415726.47 243682.94 640962.19 324803.89 246695.60
19 456953.67 326003.49 848527.30 461958.23 280869.29
20 447794.01 361800.47 810154.49 419162.23 285238.23
21 483183.11 438958.81 905952.35 527859.73 313787.79
22 502885.58 401016.84 784807.42 665936.48 300198.54
23 457012.27 441272.03 1015493.55 759131.46 319280.84
24 518696.43 542515.32 778863.92 821615.70 381654.14
25 525934.55 662417.47 795153.07 797219.66 391283.95
26 648207.61 694768.54 995491.81 632315.42 326099.13
27 592420.63 717087.52 897694.34 721284.01 367282.34
28 699679.32 699155.21 839630.49 626382.54 538488.72
29 840002.22 971588.23 1032396.34 890854.62 564475.89
In [12]:
# 6. Plot first five simulated portfolios
plots.plot(sim[first_five])
Out[12]:
[<matplotlib.lines.Line2D at 0x112161048>,
 <matplotlib.lines.Line2D at 0x112161208>,
 <matplotlib.lines.Line2D at 0x1121613c8>,
 <matplotlib.lines.Line2D at 0x1121615c0>,
 <matplotlib.lines.Line2D at 0x1121617b8>]
In [ ]:
 
In [13]:
# 7. Generate summary statistics with numpy functions

print("Count:", len(sim.loc[29]))
print("Mean: ", locale.currency(np.mean(sim.loc[29]),grouping=True))
print("SD: ",locale.currency(np.std(sim.loc[29]),grouping=True))
print("Max: ",locale.currency(np.max(sim.loc[29]), grouping=True))
print("Min: ", locale.currency(np.min(sim.loc[29]), grouping=True))
Count: 5000
Mean:  $1,511,783.96
SD:  $1,207,664.05
Max:  $15,360,616.84
Min:  $113,779.44
In [14]:
# 8. Generating more comprehensive summary statistics with pandas describe function
ending_values = sim.loc[29]
ending_values.describe()
Out[14]:
count    5.000000e+03
mean     1.511784e+06
std      1.207785e+06
min      1.137794e+05
25%      7.462566e+05
50%      1.184005e+06
75%      1.867706e+06
max      1.536062e+07
Name: 29, dtype: float64
In [17]:
# 9. Get a visualization of the distribution of ending values

plots.hist(ending_values, bins=100)
Out[17]:
(array([  63.,  229.,  342.,  526.,  495.,  442.,  392.,  403.,  299.,
         261.,  203.,  182.,  150.,  134.,  107.,   97.,   90.,   73.,
          64.,   44.,   65.,   53.,   31.,   25.,   24.,   22.,   20.,
          21.,   15.,   16.,    4.,    7.,   14.,    7.,    7.,    1.,
           8.,    3.,    5.,    3.,    3.,    4.,    4.,    3.,    2.,
           4.,    2.,    1.,    2.,    2.,    2.,    4.,    1.,    1.,
           2.,    1.,    1.,    1.,    2.,    1.,    0.,    0.,    1.,
           1.,    2.,    0.,    0.,    0.,    1.,    1.,    0.,    0.,
           1.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    1.,    0.,    0.,    0.,    0.,    1.,    0.,    0.,
           0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    1.]),
 array([   113779.44 ,    266247.814,    418716.188,    571184.562,
           723652.936,    876121.31 ,   1028589.684,   1181058.058,
          1333526.432,   1485994.806,   1638463.18 ,   1790931.554,
          1943399.928,   2095868.302,   2248336.676,   2400805.05 ,
          2553273.424,   2705741.798,   2858210.172,   3010678.546,
          3163146.92 ,   3315615.294,   3468083.668,   3620552.042,
          3773020.416,   3925488.79 ,   4077957.164,   4230425.538,
          4382893.912,   4535362.286,   4687830.66 ,   4840299.034,
          4992767.408,   5145235.782,   5297704.156,   5450172.53 ,
          5602640.904,   5755109.278,   5907577.652,   6060046.026,
          6212514.4  ,   6364982.774,   6517451.148,   6669919.522,
          6822387.896,   6974856.27 ,   7127324.644,   7279793.018,
          7432261.392,   7584729.766,   7737198.14 ,   7889666.514,
          8042134.888,   8194603.262,   8347071.636,   8499540.01 ,
          8652008.384,   8804476.758,   8956945.132,   9109413.506,
          9261881.88 ,   9414350.254,   9566818.628,   9719287.002,
          9871755.376,  10024223.75 ,  10176692.124,  10329160.498,
         10481628.872,  10634097.246,  10786565.62 ,  10939033.994,
         11091502.368,  11243970.742,  11396439.116,  11548907.49 ,
         11701375.864,  11853844.238,  12006312.612,  12158780.986,
         12311249.36 ,  12463717.734,  12616186.108,  12768654.482,
         12921122.856,  13073591.23 ,  13226059.604,  13378527.978,
         13530996.352,  13683464.726,  13835933.1  ,  13988401.474,
         14140869.848,  14293338.222,  14445806.596,  14598274.97 ,
         14750743.344,  14903211.718,  15055680.092,  15208148.466,
         15360616.84 ]),
 <a list of 100 Patch objects>)
In [26]:
# 10. Calculate probability of seeing a specific ending_value or less, 
# for example get close to the 75%ile, or $1,000,000

len(ending_values[ending_values<1000000]) / len(ending_values)
Out[26]:
0.405
In [22]:
# 11. You can't really get a point estimate, but you can get a range ending values

len(ending_values[(ending_values> 800000) & (ending_values< 1100000)]) /len(ending_values)
Out[22]:
0.1776
In [23]:
# 12. You can get a more comprehensive table of percentiles easily using numpy's percentile function

p_tiles = np.percentile(ending_values,[5,10,15,25,75,85,90, 95])
for p in range(len(p_tiles)):
    l = [5,10,15,25,75,85,90,95]
    print( "{}%-ile: ".format(l[p]).rjust(15),"{}".format(locale.currency(p_tiles[p], grouping=True)))
       5%-ile:  $400,786.79
      10%-ile:  $517,436.68
      15%-ile:  $606,444.07
      25%-ile:  $746,256.62
      75%-ile:  $1,867,705.61
      85%-ile:  $2,443,746.84
      90%-ile:  $2,879,132.81
      95%-ile:  $3,657,034.78
In [ ]: