From a46b3e6806b3ea3e5dd03d58e62961a19de2423f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 6 Nov 2015 14:22:55 -0800 Subject: [PATCH] Run through ch11 --- ch11.ipynb | 2558 +++++++++++++++++++++++++++------------------------- 1 file changed, 1316 insertions(+), 1242 deletions(-) diff --git a/ch11.ipynb b/ch11.ipynb index b97b303fd..d9568de78 100644 --- a/ch11.ipynb +++ b/ch11.ipynb @@ -1,1245 +1,1319 @@ { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Financial and Economic Data Applications" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from __future__ import division\n", + "from pandas import Series, DataFrame\n", + "import pandas as pd\n", + "from numpy.random import randn\n", + "import numpy as np\n", + "pd.options.display.max_rows = 12\n", + "np.set_printoptions(precision=4, suppress=True)\n", + "import matplotlib.pyplot as plt\n", + "plt.rc('figure', figsize=(12, 6))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%pwd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data munging topics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Time series and cross-section alignment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "close_px = pd.read_csv('ch11/stock_px.csv', parse_dates=True, index_col=0)\n", + "volume = pd.read_csv('ch11/volume.csv', parse_dates=True, index_col=0)\n", + "prices = close_px.ix['2011-09-05':'2011-09-14', ['AAPL', 'JNJ', 'SPX', 'XOM']]\n", + "volume = volume.ix['2011-09-05':'2011-09-12', ['AAPL', 'JNJ', 'XOM']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "prices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "prices * volume" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "vwap = (prices * volume).sum() / volume.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "vwap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "vwap.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "prices.align(volume, join='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "s1 = Series(range(3), index=['a', 'b', 'c'])\n", + "s2 = Series(range(4), index=['d', 'b', 'c', 'e'])\n", + "s3 = Series(range(3), index=['f', 'a', 'c'])\n", + "DataFrame({'one': s1, 'two': s2, 'three': s3})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "DataFrame({'one': s1, 'two': s2, 'three': s3}, index=list('face'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Operations with time series of different frequencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts1 = Series(np.random.randn(3),\n", + " index=pd.date_range('2012-6-13', periods=3, freq='W-WED'))\n", + "ts1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts1.resample('B')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts1.resample('B', fill_method='ffill')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dates = pd.DatetimeIndex(['2012-6-12', '2012-6-17', '2012-6-18',\n", + " '2012-6-21', '2012-6-22', '2012-6-29'])\n", + "ts2 = Series(np.random.randn(6), index=dates)\n", + "ts2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts1.reindex(ts2.index, method='ffill')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts2 + ts1.reindex(ts2.index, method='ffill')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Using periods instead of timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],\n", + " index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))\n", + "infl = Series([0.025, 0.045, 0.037, 0.04],\n", + " index=pd.period_range('1982', periods=4, freq='A-DEC'))\n", + "gdp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "infl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "infl_q = infl.asfreq('Q-SEP', how='end')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "infl_q" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "infl_q.reindex(gdp.index, method='ffill')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Time of day and \"as of\" data selection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Make an intraday date range and time series\n", + "rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')\n", + "# Make a 5-day series of 9:30-15:59 values\n", + "rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])\n", + "ts = Series(np.arange(len(rng), dtype=float), index=rng)\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from datetime import time\n", + "ts[time(10, 0)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.at_time(time(10, 0))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ts.between_time(time(10, 0), time(10, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.random.seed(12346)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Set most of the time series randomly to NA\n", + "indexer = np.sort(np.random.permutation(len(ts))[700:])\n", + "irr_ts = ts.copy()\n", + "irr_ts[indexer] = np.nan\n", + "irr_ts['2012-06-01 09:50':'2012-06-01 10:00']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B')\n", + "irr_ts.asof(selection)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Splicing together data sources" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "data1 = DataFrame(np.ones((6, 3), dtype=float),\n", + " columns=['a', 'b', 'c'],\n", + " index=pd.date_range('6/12/2012', periods=6))\n", + "data2 = DataFrame(np.ones((6, 3), dtype=float) * 2,\n", + " columns=['a', 'b', 'c'],\n", + " index=pd.date_range('6/13/2012', periods=6))\n", + "spliced = pd.concat([data1.ix[:'2012-06-14'], data2.ix['2012-06-15':]])\n", + "spliced" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "data2 = DataFrame(np.ones((6, 4), dtype=float) * 2,\n", + " columns=['a', 'b', 'c', 'd'],\n", + " index=pd.date_range('6/13/2012', periods=6))\n", + "spliced = pd.concat([data1.ix[:'2012-06-14'], data2.ix['2012-06-15':]])\n", + "spliced" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "spliced_filled = spliced.combine_first(data2)\n", + "spliced_filled" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "spliced.update(data2, overwrite=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "spliced" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "cp_spliced = spliced.copy()\n", + "cp_spliced[['a', 'c']] = data1[['a', 'c']]\n", + "cp_spliced" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Return indexes and cumulative returns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import pandas.io.data as web\n", + "price = web.get_data_yahoo('AAPL', '2011-01-01')['Adj Close']\n", + "price[-5:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "price['2011-10-03'] / price['2011-3-01'] - 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "returns = price.pct_change()\n", + "ret_index = (1 + returns).cumprod()\n", + "ret_index[0] = 1 # Set first value to 1\n", + "ret_index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "m_returns = ret_index.resample('BM', how='last').pct_change()\n", + "m_returns['2012']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "m_rets = (1 + returns).resample('M', how='prod', kind='period') - 1\n", + "m_rets['2012']" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "returns[dividend_dates] += dividend_pcts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Group transforms and analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.options.display.max_rows = 100\n", + "pd.options.display.max_columns = 10\n", + "np.random.seed(12345)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import random; random.seed(0)\n", + "import string\n", + "\n", + "N = 1000\n", + "def rands(n):\n", + " choices = string.ascii_uppercase\n", + " return ''.join([random.choice(choices) for _ in xrange(n)])\n", + "tickers = np.array([rands(5) for _ in xrange(N)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "M = 500\n", + "df = DataFrame({'Momentum' : np.random.randn(M) / 200 + 0.03,\n", + " 'Value' : np.random.randn(M) / 200 + 0.08,\n", + " 'ShortInterest' : np.random.randn(M) / 200 - 0.02},\n", + " index=tickers[:M])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "ind_names = np.array(['FINANCIAL', 'TECH'])\n", + "sampler = np.random.randint(0, len(ind_names), N)\n", + "industries = Series(ind_names[sampler], index=tickers,\n", + " name='industry')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "by_industry = df.groupby(industries)\n", + "by_industry.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "by_industry.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Within-Industry Standardize\n", + "def zscore(group):\n", + " return (group - group.mean()) / group.std()\n", + "\n", + "df_stand = by_industry.apply(zscore)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_stand.groupby(industries).agg(['mean', 'std'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Within-industry rank descending\n", + "ind_rank = by_industry.rank(ascending=False)\n", + "ind_rank.groupby(industries).agg(['min', 'max'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Industry rank and standardize\n", + "by_industry.apply(lambda x: zscore(x.rank()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Group factor exposures" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from numpy.random import rand\n", + "fac1, fac2, fac3 = np.random.rand(3, 1000)\n", + "\n", + "ticker_subset = tickers.take(np.random.permutation(N)[:1000])\n", + "\n", + "# Weighted sum of factors plus noise\n", + "port = Series(0.7 * fac1 - 1.2 * fac2 + 0.3 * fac3 + rand(1000),\n", + " index=ticker_subset)\n", + "factors = DataFrame({'f1': fac1, 'f2': fac2, 'f3': fac3},\n", + " index=ticker_subset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "factors.corrwith(port)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.ols(y=port, x=factors).beta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def beta_exposure(chunk, factors=None):\n", + " return pd.ols(y=chunk, x=factors).beta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "by_ind = port.groupby(industries)\n", + "exposures = by_ind.apply(beta_exposure, factors=factors)\n", + "exposures.unstack()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Decile and quartile analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import pandas.io.data as web\n", + "data = web.get_data_yahoo('SPY', '2006-01-01')\n", + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "px = data['Adj Close']\n", + "returns = px.pct_change()\n", + "\n", + "def to_index(rets):\n", + " index = (1 + rets).cumprod()\n", + " first_loc = max(index.index.get_loc(index.idxmax()) - 1, 0)\n", + " index.values[first_loc] = 1\n", + " return index\n", + "\n", + "def trend_signal(rets, lookback, lag):\n", + " signal = pd.rolling_sum(rets, lookback, min_periods=lookback - 5)\n", + " return signal.shift(lag)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "signal = trend_signal(returns, 100, 3)\n", + "trade_friday = signal.resample('W-FRI').resample('B', fill_method='ffill')\n", + "trade_rets = trade_friday.shift(1) * returns\n", + "trade_rets = trade_rets[:len(returns)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "to_index(trade_rets).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "vol = pd.rolling_std(returns, 250, min_periods=200) * np.sqrt(250)\n", + "\n", + "def sharpe(rets, ann=250):\n", + " return rets.mean() / rets.std() * np.sqrt(ann)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "cats = pd.qcut(vol, 4)\n", + "print('cats: %d, trade_rets: %d, vol: %d' % (len(cats), len(trade_rets), len(vol)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "trade_rets.groupby(cats).agg(sharpe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## More example applications" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Signal frontier analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "names = ['AAPL', 'GOOG', 'MSFT', 'DELL', 'GS', 'MS', 'BAC', 'C']\n", + "def get_px(stock, start, end):\n", + " return web.get_data_yahoo(stock, start, end)['Adj Close']\n", + "px = DataFrame({n: get_px(n, None, None) for n in names})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "#px = pd.read_csv('ch11/stock_px.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.close('all')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "px = px.asfreq('B').fillna(method='pad')\n", + "rets = px.pct_change()\n", + "((1 + rets).cumprod() - 1).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def calc_mom(price, lookback, lag):\n", + " mom_ret = price.shift(lag).pct_change(lookback)\n", + " ranks = mom_ret.rank(axis=1, ascending=False)\n", + " demeaned = ranks.subtract(ranks.mean(axis=1), axis=0)\n", + " return demeaned.divide(demeaned.std(axis=1), axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "compound = lambda x : (1 + x).prod() - 1\n", + "daily_sr = lambda x: x.mean() / x.std()\n", + "\n", + "def strat_sr(prices, lb, hold):\n", + " # Compute portfolio weights\n", + " freq = '%dB' % hold\n", + " port = calc_mom(prices, lb, lag=1)\n", + "\n", + " daily_rets = prices.pct_change()\n", + "\n", + " # Compute portfolio returns\n", + " port = port.shift(1).resample(freq, how='first')\n", + " returns = daily_rets.resample(freq, how=compound)\n", + " port_rets = (port * returns).sum(axis=1)\n", + "\n", + " return daily_sr(port_rets) * np.sqrt(252 / hold)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "strat_sr(px, 70, 30)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "lookbacks = range(20, 90, 5)\n", + "holdings = range(20, 90, 5)\n", + "dd = defaultdict(dict)\n", + "for lb in lookbacks:\n", + " for hold in holdings:\n", + " dd[lb][hold] = strat_sr(px, lb, hold)\n", + "\n", + "ddf = DataFrame(dd)\n", + "ddf.index.name = 'Holding Period'\n", + "ddf.columns.name = 'Lookback Period'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "def heatmap(df, cmap=plt.cm.gray_r):\n", + " fig = plt.figure()\n", + " ax = fig.add_subplot(111)\n", + " axim = ax.imshow(df.values, cmap=cmap, interpolation='nearest')\n", + " ax.set_xlabel(df.columns.name)\n", + " ax.set_xticks(np.arange(len(df.columns)))\n", + " ax.set_xticklabels(list(df.columns))\n", + " ax.set_ylabel(df.index.name)\n", + " ax.set_yticks(np.arange(len(df.index)))\n", + " ax.set_yticklabels(list(df.index))\n", + " plt.colorbar(axim)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "heatmap(ddf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Future contract rolling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.options.display.max_rows = 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import pandas.io.data as web\n", + "# Approximate price of S&P 500 index\n", + "px = web.get_data_yahoo('SPY')['Adj Close'] * 10\n", + "px" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "expiry = {'ESU2': datetime(2012, 9, 21),\n", + " 'ESZ2': datetime(2012, 12, 21)}\n", + "expiry = Series(expiry).order()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "expiry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.random.seed(12347)\n", + "N = 200\n", + "walk = (np.random.randint(0, 200, size=N) - 100) * 0.25\n", + "perturb = (np.random.randint(0, 20, size=N) - 10) * 0.25\n", + "walk = walk.cumsum()\n", + "\n", + "rng = pd.date_range(px.index[0], periods=len(px) + N, freq='B')\n", + "near = np.concatenate([px.values, px.values[-1] + walk])\n", + "far = np.concatenate([px.values, px.values[-1] + walk + perturb])\n", + "prices = DataFrame({'ESU2': near, 'ESZ2': far}, index=rng)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "prices.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def get_roll_weights(start, expiry, items, roll_periods=5):\n", + " # start : first date to compute weighting DataFrame\n", + " # expiry : Series of ticker -> expiration dates\n", + " # items : sequence of contract names\n", + "\n", + " dates = pd.date_range(start, expiry[-1], freq='B')\n", + " weights = DataFrame(np.zeros((len(dates), len(items))),\n", + " index=dates, columns=items)\n", + "\n", + " prev_date = weights.index[0]\n", + " for i, (item, ex_date) in enumerate(expiry.iteritems()):\n", + " if i < len(expiry) - 1:\n", + " weights.ix[prev_date:ex_date - pd.offsets.BDay(), item] = 1\n", + " roll_rng = pd.date_range(end=ex_date - pd.offsets.BDay(),\n", + " periods=roll_periods + 1, freq='B')\n", + "\n", + " decay_weights = np.linspace(0, 1, roll_periods + 1)\n", + " weights.ix[roll_rng, item] = 1 - decay_weights\n", + " weights.ix[roll_rng, expiry.index[i + 1]] = decay_weights\n", + " else:\n", + " weights.ix[prev_date:, item] = 1\n", + "\n", + " prev_date = ex_date\n", + "\n", + " return weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "weights = get_roll_weights('6/1/2012', expiry, prices.columns)\n", + "weights.ix['2012-09-12':'2012-09-21']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rolled_returns = (prices.pct_change() * weights).sum(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Rolling correlation and linear regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "aapl = web.get_data_yahoo('AAPL', '2000-01-01')['Adj Close']\n", + "msft = web.get_data_yahoo('MSFT', '2000-01-01')['Adj Close']\n", + "\n", + "aapl_rets = aapl.pct_change()\n", + "msft_rets = msft.pct_change()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pd.rolling_corr(aapl_rets, msft_rets, 250).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "model = pd.ols(y=aapl_rets, x={'MSFT': msft_rets}, window=250)\n", + "model.beta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "model.beta['MSFT'].plot()" + ] + } + ], "metadata": { - "name": "", - "signature": "sha256:a9eebd34b149c3c030b4256d857ef5171f5a0b76224da80d6620b3050ae86364" - }, - "nbformat": 3, - "nbformat_minor": 0, - "worksheets": [ - { - "cells": [ - { - "cell_type": "heading", - "level": 1, - "metadata": {}, - "source": [ - "Financial and Economic Data Applications" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from __future__ import division\n", - "from pandas import Series, DataFrame\n", - "import pandas as pd\n", - "from numpy.random import randn\n", - "import numpy as np\n", - "pd.options.display.max_rows = 12\n", - "np.set_printoptions(precision=4, suppress=True)\n", - "import matplotlib.pyplot as plt\n", - "plt.rc('figure', figsize=(12, 6))" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%matplotlib inline" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%pwd" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%cd ../book_scripts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Data munging topics" - ] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Time series and cross-section alignment" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "close_px = pd.read_csv('ch11/stock_px.csv', parse_dates=True, index_col=0)\n", - "volume = pd.read_csv('ch11/volume.csv', parse_dates=True, index_col=0)\n", - "prices = close_px.ix['2011-09-05':'2011-09-14', ['AAPL', 'JNJ', 'SPX', 'XOM']]\n", - "volume = volume.ix['2011-09-05':'2011-09-12', ['AAPL', 'JNJ', 'XOM']]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "prices" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "volume" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "prices * volume" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "vwap = (prices * volume).sum() / volume.sum()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "vwap" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "vwap.dropna()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "prices.align(volume, join='inner')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "s1 = Series(range(3), index=['a', 'b', 'c'])\n", - "s2 = Series(range(4), index=['d', 'b', 'c', 'e'])\n", - "s3 = Series(range(3), index=['f', 'a', 'c'])\n", - "DataFrame({'one': s1, 'two': s2, 'three': s3})" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "DataFrame({'one': s1, 'two': s2, 'three': s3}, index=list('face'))" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Operations with time series of different frequencies" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts1 = Series(np.random.randn(3),\n", - " index=pd.date_range('2012-6-13', periods=3, freq='W-WED'))\n", - "ts1" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts1.resample('B')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts1.resample('B', fill_method='ffill')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "dates = pd.DatetimeIndex(['2012-6-12', '2012-6-17', '2012-6-18',\n", - " '2012-6-21', '2012-6-22', '2012-6-29'])\n", - "ts2 = Series(np.random.randn(6), index=dates)\n", - "ts2" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts1.reindex(ts2.index, method='ffill')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts2 + ts1.reindex(ts2.index, method='ffill')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 4, - "metadata": {}, - "source": [ - "Using periods instead of timestamps" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],\n", - " index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))\n", - "infl = Series([0.025, 0.045, 0.037, 0.04],\n", - " index=pd.period_range('1982', periods=4, freq='A-DEC'))\n", - "gdp" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "infl" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "infl_q = infl.asfreq('Q-SEP', how='end')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "infl_q" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "infl_q.reindex(gdp.index, method='ffill')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Time of day and \"as of\" data selection" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# Make an intraday date range and time series\n", - "rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')\n", - "# Make a 5-day series of 9:30-15:59 values\n", - "rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])\n", - "ts = Series(np.arange(len(rng), dtype=float), index=rng)\n", - "ts" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from datetime import time\n", - "ts[time(10, 0)]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.at_time(time(10, 0))" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ts.between_time(time(10, 0), time(10, 1))" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "np.random.seed(12346)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# Set most of the time series randomly to NA\n", - "indexer = np.sort(np.random.permutation(len(ts))[700:])\n", - "irr_ts = ts.copy()\n", - "irr_ts[indexer] = np.nan\n", - "irr_ts['2012-06-01 09:50':'2012-06-01 10:00']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B')\n", - "irr_ts.asof(selection)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Splicing together data sources" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "data1 = DataFrame(np.ones((6, 3), dtype=float),\n", - " columns=['a', 'b', 'c'],\n", - " index=pd.date_range('6/12/2012', periods=6))\n", - "data2 = DataFrame(np.ones((6, 3), dtype=float) * 2,\n", - " columns=['a', 'b', 'c'],\n", - " index=pd.date_range('6/13/2012', periods=6))\n", - "spliced = pd.concat([data1.ix[:'2012-06-14'], data2.ix['2012-06-15':]])\n", - "spliced" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "data2 = DataFrame(np.ones((6, 4), dtype=float) * 2,\n", - " columns=['a', 'b', 'c', 'd'],\n", - " index=pd.date_range('6/13/2012', periods=6))\n", - "spliced = pd.concat([data1.ix[:'2012-06-14'], data2.ix['2012-06-15':]])\n", - "spliced" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "spliced_filled = spliced.combine_first(data2)\n", - "spliced_filled" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "spliced.update(data2, overwrite=False)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "spliced" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "cp_spliced = spliced.copy()\n", - "cp_spliced[['a', 'c']] = data1[['a', 'c']]\n", - "cp_spliced" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Return indexes and cumulative returns" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import pandas.io.data as web\n", - "price = web.get_data_yahoo('AAPL', '2011-01-01')['Adj Close']\n", - "price[-5:]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "price['2011-10-03'] / price['2011-3-01'] - 1" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "returns = price.pct_change()\n", - "ret_index = (1 + returns).cumprod()\n", - "ret_index[0] = 1 # Set first value to 1\n", - "ret_index" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "m_returns = ret_index.resample('BM', how='last').pct_change()\n", - "m_returns['2012']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "m_rets = (1 + returns).resample('M', how='prod', kind='period') - 1\n", - "m_rets['2012']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "returns[dividend_dates] += dividend_pcts" - ] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Group transforms and analysis" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.options.display.max_rows = 100\n", - "pd.options.display.max_columns = 10\n", - "np.random.seed(12345)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import random; random.seed(0)\n", - "import string\n", - "\n", - "N = 1000\n", - "def rands(n):\n", - " choices = string.ascii_uppercase\n", - " return ''.join([random.choice(choices) for _ in xrange(n)])\n", - "tickers = np.array([rands(5) for _ in xrange(N)])" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "M = 500\n", - "df = DataFrame({'Momentum' : np.random.randn(M) / 200 + 0.03,\n", - " 'Value' : np.random.randn(M) / 200 + 0.08,\n", - " 'ShortInterest' : np.random.randn(M) / 200 - 0.02},\n", - " index=tickers[:M])" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "ind_names = np.array(['FINANCIAL', 'TECH'])\n", - "sampler = np.random.randint(0, len(ind_names), N)\n", - "industries = Series(ind_names[sampler], index=tickers,\n", - " name='industry')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "by_industry = df.groupby(industries)\n", - "by_industry.mean()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "by_industry.describe()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# Within-Industry Standardize\n", - "def zscore(group):\n", - " return (group - group.mean()) / group.std()\n", - "\n", - "df_stand = by_industry.apply(zscore)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "df_stand.groupby(industries).agg(['mean', 'std'])" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# Within-industry rank descending\n", - "ind_rank = by_industry.rank(ascending=False)\n", - "ind_rank.groupby(industries).agg(['min', 'max'])" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# Industry rank and standardize\n", - "by_industry.apply(lambda x: zscore(x.rank()))" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Group factor exposures" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from numpy.random import rand\n", - "fac1, fac2, fac3 = np.random.rand(3, 1000)\n", - "\n", - "ticker_subset = tickers.take(np.random.permutation(N)[:1000])\n", - "\n", - "# Weighted sum of factors plus noise\n", - "port = Series(0.7 * fac1 - 1.2 * fac2 + 0.3 * fac3 + rand(1000),\n", - " index=ticker_subset)\n", - "factors = DataFrame({'f1': fac1, 'f2': fac2, 'f3': fac3},\n", - " index=ticker_subset)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "factors.corrwith(port)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.ols(y=port, x=factors).beta" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "def beta_exposure(chunk, factors=None):\n", - " return pd.ols(y=chunk, x=factors).beta" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "by_ind = port.groupby(industries)\n", - "exposures = by_ind.apply(beta_exposure, factors=factors)\n", - "exposures.unstack()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Decile and quartile analysis" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import pandas.io.data as web\n", - "data = web.get_data_yahoo('SPY', '2006-01-01')\n", - "data.info()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "px = data['Adj Close']\n", - "returns = px.pct_change()\n", - "\n", - "def to_index(rets):\n", - " index = (1 + rets).cumprod()\n", - " first_loc = max(index.index.get_loc(index.idxmax()) - 1, 0)\n", - " index.values[first_loc] = 1\n", - " return index\n", - "\n", - "def trend_signal(rets, lookback, lag):\n", - " signal = pd.rolling_sum(rets, lookback, min_periods=lookback - 5)\n", - " return signal.shift(lag)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "signal = trend_signal(returns, 100, 3)\n", - "trade_friday = signal.resample('W-FRI').resample('B', fill_method='ffill')\n", - "trade_rets = trade_friday.shift(1) * returns\n", - "trade_rets = trade_rets[:len(returns)]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "to_index(trade_rets).plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "vol = pd.rolling_std(returns, 250, min_periods=200) * np.sqrt(250)\n", - "\n", - "def sharpe(rets, ann=250):\n", - " return rets.mean() / rets.std() * np.sqrt(ann)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "cats = pd.qcut(vol, 4)\n", - "print('cats: %d, trade_rets: %d, vol: %d' % (len(cats), len(trade_rets), len(vol)))" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "trade_rets.groupby(cats).agg(sharpe)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "More example applications" - ] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Signal frontier analysis" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "names = ['AAPL', 'GOOG', 'MSFT', 'DELL', 'GS', 'MS', 'BAC', 'C']\n", - "def get_px(stock, start, end):\n", - " return web.get_data_yahoo(stock, start, end)['Adj Close']\n", - "px = DataFrame({n: get_px(n, None, None) for n in names})" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "px = pd.read_pickle('notebooks/stock_prices')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "plt.close('all')" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "px = px.asfreq('B').fillna(method='pad')\n", - "rets = px.pct_change()\n", - "((1 + rets).cumprod() - 1).plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "def calc_mom(price, lookback, lag):\n", - " mom_ret = price.shift(lag).pct_change(lookback)\n", - " ranks = mom_ret.rank(axis=1, ascending=False)\n", - " demeaned = ranks.subtract(ranks.mean(axis=1), axis=0)\n", - " return demeaned.divide(demeaned.std(axis=1), axis=0)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "compound = lambda x : (1 + x).prod() - 1\n", - "daily_sr = lambda x: x.mean() / x.std()\n", - "\n", - "def strat_sr(prices, lb, hold):\n", - " # Compute portfolio weights\n", - " freq = '%dB' % hold\n", - " port = calc_mom(prices, lb, lag=1)\n", - "\n", - " daily_rets = prices.pct_change()\n", - "\n", - " # Compute portfolio returns\n", - " port = port.shift(1).resample(freq, how='first')\n", - " returns = daily_rets.resample(freq, how=compound)\n", - " port_rets = (port * returns).sum(axis=1)\n", - "\n", - " return daily_sr(port_rets) * np.sqrt(252 / hold)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "strat_sr(px, 70, 30)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from collections import defaultdict\n", - "\n", - "lookbacks = range(20, 90, 5)\n", - "holdings = range(20, 90, 5)\n", - "dd = defaultdict(dict)\n", - "for lb in lookbacks:\n", - " for hold in holdings:\n", - " dd[lb][hold] = strat_sr(px, lb, hold)\n", - "\n", - "ddf = DataFrame(dd)\n", - "ddf.index.name = 'Holding Period'\n", - "ddf.columns.name = 'Lookback Period'" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import matplotlib.pyplot as plt\n", - "\n", - "def heatmap(df, cmap=plt.cm.gray_r):\n", - " fig = plt.figure()\n", - " ax = fig.add_subplot(111)\n", - " axim = ax.imshow(df.values, cmap=cmap, interpolation='nearest')\n", - " ax.set_xlabel(df.columns.name)\n", - " ax.set_xticks(np.arange(len(df.columns)))\n", - " ax.set_xticklabels(list(df.columns))\n", - " ax.set_ylabel(df.index.name)\n", - " ax.set_yticks(np.arange(len(df.index)))\n", - " ax.set_yticklabels(list(df.index))\n", - " plt.colorbar(axim)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "heatmap(ddf)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Future contract rolling" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.options.display.max_rows = 10" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import pandas.io.data as web\n", - "# Approximate price of S&P 500 index\n", - "px = web.get_data_yahoo('SPY')['Adj Close'] * 10\n", - "px" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from datetime import datetime\n", - "expiry = {'ESU2': datetime(2012, 9, 21),\n", - " 'ESZ2': datetime(2012, 12, 21)}\n", - "expiry = Series(expiry).order()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "expiry" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "np.random.seed(12347)\n", - "N = 200\n", - "walk = (np.random.randint(0, 200, size=N) - 100) * 0.25\n", - "perturb = (np.random.randint(0, 20, size=N) - 10) * 0.25\n", - "walk = walk.cumsum()\n", - "\n", - "rng = pd.date_range(px.index[0], periods=len(px) + N, freq='B')\n", - "near = np.concatenate([px.values, px.values[-1] + walk])\n", - "far = np.concatenate([px.values, px.values[-1] + walk + perturb])\n", - "prices = DataFrame({'ESU2': near, 'ESZ2': far}, index=rng)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "prices.tail()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "def get_roll_weights(start, expiry, items, roll_periods=5):\n", - " # start : first date to compute weighting DataFrame\n", - " # expiry : Series of ticker -> expiration dates\n", - " # items : sequence of contract names\n", - "\n", - " dates = pd.date_range(start, expiry[-1], freq='B')\n", - " weights = DataFrame(np.zeros((len(dates), len(items))),\n", - " index=dates, columns=items)\n", - "\n", - " prev_date = weights.index[0]\n", - " for i, (item, ex_date) in enumerate(expiry.iteritems()):\n", - " if i < len(expiry) - 1:\n", - " weights.ix[prev_date:ex_date - pd.offsets.BDay(), item] = 1\n", - " roll_rng = pd.date_range(end=ex_date - pd.offsets.BDay(),\n", - " periods=roll_periods + 1, freq='B')\n", - "\n", - " decay_weights = np.linspace(0, 1, roll_periods + 1)\n", - " weights.ix[roll_rng, item] = 1 - decay_weights\n", - " weights.ix[roll_rng, expiry.index[i + 1]] = decay_weights\n", - " else:\n", - " weights.ix[prev_date:, item] = 1\n", - "\n", - " prev_date = ex_date\n", - "\n", - " return weights" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "weights = get_roll_weights('6/1/2012', expiry, prices.columns)\n", - "weights.ix['2012-09-12':'2012-09-21']" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "rolled_returns = (prices.pct_change() * weights).sum(1)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Rolling correlation and linear regression" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "aapl = web.get_data_yahoo('AAPL', '2000-01-01')['Adj Close']\n", - "msft = web.get_data_yahoo('MSFT', '2000-01-01')['Adj Close']\n", - "\n", - "aapl_rets = aapl.pct_change()\n", - "msft_rets = msft.pct_change()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "plt.figure()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "pd.rolling_corr(aapl_rets, msft_rets, 250).plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "plt.figure()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "model = pd.ols(y=aapl_rets, x={'MSFT': msft_rets}, window=250)\n", - "model.beta" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "model.beta['MSFT'].plot()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - } - ], - "metadata": {} + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.10" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +}