Skip to content

Commit

Permalink
添加事件分析
Browse files Browse the repository at this point in the history
  • Loading branch information
wukan1986 committed Jan 19, 2024
1 parent 46c649f commit 86ab4f5
Show file tree
Hide file tree
Showing 11 changed files with 207 additions and 39 deletions.
6 changes: 6 additions & 0 deletions alphainspect/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from ._version import __version__

# Shared DataFrame column names, imported by the analysis modules so the
# literal strings are spelled in exactly one place.
_ASSET_ = 'asset'  # column used to group rows per asset
_DATE_ = 'date'  # column used to sort rows / group rows per date
_GROUP_ = 'group'  # optional extra grouping column (used together with the date)
_QUANTILE_ = 'factor_quantile'  # column holding the factor quantile bucket
2 changes: 1 addition & 1 deletion alphainspect/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.1"
__version__ = "0.2.0"
111 changes: 111 additions & 0 deletions alphainspect/events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from functools import lru_cache
from typing import Sequence, List

import numpy as np
import pandas as pd
import polars as pl
from matplotlib import pyplot as plt
from numpy.lib.stride_tricks import sliding_window_view

from alphainspect import _QUANTILE_, _DATE_, _ASSET_

# Regex matching the signed-offset column names built by make_around_columns,
# i.e. a '+' or '-' sign followed by digits ('-3', '+0', '+15', ...).
_REG_AROUND_ = r'^[+-]\d+$'
# Polars column expression selecting every column whose name matches that regex.
_COL_AROUND_ = pl.col(_REG_AROUND_)


@lru_cache
def _around_columns_cached(periods_before: int, periods_after: int) -> tuple:
    """Build the immutable, cacheable tuple of signed offset labels."""
    return tuple(f'{i:+02d}' for i in range(-periods_before, periods_after + 1))


def make_around_columns(periods_before: int = 3, periods_after: int = 15) -> List[str]:
    """Generate the header labels for the columns around an event.

    Labels are the signed offsets from ``-periods_before`` to
    ``+periods_after`` inclusive, e.g. ``['-1', '+0', '+1', '+2']``.

    Parameters
    ----------
    periods_before
        Number of periods before the event (T+0) to include.
    periods_after
        Number of periods after the event (T+0) to include.

    Returns
    -------
    List[str]
        A new list on every call. Only an immutable tuple is cached, so a
        caller mutating the result cannot corrupt later calls.
    """
    return list(_around_columns_cached(periods_before, periods_after))


def with_around_price(df_pl: pl.DataFrame, price: str, periods_before: int = 5, periods_after: int = 15) -> pl.DataFrame:
    """Add the (adjusted) price path around every row as extra columns.

    Rows are processed per asset in date order. For each row a window of
    ``periods_before + periods_after + 1`` prices centred on that row is
    attached, one column per offset, named by ``make_around_columns``.
    Each window is divided by its T+0 value, so the ``'+0'`` column is 1
    wherever the T+0 price is a finite, non-zero number. Offsets that fall
    outside the asset's history are returned as null.

    Parameters
    ----------
    df_pl
        Input frame; must contain the date, asset and ``price`` columns.
    price
        Name of the price column to build the windows from.
    periods_before
        Number of periods before T+0 to include (must be >= 0).
    periods_after
        Number of periods after T+0 to include (must be >= 0).

    Returns
    -------
    pl.DataFrame
        The input rows with the signed-offset columns appended.

    Raises
    ------
    ValueError
        If ``periods_before`` or ``periods_after`` is negative.
    """
    if periods_before < 0 or periods_after < 0:
        raise ValueError('periods_before and periods_after must be non-negative')

    window = periods_before + periods_after + 1
    columns = make_around_columns(periods_before, periods_after)

    def _func_ts(df: pl.DataFrame,
                 normalize: bool = True):
        # Sorting by date is mandatory: the windows are positional
        df = df.sort(_DATE_)
        n = len(df)

        t0 = df[price].to_numpy()
        # NaN padding requires a floating dtype; promote integer prices
        # while leaving float32/float64 inputs at their own precision
        dtype = np.result_type(t0.dtype, np.float32)
        # Prepare the data, leaving NaN padding before and after
        a = np.full(n + periods_before + periods_after, np.nan, dtype=dtype)
        a[periods_before:periods_before + n] = t0

        # Sliding window: one row of `window` values per input row
        b = sliding_window_view(a, window)
        # Scale every window so that T+0 becomes 1
        if normalize:
            b = b / b[:, [periods_before]]
        # numpy -> polars
        c = pl.from_numpy(b, schema=columns)
        return df.with_columns(c)

    # The NaN padding is converted to null on the way out
    return df_pl.group_by(by=_ASSET_).map_groups(_func_ts).with_columns(_COL_AROUND_.fill_nan(None))


def plot_events_errorbar(df_pl: pl.DataFrame, ax=None) -> None:
    """Plot mean +/- standard deviation around the event for the top quantile.

    Parameters
    ----------
    df_pl
        Frame containing the quantile column and the signed-offset columns.
    ax
        Matplotlib axes to draw on. When omitted, the current axes
        (``plt.gca()``) is used.
    """
    if ax is None:
        ax = plt.gca()

    # Only the largest quantile is plotted
    _max = df_pl.select(pl.max(_QUANTILE_).alias('max')).to_dicts()[0]['max']

    df_pl = df_pl.select(_QUANTILE_, _COL_AROUND_)
    # After the transpose: rows are the offsets, columns are the quantiles
    mean_pl = df_pl.group_by(by=[_QUANTILE_]).agg(pl.mean(_REG_AROUND_)).sort(_QUANTILE_)
    mean_pd: pd.DataFrame = mean_pl.to_pandas().set_index(_QUANTILE_).T
    std_pl = df_pl.group_by(by=[_QUANTILE_]).agg(pl.std(_REG_AROUND_)).sort(_QUANTILE_)
    std_pd: pd.DataFrame = std_pl.to_pandas().set_index(_QUANTILE_).T

    a = mean_pd.loc[:, _max]
    b = std_pd.loc[:, _max]

    ax.errorbar(x=a.index, y=a, yerr=b)
    # The x axis is categorical, so the T+0 marker goes at its position
    ax.axvline(x=a.index.get_loc('+0'), c="r", ls="--", lw=1)
    ax.set_xlabel('')
    ax.set_title(f'Quantile {_max} errorbar')


def plot_events_average(df_pl: pl.DataFrame, ax=None) -> None:
    """Plot the mean of the signed-offset columns around the event, one line per quantile.

    Parameters
    ----------
    df_pl
        Frame containing the quantile column and the signed-offset columns.
    ax
        Matplotlib axes to draw on. When omitted, pandas creates one and
        the remaining decorations are applied to it.
    """
    df_pl = df_pl.select(_QUANTILE_, _COL_AROUND_)
    mean_pl = df_pl.group_by(by=[_QUANTILE_]).agg(pl.mean(_REG_AROUND_)).sort(_QUANTILE_)
    # After the transpose: rows are the offsets, columns are the quantiles
    mean_pd: pd.DataFrame = mean_pl.to_pandas().set_index(_QUANTILE_).T
    # Keep the axes pandas returns so that ax=None does not break the calls below
    ax = mean_pd.plot.line(title='Average Cumulative Returns by Quantile', ax=ax, cmap='coolwarm', lw=1)
    ax.axvline(x=mean_pd.index.get_loc('+0'), c="r", ls="--", lw=1)
    ax.set_xlabel('')


def plot_events_count(df_pl: pl.DataFrame, axvlines: Sequence[str] = (), ax=None) -> None:
    """Plot the number of rows (events) per date.

    Parameters
    ----------
    df_pl
        Frame of event rows; must contain the date column.
    axvlines
        x positions at which to draw dashed blue vertical marker lines.
    ax
        Matplotlib axes to draw on. When omitted, pandas creates one and
        the remaining decorations are applied to it.
    """
    df_pl = df_pl.group_by(by=[_DATE_]).count()
    df_pd = df_pl.to_pandas().set_index(_DATE_)
    # Keep the axes pandas returns so that ax=None does not break the calls below
    ax = df_pd.plot.line(title='Distribution of events', ax=ax, lw=1, grid=True)
    ax.set_xlabel('')
    for v in axvlines:
        ax.axvline(x=v, c="b", ls="--", lw=1)


def create_events_sheet(df_pl: pl.DataFrame, condition: pl.Expr, axvlines: Sequence[str] = ()):
    """Draw the three-panel event sheet: event count, average, and error bars.

    Parameters
    ----------
    df_pl
        Frame containing the date, quantile and signed-offset columns.
    condition
        Polars expression selecting the rows that count as events.
    axvlines
        x positions forwarded to the event-count panel as vertical markers.
    """
    # Rows with a null quantile must be removed before the event filter is applied
    has_quantile = pl.col(_QUANTILE_).is_not_null()
    events = df_pl.filter(has_quantile).filter(condition)

    _, (ax_count, ax_average, ax_errorbar) = plt.subplots(nrows=3, ncols=1, figsize=(9, 12))

    plot_events_count(events, axvlines=axvlines, ax=ax_count)
    plot_events_average(events, ax=ax_average)
    plot_events_errorbar(events, ax=ax_errorbar)
19 changes: 10 additions & 9 deletions alphainspect/ic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from matplotlib import pyplot as plt
from statsmodels import api as sm

from alphainspect import _DATE_
from alphainspect.utils import rank_ic


Expand All @@ -26,10 +27,10 @@ def calc_ic(df_pl: pl.DataFrame, factor: str, forward_returns: Sequence[str]) ->
>>> calc_ic(df_pl, 'SMA_020', ['RETURN_OO_1', 'RETURN_OO_2', 'RETURN_CC_1'])
"""
return df_pl.group_by(by=['date']).agg(
return df_pl.group_by(by=[_DATE_]).agg(
# 这里没有换名,名字将与forward_returns对应
[rank_ic(x, factor) for x in forward_returns]
).sort('date')
).sort(_DATE_)


def plot_ic_ts(df_pl: pl.DataFrame, col: str,
Expand All @@ -41,10 +42,10 @@ def plot_ic_ts(df_pl: pl.DataFrame, col: str,
--------
>>> plot_ic_ts(df_pd, 'RETURN_OO_1')
"""
df_pl = df_pl.select(['date', col])
df_pl = df_pl.select([_DATE_, col])

df_pl = df_pl.select([
'date',
_DATE_,
pl.col(col).alias('ic'),
pl.col(col).rolling_mean(20).alias('sma_20'),
pl.col(col).fill_nan(0).cum_sum().alias('cum_sum'),
Expand All @@ -57,10 +58,10 @@ def plot_ic_ts(df_pl: pl.DataFrame, col: str,
ir = s.mean() / s.std()
rate = (s.abs() > 0.02).value_counts(normalize=True).loc[True]

ax1 = df_pd.plot.line(x='date', y=['ic', 'sma_20'], alpha=0.5, lw=1,
ax1 = df_pd.plot.line(x=_DATE_, y=['ic', 'sma_20'], alpha=0.5, lw=1,
title=f"{col},IC={ic:0.4f},>0.02={rate:0.2f},IR={ir:0.4f}",
ax=ax)
ax2 = df_pd.plot.line(x='date', y=['cum_sum'], alpha=0.9, lw=1,
ax2 = df_pd.plot.line(x=_DATE_, y=['cum_sum'], alpha=0.9, lw=1,
secondary_y='cum_sum', c='r',
ax=ax1)
ax1.axhline(y=ic, c="r", ls="--", lw=1)
Expand Down Expand Up @@ -116,9 +117,9 @@ def plot_ic_heatmap(df_pl: pl.DataFrame, col: str,
*,
ax=None) -> None:
"""月度IC热力图"""
df_pl = df_pl.select(['date', col,
pl.col('date').dt.year().alias('year'),
pl.col('date').dt.month().alias('month')
df_pl = df_pl.select([_DATE_, col,
pl.col(_DATE_).dt.year().alias('year'),
pl.col(_DATE_).dt.month().alias('month')
])
df_pl = df_pl.group_by(by=['year', 'month']).agg(pl.mean(col))
df_pd = df_pl.to_pandas().set_index(['year', 'month'])
Expand Down
9 changes: 5 additions & 4 deletions alphainspect/portfolio.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
import seaborn as sns
from matplotlib import pyplot as plt

from alphainspect import _QUANTILE_, _DATE_, _ASSET_
from alphainspect.utils import cumulative_returns


def calc_cum_return_by_quantile(df_pl: pl.DataFrame, fwd_ret_1: str, period: int = 5) -> pd.DataFrame:
df_pd = df_pl.to_pandas().set_index(['date', 'asset'])
df_pd = df_pl.to_pandas().set_index([_DATE_, _ASSET_])
rr = df_pd[fwd_ret_1].unstack() # 1日收益率
q_max = df_pd['factor_quantile'].max()
pp = df_pd['factor_quantile'].unstack() # 信号仓位
q_max = df_pd[_QUANTILE_].max()
pp = df_pd[_QUANTILE_].unstack() # 信号仓位

out = pd.DataFrame(index=rr.index)
rr = rr.to_numpy()
Expand Down Expand Up @@ -50,12 +51,12 @@ def create_portfolio_sheet(df_pl: pl.DataFrame,
fwd_ret_1: str,
period=5,
*,
groups=('G0', 'G9'),
axvlines=()) -> None:
df_cum_ret = calc_cum_return_by_quantile(df_pl, fwd_ret_1, period)

fix, axes = plt.subplots(2, 1, figsize=(12, 9))
plot_quantile_portfolio(df_cum_ret, fwd_ret_1, period, axvlines=axvlines, ax=axes[0])
groups = df_cum_ret.columns[[0, -1]]
for i, g in enumerate(groups):
ax = plt.subplot(223 + i)
plot_portfolio_heatmap(df_cum_ret, group=g, ax=ax)
8 changes: 4 additions & 4 deletions alphainspect/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import polars as pl
from matplotlib import pyplot as plt

from alphainspect import _QUANTILE_
from alphainspect.ic import calc_ic, plot_ic_ts, plot_ic_hist, plot_ic_heatmap
from alphainspect.portfolio import calc_cum_return_by_quantile, plot_quantile_portfolio
from alphainspect.turnover import calc_auto_correlation, calc_quantile_turnover, plot_factor_auto_correlation, plot_turnover_quantile
Expand Down Expand Up @@ -115,7 +116,6 @@ def create_3x2_sheet(df_pl: pl.DataFrame,
forward_return: str, fwd_ret_1: str,
*,
period: int = 5,
quantile: int = 9,
periods: Sequence[int] = (1, 5, 10, 20),
axvlines: Sequence[str] = ()) -> None:
"""画2*3图
Expand All @@ -130,8 +130,6 @@ def create_3x2_sheet(df_pl: pl.DataFrame,
用于记算累计收益的1期远期收益率
period: int
累计收益时持仓天数与资金份数
quantile:int
换手率关注第几层
periods:
换手率,多期比较
axvlines
Expand All @@ -153,4 +151,6 @@ def create_3x2_sheet(df_pl: pl.DataFrame,
df_auto_corr = calc_auto_correlation(df_pl, factor, periods=periods)
df_turnover = calc_quantile_turnover(df_pl, periods=periods)
plot_factor_auto_correlation(df_auto_corr, axvlines=axvlines, ax=axes[2, 0])
plot_turnover_quantile(df_turnover, quantile=quantile, periods=periods, axvlines=axvlines, ax=axes[2, 1])

q_min, q_max = df_turnover[_QUANTILE_].min(), df_turnover[_QUANTILE_].max()
plot_turnover_quantile(df_turnover, quantile=q_max, periods=periods, axvlines=axvlines, ax=axes[2, 1])
12 changes: 8 additions & 4 deletions alphainspect/returns.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import polars as pl
import seaborn as sns

from alphainspect import _QUANTILE_


def plot_quantile_returns_bar(df_pl: pl.DataFrame, factor: str, forward_returns: Sequence[str],
*,
Expand All @@ -14,8 +16,8 @@ def plot_quantile_returns_bar(df_pl: pl.DataFrame, factor: str, forward_returns:
--------
>>> plot_quantile_returns_bar(df_pl, 'GP_0000', ['RETURN_OO_1', 'RETURN_OO_2', 'RETURN_CC_1'])
"""
df_pl = df_pl.group_by(by=['factor_quantile']).agg([pl.mean(y) for y in forward_returns]).sort('factor_quantile')
df_pd = df_pl.to_pandas().set_index('factor_quantile')
df_pl = df_pl.group_by(by=[_QUANTILE_]).agg([pl.mean(y) for y in forward_returns]).sort(_QUANTILE_)
df_pd = df_pl.to_pandas().set_index(_QUANTILE_)
ax = df_pd.plot.bar(ax=ax)
ax.set_title(f'{factor},Mean Return By Factor Quantile')
ax.set_xlabel('')
Expand All @@ -33,8 +35,8 @@ def plot_quantile_returns_violin(df_pl: pl.DataFrame, factor: str, forward_retur
速度有点慢
"""
# TODO 超大数据有必要截断吗?
df_pl = df_pl.select('factor_quantile', *forward_returns).tail(5000 * 60)
df_pd = df_pl.to_pandas().set_index('factor_quantile')
df_pl = df_pl.select(_QUANTILE_, *forward_returns).tail(5000 * 60)
df_pd = df_pl.to_pandas().set_index(_QUANTILE_)

df_pd = df_pd.stack().reset_index()
df_pd.columns = ['x', 'hue', 'y']
Expand All @@ -47,5 +49,7 @@ def plot_quantile_returns_violin(df_pl: pl.DataFrame, factor: str, forward_retur
def create_returns_sheet(df_pl: pl.DataFrame, factor: str, forward_returns: Sequence[str]):
fig, axes = plt.subplots(2, 1, figsize=(12, 9))

# 一定要过滤null才能用
df_pl = df_pl.filter(pl.col(_QUANTILE_).is_not_null())
plot_quantile_returns_bar(df_pl, factor, forward_returns, ax=axes[0])
plot_quantile_returns_violin(df_pl, factor, forward_returns, ax=axes[1])
30 changes: 17 additions & 13 deletions alphainspect/turnover.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import polars as pl
from matplotlib import pyplot as plt

from alphainspect import _QUANTILE_, _DATE_, _ASSET_
from alphainspect.utils import auto_corr


Expand All @@ -13,7 +14,7 @@ def calc_auto_correlation(df_pl: pl.DataFrame,
*,
periods: Sequence[int]):
"""计算排序自相关"""
return df_pl.group_by(by=['date']).agg([auto_corr(factor, p).alias(f'AC{p:02d}') for p in periods]).sort('date')
return df_pl.group_by(by=[_DATE_]).agg([auto_corr(factor, p).alias(f'AC{p:02d}') for p in periods]).sort(_DATE_)


def _list_to_set(x):
Expand All @@ -29,46 +30,49 @@ def _set_diff(curr: pd.Series, period: int):

def calc_quantile_turnover(df_pl: pl.DataFrame,
*,
periods: Sequence[int] = (1, 5, 10, 20)):
periods: Sequence[int] = (1, 5, 10, 20)) -> pd.DataFrame:
def _func_ts(df: pd.DataFrame, periods=periods):
for p in periods:
df[f'P{p:02d}'] = _set_diff(df['asset'], p)
df[f'P{p:02d}'] = _set_diff(df[_ASSET_], p)
return df

df_pd: pd.DataFrame = df_pl.group_by(by=['date', 'factor_quantile']).agg('asset').sort('date').to_pandas()
df_pd['asset'] = df_pd['asset'].apply(_list_to_set)
return df_pd.groupby(by='factor_quantile').apply(_func_ts)
df_pd: pd.DataFrame = df_pl.group_by(by=[_DATE_, _QUANTILE_]).agg(_ASSET_).sort(_DATE_).to_pandas()
df_pd[_ASSET_] = df_pd[_ASSET_].apply(_list_to_set)
return df_pd.groupby(by=_QUANTILE_).apply(_func_ts)


def plot_factor_auto_correlation(df_pl: pl.DataFrame,
*,
axvlines=(), ax=None):
df_pd = df_pl.to_pandas().set_index('date')
df_pd = df_pl.to_pandas().set_index(_DATE_)
ax = df_pd.plot(title='Factor Auto Correlation', cmap='coolwarm', alpha=0.7, lw=1, grid=True, ax=ax)
ax.set_xlabel('')
for v in axvlines:
ax.axvline(x=v, c="b", ls="--", lw=1)


def plot_turnover_quantile(df_pd: pd.DataFrame, quantile: int = 0,
def plot_turnover_quantile(df_pd: pd.DataFrame, quantile: int,
*,
periods: Sequence[int] = (1, 5, 10, 20), axvlines=(), ax=None):
df_pd = df_pd[df_pd['factor_quantile'] == quantile]
df_pd = df_pd.set_index('date')
df_pd = df_pd[df_pd[_QUANTILE_] == quantile]
df_pd = df_pd.set_index(_DATE_)
df_pd = df_pd[[f'P{p:02d}' for p in periods]]
ax = df_pd.plot(title=f'Quantile {quantile} Mean Turnover', alpha=0.7, lw=1, grid=True, ax=ax)
ax.set_xlabel('')
for v in axvlines:
ax.axvline(x=v, c="b", ls="--", lw=1)


def create_turnover_sheet(df, factor, quantiles: int = 10,
def create_turnover_sheet(df, factor,
*,
periods: Sequence[int] = (1, 5, 10, 20), axvlines=()):
df1 = calc_auto_correlation(df, factor, periods=periods)
df2 = calc_quantile_turnover(df, periods=periods)
q_min, q_max = df2[_QUANTILE_].min(), df2[_QUANTILE_].max()

fix, axes = plt.subplots(2, 1, figsize=(12, 9))
plot_factor_auto_correlation(df1, axvlines=axvlines, ax=axes[0])
groups = (0, quantiles - 1)
for i, q in enumerate(groups):

for i, q in enumerate((q_min, q_max)):
ax = plt.subplot(223 + i)
plot_turnover_quantile(df2, quantile=q, periods=periods, axvlines=axvlines, ax=ax)
10 changes: 7 additions & 3 deletions alphainspect/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import polars as pl
from polars import Series, Expr, Int16

from alphainspect import _QUANTILE_, _DATE_, _GROUP_
from alphainspect._nb import _sub_portfolio_returns


Expand Down Expand Up @@ -51,13 +52,16 @@ def with_factor_quantile(df_pl: pl.DataFrame, factor: str, quantiles: int = 10,

def _func_cs(df: pl.DataFrame):
return df.with_columns([
cs_bucket(pl.col(factor), quantiles).alias('factor_quantile'),
cs_bucket(pl.col(factor), quantiles).alias(_QUANTILE_),
])

# 将nan改成null
df_pl = df_pl.with_columns(pl.col(factor).fill_nan(None))

if by_group:
return df_pl.group_by(by=['date', 'group']).map_groups(_func_cs)
return df_pl.group_by(by=[_DATE_, _GROUP_]).map_groups(_func_cs)
else:
return df_pl.group_by(by=['date']).map_groups(_func_cs)
return df_pl.group_by(by=[_DATE_]).map_groups(_func_cs)


def cumulative_returns(returns: np.ndarray, weights: np.ndarray,
Expand Down
Loading

0 comments on commit 86ab4f5

Please sign in to comment.