Skip to content

Commit

Permalink
同时计算多套ICIR
Browse files Browse the repository at this point in the history
  • Loading branch information
wukan1986 committed Mar 1, 2024
1 parent 558b277 commit ac09133
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 15 deletions.
2 changes: 1 addition & 1 deletion alphainspect/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.4"
__version__ = "0.2.5"
73 changes: 61 additions & 12 deletions alphainspect/ic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import polars as pl
import seaborn as sns
from loguru import logger
from matplotlib import pyplot as plt
from polars import Expr
from sklearn.feature_selection import mutual_info_regression
Expand Down Expand Up @@ -41,18 +42,20 @@ def calc_ic(df_pl: pl.DataFrame, factor: str, forward_returns: Sequence[str]) ->
).sort(_DATE_).fill_nan(None)


def calc_ic2(df_pl: pl.DataFrame, factors: Sequence[str], forward_returns: Sequence[str]) -> pl.DataFrame:
    """Build the IC matrix for many factors against many forward returns.

    Handy for users who need statistics over a large number of factors.
    Every (factor, forward return) pair contributes one column named
    ``{factor}__{forward_return}`` holding ``rank_ic(factor, forward_return)``
    aggregated within each ``_DATE_`` group.  The result is sorted by
    ``_DATE_`` and NaN values are replaced with null.
    """
    pair_exprs = []
    for factor, fwd_ret in itertools.product(factors, forward_returns):
        pair_exprs.append(rank_ic(factor, fwd_ret).alias(f'{factor}__{fwd_ret}'))

    grouped = df_pl.group_by(_DATE_).agg(pair_exprs)
    return grouped.sort(_DATE_).fill_nan(None)
def calc_ic_mean(df_pl: pl.DataFrame):
    """Return the mean of every column except the ``_DATE_`` column."""
    value_columns = pl.exclude(_DATE_)
    return df_pl.select(value_columns.mean())


def calc_ir(df_pl: pl.DataFrame):
def calc_ic_ir(df_pl: pl.DataFrame):
    """Compute the IR of every column except the ``_DATE_`` column.

    The IR is the column mean divided by its standard deviation with
    ``ddof=0``.  The input must contain no NaN values, only nulls.
    """
    value_columns = pl.exclude(_DATE_)
    ratio = value_columns.mean() / value_columns.std(ddof=0)
    return df_pl.select(ratio)


def row_unstack(df_pl: pl.DataFrame, factors: Sequence[str], forward_returns: Sequence[str]) -> pd.DataFrame:
    """Reshape flat per-pair values into a factor-by-return pandas table.

    The values of ``df_pl`` are reshaped to ``len(factors)`` rows by
    ``len(forward_returns)`` columns, so the input must hold exactly that
    many values.  Rows are labelled with ``factors`` and columns with
    ``forward_returns``.
    """
    n_rows, n_cols = len(factors), len(forward_returns)
    grid = df_pl.to_numpy().reshape(n_rows, n_cols)
    return pd.DataFrame(grid, index=factors, columns=forward_returns)


def mutual_info_func(xx):
yx = np.vstack(xx).T
# 跳过nan
Expand Down Expand Up @@ -94,16 +97,17 @@ def plot_ic_ts(df_pl: pl.DataFrame, col: str,
pl.col(col).rolling_mean(20).alias('sma_20'),
pl.col(col).fill_nan(0).cum_sum().alias('cum_sum'),
])

df_pd = df_pl.to_pandas().dropna()
df_pd = df_pl.to_pandas().replace([-np.inf, np.inf], np.nan).dropna(subset='ic')
s: pd.Series = df_pd['ic']

ic = s.mean()
ir = s.mean() / s.std()
rate = (s.abs() > 0.02).value_counts(normalize=True).loc[True]

title = f"{col},IC={ic:0.4f},>0.02={rate:0.2f},IR={ir:0.4f}"
logger.info(title)
ax1 = df_pd.plot.line(x=_DATE_, y=['ic', 'sma_20'], alpha=0.5, lw=1,
title=f"{col},IC={ic:0.4f},>0.02={rate:0.2f},IR={ir:0.4f}",
title=title,
ax=ax)
ax2 = df_pd.plot.line(x=_DATE_, y=['cum_sum'], alpha=0.9, lw=1,
secondary_y='cum_sum', c='r',
Expand All @@ -123,7 +127,7 @@ def plot_ic_hist(df_pl: pl.DataFrame, col: str,
--------
>>> plot_ic_hist(df_pl, 'RETURN_OO_1')
"""
a = df_pl[col].to_pandas().dropna()
a = df_pl[col].to_pandas().replace([-np.inf, np.inf], np.nan).dropna()

mean = a.mean()
std = a.std()
Expand All @@ -139,7 +143,9 @@ def plot_ic_hist(df_pl: pl.DataFrame, col: str,
ax.axvline(x=mean, c="r", ls="--", lw=1)
ax.axvline(x=mean + std * 3, c="r", ls="--", lw=1)
ax.axvline(x=mean - std * 3, c="r", ls="--", lw=1)
ax.set_title(f"{col},mean={mean:0.4f},std={std:0.4f},skew={skew:0.4f},kurt={kurt:0.4f}")
title = f"{col},mean={mean:0.4f},std={std:0.4f},skew={skew:0.4f},kurt={kurt:0.4f}"
logger.info(title)
ax.set_title(title)
ax.set_xlabel('')


Expand All @@ -152,7 +158,7 @@ def plot_ic_qq(df_pl: pl.DataFrame, col: str,
--------
>>> plot_ic_qq(df_pl, 'RETURN_OO_1')
"""
a = df_pl[col].to_pandas().dropna()
a = df_pl[col].to_pandas().replace([-np.inf, np.inf], np.nan).dropna()

sm.qqplot(a, fit=True, line='45', ax=ax)

Expand Down Expand Up @@ -195,3 +201,46 @@ def create_ic_sheet(df_pl: pl.DataFrame, factor: str, forward_returns: Sequence[
plot_ic_heatmap(df_pl, forward_return, ax=axes[1, 1])

fig.tight_layout()


def calc_ic2(df_pl: pl.DataFrame, factors: Sequence[str], forward_returns: Sequence[str]) -> pl.DataFrame:
    """Build the IC matrix for many factors against many forward returns.

    Handy for users who need statistics over a large number of factors.
    Every (factor, forward return) pair contributes one column named
    ``{factor}__{forward_return}`` holding ``rank_ic(factor, forward_return)``
    aggregated within each ``_DATE_`` group.  The result is sorted by
    ``_DATE_`` and NaN values are replaced with null.
    """
    pair_exprs = []
    for factor, fwd_ret in itertools.product(factors, forward_returns):
        pair_exprs.append(rank_ic(factor, fwd_ret).alias(f'{factor}__{fwd_ret}'))

    grouped = df_pl.group_by(_DATE_).agg(pair_exprs)
    return grouped.sort(_DATE_).fill_nan(None)


def plot_ic2_heatmap(df_pd: pd.DataFrame,
                     *,
                     title='Mean IC',
                     ax=None) -> None:
    """Draw an annotated heatmap for a table covering multiple ICs.

    ``df_pd`` is handed to ``seaborn.heatmap`` together with ``ax``; the
    axes returned by seaborn get ``title`` as their title and an empty
    x-axis label.
    """
    heatmap_options = dict(annot=True, cmap='RdYlGn_r', cbar=False, annot_kws={"size": 7})
    heat_ax = sns.heatmap(df_pd, ax=ax, **heatmap_options)
    heat_ax.set_title(title)
    heat_ax.set_xlabel('')


def create_ic2_sheet(df_pl: pl.DataFrame, factors: Sequence[str], forward_returns: Sequence[str],
                     *,
                     axvlines=(), ):
    """Plot an IC overview for several factors against several forward returns.

    The IC of every (factor, forward return) pair is computed with
    ``calc_ic2``.  The mean IC and the IC_IR of each pair are arranged into
    factor-by-return tables, logged, and drawn as two side-by-side heatmaps.
    A second figure holds one IC time-series plot per pair, laid out on a
    ``len(factors)`` x ``len(forward_returns)`` grid.

    Parameters
    ----------
    df_pl
        Input data; passed unchanged to ``calc_ic2``.
    factors
        Factor column names.
    forward_returns
        Forward-return column names.
    axvlines
        Forwarded as-is to ``plot_ic_ts`` for every time-series plot.

    Notes
    -----
    The figures are created through ``pyplot`` but not shown; the caller is
    expected to call ``plt.show()`` (or save the figures) afterwards.
    """
    df_pl = calc_ic2(df_pl, factors, forward_returns)
    df_ic = row_unstack(calc_ic_mean(df_pl), factors, forward_returns)
    df_ir = row_unstack(calc_ic_ir(df_pl), factors, forward_returns)
    logger.info('Mean IC: {} \n{}', '=' * 60, df_ic)
    logger.info('IC_IR: {} \n{}', '=' * 60, df_ir)

    # Heatmaps of the mean IC and the IR.
    fig, axes = plt.subplots(1, 2, figsize=(12, 9))
    plot_ic2_heatmap(df_ic, title='Mean IC', ax=axes[0])
    plot_ic2_heatmap(df_ir, title='IR', ax=axes[1])
    fig.tight_layout()

    # IC time-series plots, one per (factor, forward return) pair.
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid; with the
    # default squeezing a single Axes object is returned and `.flatten()`
    # would fail when exactly one factor and one forward return are given.
    fig, axes = plt.subplots(len(factors), len(forward_returns), figsize=(12, 9), squeeze=False)
    logger.info('IC TimeSeries: {}', '=' * 60)
    pairs = itertools.product(factors, forward_returns)
    for (x, y), pair_ax in zip(pairs, axes.flatten()):
        plot_ic_ts(df_pl, f'{x}__{y}', axvlines=axvlines, ax=pair_ax)
    fig.tight_layout()
4 changes: 2 additions & 2 deletions alphainspect/portfolio.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def calc_cum_return_by_quantile(df_pl: pl.DataFrame, fwd_ret_1: str, period: int
# !!!直接减是错误的,因为两资金是独立的,资金减少的一份由于资金不足对冲比例已经不再是1:1
# out['spread'] = out[f'G{q_max}'] - out[f'G0']

logger.info('累计收益计算完成')
logger.info('累计收益计算完成 \n{}', out.iloc[-1])
return out


Expand Down Expand Up @@ -68,7 +68,7 @@ def calc_cum_return_spread(df_pl: pl.DataFrame, fwd_ret_1: str, period: int = 5)
out[f'G{q_max} w=+1'] = cumulative_returns(rr, b9, funds=period, freq=period)
# 资金是共享的,每次调仓时需要将资金平分成两份
out[f'G{q_max}~G0 w=+.5/-.5'] = cumulative_returns(rr, bb, funds=period, freq=period, init_cash=1.0)
logger.info('多空收益计算完成')
logger.info('多空收益计算完成 \n{}', out.iloc[-1])
return out


Expand Down
1 change: 1 addition & 0 deletions alphainspect/returns.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def plot_quantile_returns_bar(df_pl: pl.DataFrame, factor: str, forward_returns:
ax = df_pd.plot.bar(ax=ax)
ax.set_title(f'{factor},Mean Return By Factor Quantile')
ax.set_xlabel('')
# ax.bar_label(ax.containers[0])


def plot_quantile_returns_box(df_pl: pl.DataFrame, factor: str, forward_returns: Sequence[str], *, ax=None):
Expand Down
27 changes: 27 additions & 0 deletions examples/demo5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# %%
import os
import sys
from pathlib import Path

from matplotlib import pyplot as plt

# Change the working directory to the repository root (the parent of this
# script's directory) so the demo behaves the same across different IDEs.
pwd = str(Path(__file__).parents[1])
os.chdir(pwd)
sys.path.append(pwd)

# Imported only after the repository root is on sys.path, so the demo also
# runs from a source checkout where alphainspect is not installed.
from alphainspect.ic import create_ic2_sheet  # noqa: E402

# ===============
# %%
import polars as pl

df_output = pl.read_parquet('data/data.parquet')
# %%
# NOTE(review): `period` and `axvlines` are defined here but not passed to
# create_ic2_sheet below — confirm whether that is intentional.
period = 5
axvlines = ('2020-01-01',)

factors = ['STD_010', 'STD_020', 'SMA_010', 'SMA_020']  # factors under study
forward_returns = ['RETURN_CC_1', 'RETURN_OO_1', 'RETURN_OO_2', 'RETURN_OO_5']  # same factor, compared across holding periods

create_ic2_sheet(df_output, factors, forward_returns)
plt.show()

0 comments on commit ac09133

Please sign in to comment.