Implement Enhanced Indexing as a Portfolio Optimizer #280

Merged · 32 commits · Mar 17, 2021

Changes from 15 commits

Commits (32)

988b42e  Add Structured Covariance Estimator to riskmodel.py (Feb 9, 2021)
7b01c5c  Add an implementation of Enhanced Indexing to optimizer.py (Feb 9, 2021)
9c2653f  Add an implementation of Enhanced Indexing to optimizer.py (Feb 9, 2021)
4000518  Separate specific implementation of Portfolio Optimizer to folder. (Feb 22, 2021)
b2e2142  Applied slight modification to follow PEP 8. (Feb 22, 2021)
2cc057e  Fix minor mismatches of type hints. (Feb 22, 2021)
9448a6e  Add an abstract class as the base class for all optimization related p… (Feb 22, 2021)
42f8825  Reformat code to follow PEP 8. (Feb 22, 2021)
f7d3e56  Merge optimization related portfolio construction back to portfolio/o… (Feb 22, 2021)
58f74cf  Reformat code to follow PEP 8. (Feb 22, 2021)
164687d  Add scikit-learn to dependencies. (Feb 22, 2021)
b8647c1  Reformat code to follow PEP 8. (Feb 22, 2021)
2f9d45e  Reformat code with black. (Feb 22, 2021)
3787138  Format code with the latest version of black. (yongzhengqi, Feb 22, 2021)
dc4aa67  Black format (Derek-Wds, Feb 22, 2021)
f947a2f  Correct two mistakes in annotation. (Feb 22, 2021)
d3caea6  Add unittest for TestStructuredCovEstimator. (Feb 22, 2021)
527718a  Allow enhanced indexing to generate portfolio without industry relate… (Feb 22, 2021)
2bff6eb  Split classes in riskmodel.py & optimizer.py into separate files. (Mar 4, 2021)
83c6e74  Reindex files. (Mar 4, 2021)
0f3e3d2  Update __init__.py. (Mar 4, 2021)
79c1142  Pass nan_option to structured covariance estimator. (Mar 8, 2021)
4d5a30b  Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916… (Mar 8, 2021)
81b86f8  Update test to cover changes in structured_cov_estimator (Mar 8, 2021)
351d598  Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916… (Mar 8, 2021)
c6675be  Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916… (Mar 8, 2021)
fc89fec  Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916… (Mar 8, 2021)
2f9af1a  Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916… (Mar 8, 2021)
7022675  Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916… (Mar 8, 2021)
6a305c7  Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916… (Mar 8, 2021)
8b9065c  Reformat with black. (Mar 8, 2021)
53cf89d  Reformat with black. (Mar 8, 2021)
7 changes: 3 additions & 4 deletions qlib/contrib/strategy/strategy.py
@@ -7,7 +7,6 @@
import pandas as pd

from ..backtest.order import Order
from ...utils import get_pre_trading_date
from .order_generator import OrderGenWInteract


@@ -390,11 +389,11 @@ def filter_stock(l):
current_stock_list = current_temp.get_stock_list()
value = cash * self.risk_degree / len(buy) if len(buy) > 0 else 0

# open_cost should be considered in the real trading environment, while the backtest in evaluate.py does not consider it
# as the aim of demo is to accomplish same strategy as evaluate.py, so comment out this line
# open_cost should be considered in the real trading environment, while the backtest in evaluate.py does not
# consider it as the aim of demo is to accomplish same strategy as evaluate.py, so comment out this line
# value = value / (1+trade_exchange.open_cost) # set open_cost limit
for code in buy:
# check is stock supended
# check is stock suspended
if not trade_exchange.is_stock_tradable(stock_id=code, trade_date=trade_date):
continue
# buy order
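As a side note on the comments in this hunk, here is a minimal sketch (not qlib code) of the per-stock budget arithmetic, including the commented-out open-cost adjustment; all names and values below are illustrative only.

import numpy as np

cash = 1_000_000.0
risk_degree = 0.95          # fraction of cash the strategy is willing to deploy
buy = ["SH600000", "SH600036", "SH601318"]  # hypothetical buy list
open_cost = 0.0005          # hypothetical exchange open-cost rate

# budget per stock, as computed in the demo strategy
value = cash * risk_degree / len(buy) if len(buy) > 0 else 0

# in a real trading environment the open cost shrinks the deployable budget per stock
value_with_cost = value / (1 + open_cost)

print(round(value, 2), round(value_with_cost, 2))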
4 changes: 2 additions & 2 deletions qlib/model/base.py
@@ -43,8 +43,8 @@ def fit(self, dataset: Dataset):

# get weights
try:
wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"], data_key=DataHandlerLP.DK_L)
w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"],
                                       data_key=DataHandlerLP.DK_L)
w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
except KeyError as e:
w_train = pd.DataFrame(np.ones_like(y_train.values), index=y_train.index)
w_valid = pd.DataFrame(np.ones_like(y_valid.values), index=y_valid.index)
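A minimal sketch of the weight-fallback pattern used in fit above, with toy DataFrames standing in for the output of dataset.prepare; the names and data here are illustrative, not qlib code.

import numpy as np
import pandas as pd

wdf_train = pd.DataFrame({"weight": [1.0, 2.0, 1.5]}, index=["s1", "s2", "s3"])
wdf_valid = pd.DataFrame(index=["s4", "s5"])  # no "weight" column -> triggers the fallback

try:
    # use per-sample weights when the handler provides them
    w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
except KeyError:
    # otherwise default every sample weight to 1.0
    w_train = pd.Series(np.ones(len(wdf_train)), index=wdf_train.index)
    w_valid = pd.Series(np.ones(len(wdf_valid)), index=wdf_valid.index)

print(w_train.tolist(), w_valid.tolist())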
154 changes: 149 additions & 5 deletions qlib/model/riskmodel.py
@@ -1,11 +1,10 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import warnings
import numpy as np
import pandas as pd

from typing import Union
from sklearn.decomposition import PCA, FactorAnalysis

from qlib.model.base import BaseModel

@@ -124,7 +123,7 @@ def _preprocess(self, X: np.ndarray) -> Union[np.ndarray, np.ma.MaskedArray]:
X = np.nan_to_num(X)
elif self.nan_option == self.MASK_NAN:
X = np.ma.masked_invalid(X)
# centerize
# centralize
if not self.assume_centered:
X = X - np.nanmean(X, axis=0)
return X
@@ -162,8 +161,9 @@ class ShrinkCovEstimator(RiskModel):
[3] Ledoit, O., & Wolf, M. (2003). Improved estimation of the covariance matrix of stock returns
with an application to portfolio selection.
Journal of Empirical Finance, 10(5), 603–621. https://doi.org/10.1016/S0927-5398(03)00007-0
[4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance estimation.
IEEE Transactions on Signal Processing, 58(10), 5016–5029. https://doi.org/10.1109/TSP.2010.2053029
[4] Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. (2010). Shrinkage algorithms for MMSE covariance
estimation. IEEE Transactions on Signal Processing, 58(10), 5016–5029.
https://doi.org/10.1109/TSP.2010.2053029
[5] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-00007f64e5b9/cov1para.m.zip
[6] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-ffff-ffffde5e2d4e/covCor.m.zip
[7] https://www.econ.uzh.ch/dam/jcr:ffffffff-935a-b0d6-0000-0000648dfc98/covMarket.m.zip
@@ -465,3 +465,147 @@ def _predict(self, X: np.ndarray) -> np.ndarray:
        SigmaY = SigmaU + Lowrank

        return SigmaY


class StructuredCovEstimator(RiskModel):
    """Structured Covariance Estimator

    This estimator assumes observations can be predicted by multiple factors
        X = FB + U
    where `F` can be specified by explicit risk factors or latent factors.

    Therefore the structured covariance can be estimated by
        cov(X) = F cov(B) F.T + cov(U)

    We use latent factor models to estimate the structured covariance.
    Specifically, the following latent factor models are supported:
        - `pca`: Principal Component Analysis
        - `fa`: Factor Analysis

    Reference: [1] Fan, J., Liao, Y., & Liu, H. (2016). An overview of the estimation of large covariance and
        precision matrices. Econometrics Journal, 19(1), C1–C32. https://doi.org/10.1111/ectj.12061
    """

    FACTOR_MODEL_PCA = "pca"
    FACTOR_MODEL_FA = "fa"

    def __init__(
        self,
        factor_model: str = "pca",
        num_factors: int = 10,
        nan_option: str = "ignore",
        assume_centered: bool = False,
        scale_return: bool = True,
    ):
        """
        Args:
            factor_model (str): the latent factor model used to estimate the structured covariance (`pca`/`fa`).
            num_factors (int): number of components to keep.
            nan_option (str): nan handling option (`ignore`/`fill`).
            assume_centered (bool): whether the data is assumed to be centered.
            scale_return (bool): whether to scale returns to percentages.
        """
        super().__init__(nan_option, assume_centered, scale_return)

        assert factor_model in [
            self.FACTOR_MODEL_PCA,
            self.FACTOR_MODEL_FA,
        ], "factor_model={} is not supported".format(factor_model)
        self.solver = PCA if factor_model == self.FACTOR_MODEL_PCA else FactorAnalysis

        self.num_factors = num_factors

    def predict(
        self,
        X: Union[pd.Series, pd.DataFrame, np.ndarray],
        return_corr: bool = False,
        is_price: bool = True,
        return_decomposed_components=False,
    ) -> Union[pd.DataFrame, np.ndarray, tuple]:
        """
        Args:
            X (pd.Series, pd.DataFrame or np.ndarray): data from which to estimate the covariance,
                with variables as columns and observations as rows.
            return_corr (bool): whether to return the correlation matrix.
            is_price (bool): whether `X` contains prices (if not, assume stock returns).
            return_decomposed_components (bool): whether to return the decomposed components of the covariance matrix.

        Returns:
            tuple or pd.DataFrame or np.ndarray: decomposed covariance matrix or estimated covariance or correlation.
        """
        assert (
            not return_corr or not return_decomposed_components
        ), "Can only return either correlation matrix or decomposed components."

        # transform input into 2D array
        if not isinstance(X, (pd.Series, pd.DataFrame)):
            columns = None
        else:
            if isinstance(X.index, pd.MultiIndex):
                if isinstance(X, pd.DataFrame):
                    X = X.iloc[:, 0].unstack(level="instrument")  # always use the first column
                else:
                    X = X.unstack(level="instrument")
            else:
                # X is 2D DataFrame
                pass
            columns = X.columns  # will be used to restore dataframe
            X = X.values

        # calculate pct_change
        if is_price:
            X = X[1:] / X[:-1] - 1  # NOTE: resulting `n - 1` rows

        # scale return
        if self.scale_return:
            X *= 100

        # handle nan and centering
        X = self._preprocess(X)

        if return_decomposed_components:
            F, cov_b, var_u = self._predict(X, return_structured=True)
            return F, cov_b, var_u
        else:
            # estimate covariance
            S = self._predict(X)

            # return correlation if needed
            if return_corr:
                vola = np.sqrt(np.diag(S))
                corr = S / np.outer(vola, vola)
                if columns is None:
                    return corr
                return pd.DataFrame(corr, index=columns, columns=columns)

            # return covariance
            if columns is None:
                return S
            return pd.DataFrame(S, index=columns, columns=columns)

    def _predict(self, X: np.ndarray, return_structured=False) -> Union[np.ndarray, tuple]:
        """
        covariance estimation implementation

        Args:
            X (np.ndarray): data matrix containing multiple variables (columns) and observations (rows).
            return_structured (bool): whether to return the decomposed components of the covariance matrix.

        Returns:
            tuple or np.ndarray: decomposed covariance matrix or covariance matrix.
        """

        model = self.solver(self.num_factors, random_state=0).fit(X)

        F = model.components_.T  # num_features x num_factors
        B = model.transform(X)  # num_samples x num_factors
        U = X - B @ F.T
        cov_b = np.cov(B.T)  # num_factors x num_factors
        var_u = np.var(U, axis=0)  # diagonal

        if return_structured:
            return F, cov_b, var_u

        cov_x = F @ cov_b @ F.T + np.diag(var_u)

        return cov_x
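For reference, a minimal usage sketch of the estimator added above. The import path reflects this revision (a later commit in the PR splits riskmodel.py into separate files), and the synthetic prices, instrument names, and parameter values are assumptions for illustration only.

import numpy as np
import pandas as pd

from qlib.model.riskmodel import StructuredCovEstimator  # assumed import path for this revision

rng = np.random.default_rng(0)
n_days, n_stocks = 252, 50

# synthetic random-walk prices for 50 hypothetical instruments
prices = pd.DataFrame(
    100 * np.exp(np.cumsum(rng.normal(0, 0.01, size=(n_days, n_stocks)), axis=0)),
    columns=[f"stock_{i}" for i in range(n_stocks)],
)

est = StructuredCovEstimator(factor_model="pca", num_factors=10)

cov = est.predict(prices, is_price=True)      # structured covariance as a DataFrame
corr = est.predict(prices, return_corr=True)  # correlation matrix instead

# the decomposed components satisfy cov = F @ cov(B) @ F.T + diag(var(U))
F, cov_b, var_u = est.predict(prices, return_decomposed_components=True)
reconstructed = F @ cov_b @ F.T + np.diag(var_u)
print(np.allclose(reconstructed, cov.values))  # True

Passing factor_model="fa" runs the same calls with FactorAnalysis as the latent factor model instead of PCA; num_factors=10 above simply mirrors the default.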