Implement Enhanced Indexing as a Portfolio Optimizer #280

Merged · 32 commits · Mar 17, 2021
Changes from 1 commit

Commits (32)
988b42e
Add Structured Covariance Estimator to riskmodel.py
Feb 9, 2021
7b01c5c
Add an implementation of Enhanced Indexing to optimizer.py
Feb 9, 2021
9c2653f
Add an implementation of Enhanced Indexing to optimizer.py
Feb 9, 2021
4000518
Separate specific implementation of Portfolio Optimizer to folder.
Feb 22, 2021
b2e2142
Applied slight modification to follow PEP 8.
Feb 22, 2021
2cc057e
Fix minor mismatches of type hints.
Feb 22, 2021
9448a6e
Add an abstract class as the base class for all optimization related p…
Feb 22, 2021
42f8825
Reformat code to follow PEP 8.
Feb 22, 2021
f7d3e56
Merge optimization related portfolio construction back to portfolio/o…
Feb 22, 2021
58f74cf
Reformat code to follow PEP 8.
Feb 22, 2021
164687d
Add scikit-learn to dependencies.
Feb 22, 2021
b8647c1
Reformat code to follow PEP 8.
Feb 22, 2021
2f9d45e
Reformat code with black.
Feb 22, 2021
3787138
Format code with the latest version of black.
yongzhengqi Feb 22, 2021
dc4aa67
Black format
Derek-Wds Feb 22, 2021
f947a2f
Correct two mistakes in annotation.
Feb 22, 2021
d3caea6
Add unittest for TestStructuredCovEstimator.
Feb 22, 2021
527718a
Allow enhanced indexing to generate portfolio without industry relate…
Feb 22, 2021
2bff6eb
Split classes in riskmodel.py & optimizer.py into separate files.
Mar 4, 2021
83c6e74
Reindex files.
Mar 4, 2021
0f3e3d2
Update __init__.py.
Mar 4, 2021
79c1142
Pass nan_option to structured covariance estimator.
Mar 8, 2021
4d5a30b
Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916…
Mar 8, 2021
81b86f8
Update test to cover changes in structured_cov_estimator
Mar 8, 2021
351d598
Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916…
Mar 8, 2021
c6675be
Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916…
Mar 8, 2021
fc89fec
Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916…
Mar 8, 2021
2f9af1a
Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916…
Mar 8, 2021
7022675
Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916…
Mar 8, 2021
6a305c7
Resolve https://github.com/microsoft/qlib/pull/280#discussion_r58916…
Mar 8, 2021
8b9065c
Reformat with black.
Mar 8, 2021
53cf89d
Reformat with black.
Mar 8, 2021
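The optimizer built on top of that estimator solves a benchmark-relative problem: maximize expected return while penalizing tracking error against the benchmark and bounding per-asset deviations from it. The following cvxpy sketch shows the general shape of such a formulation; qlib's actual implementation uses its own solver and richer constraints (for example the industry-related ones referenced in commit 527718a), so the function name, parameters, and bounds here are illustrative assumptions:

import cvxpy as cp


def enhanced_indexing_weights(mu, sigma, w_bench, lam=1.0, dev_limit=0.02):
    """Hypothetical sketch of an enhanced-indexing weight optimization.

    mu: expected returns (N,); sigma: covariance (N, N), e.g. from a
    structured estimator; w_bench: benchmark weights (N,).
    """
    n = len(mu)
    w = cp.Variable(n)
    active = w - w_bench  # benchmark-relative (active) weights
    objective = cp.Maximize(mu @ w - lam * cp.quad_form(active, sigma))
    constraints = [
        cp.sum(w) == 1,               # fully invested
        w >= 0,                       # long-only
        cp.abs(active) <= dev_limit,  # per-asset deviation bound
    ]
    cp.Problem(objective, constraints).solve()
    return w.value

Keeping every active weight within a small band around the benchmark is what makes the result "enhanced indexing": the portfolio stays index-like while tilting toward assets with higher expected returns.
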
Black format
Derek-Wds committed Feb 22, 2021
commit dc4aa675034724a9d2815763fd575b3ec56e76e2
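
Every hunk in the diff below follows one pattern: black's "magic trailing comma". When a call or collection literal ends with a trailing comma, recent versions of black explode it to one element per line instead of packing it onto a single line, so this commit only reflows code and changes no behavior. A schematic example (not taken from this diff):

# Written with a trailing comma inside the parentheses:
qlib.init(provider_uri=uri, expression_cache=None,)

# black then formats it as one argument per line:
qlib.init(
    provider_uri=uri,
    expression_cache=None,
)
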
10 changes: 9 additions & 1 deletion docs/conf.py
@@ -191,7 +191,15 @@
 # (source start file, target name, title, author,
 # dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, "QLib", u"QLib Documentation", author, "QLib", "One line description of project.", "Miscellaneous",),
+    (
+        master_doc,
+        "QLib",
+        u"QLib Documentation",
+        author,
+        "QLib",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
 ]


12 changes: 10 additions & 2 deletions examples/benchmarks/TFT/libs/tft_model.py
@@ -721,7 +721,12 @@ def _build_base_graph(self):
         encoder_steps = self.num_encoder_steps

         # Inputs.
-        all_inputs = tf.keras.layers.Input(shape=(time_steps, combined_input_size,))
+        all_inputs = tf.keras.layers.Input(
+            shape=(
+                time_steps,
+                combined_input_size,
+            )
+        )

         unknown_inputs, known_combined_layer, obs_inputs, static_inputs = self.get_tft_embeddings(all_inputs)

@@ -861,7 +866,10 @@ def get_lstm(return_state):
             """Returns LSTM cell initialized with default parameters."""
             if self.use_cudnn:
                 lstm = tf.keras.layers.CuDNNLSTM(
-                    self.hidden_layer_size, return_sequences=True, return_state=return_state, stateful=False,
+                    self.hidden_layer_size,
+                    return_sequences=True,
+                    return_state=return_state,
+                    stateful=False,
                 )
             else:
                 lstm = tf.keras.layers.LSTM(
33 changes: 26 additions & 7 deletions examples/highfreq/highfreq_handler.py
@@ -20,7 +20,10 @@ def check_transform_proc(proc_l):
     new_l = []
     for p in proc_l:
         p["kwargs"].update(
-            {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time,}
+            {
+                "fit_start_time": fit_start_time,
+                "fit_end_time": fit_end_time,
+            }
         )
         new_l.append(p)
     return new_l
@@ -30,7 +33,11 @@ def check_transform_proc(proc_l):

         data_loader = {
             "class": "QlibDataLoader",
-            "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",},
+            "kwargs": {
+                "config": self.get_feature_config(),
+                "swap_level": False,
+                "freq": "1min",
+            },
         }
         super().__init__(
             instruments=instruments,
@@ -61,7 +68,8 @@ def get_normalized_price_feature(price_field, shift=0):

             feature_ops = template_norm.format(
                 template_if.format(
-                    template_fillnan.format(template_paused.format("$close")), template_paused.format(price_field),
+                    template_fillnan.format(template_paused.format("$close")),
+                    template_paused.format(price_field),
                 ),
                 template_fillnan.format(template_paused.format("$close")),
             )
@@ -111,14 +119,24 @@ def get_normalized_price_feature(price_field, shift=0):

 class HighFreqBacktestHandler(DataHandler):
     def __init__(
-        self, instruments="csi300", start_time=None, end_time=None,
+        self,
+        instruments="csi300",
+        start_time=None,
+        end_time=None,
     ):
         data_loader = {
             "class": "QlibDataLoader",
-            "kwargs": {"config": self.get_feature_config(), "swap_level": False, "freq": "1min",},
+            "kwargs": {
+                "config": self.get_feature_config(),
+                "swap_level": False,
+                "freq": "1min",
+            },
         }
         super().__init__(
-            instruments=instruments, start_time=start_time, end_time=end_time, data_loader=data_loader,
+            instruments=instruments,
+            start_time=start_time,
+            end_time=end_time,
+            data_loader=data_loader,
         )

     def get_feature_config(self):
@@ -137,7 +155,8 @@ def get_feature_config(self):
         fields += [
             "Cut({0}, 240, None)".format(
                 template_if.format(
-                    template_fillnan.format(template_paused.format("$close")), template_paused.format(simpson_vwap),
+                    template_fillnan.format(template_paused.format("$close")),
+                    template_paused.format(simpson_vwap),
                 )
             )
         ]
4 changes: 3 additions & 1 deletion examples/highfreq/highfreq_processor.py
@@ -65,6 +65,8 @@ def __call__(self, df_features):
         feat = df_values[:, [0, 1, 2, 3, 4, 10]].reshape(-1, 6 * 240)
         feat_1 = df_values[:, [5, 6, 7, 8, 9, 11]].reshape(-1, 6 * 240)
         df_new_features = pd.DataFrame(
-            data=np.concatenate((feat, feat_1), axis=1), index=idx, columns=["FEATURE_%d" % i for i in range(12 * 240)],
+            data=np.concatenate((feat, feat_1), axis=1),
+            index=idx,
+            columns=["FEATURE_%d" % i for i in range(12 * 240)],
         ).sort_index()
         return df_new_features
35 changes: 30 additions & 5 deletions examples/highfreq/workflow.py
@@ -63,7 +63,13 @@ class HighfreqWorkflow(object):
                 "module_path": "highfreq_handler",
                 "kwargs": DATA_HANDLER_CONFIG0,
             },
-            "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),},
+            "segments": {
+                "train": (start_time, train_end_time),
+                "test": (
+                    test_start_time,
+                    end_time,
+                ),
+            },
         },
     },
     "dataset_backtest": {
@@ -75,7 +81,13 @@ class HighfreqWorkflow(object):
                 "module_path": "highfreq_handler",
                 "kwargs": DATA_HANDLER_CONFIG1,
             },
-            "segments": {"train": (start_time, train_end_time), "test": (test_start_time, end_time,),},
+            "segments": {
+                "train": (start_time, train_end_time),
+                "test": (
+                    test_start_time,
+                    end_time,
+                ),
+            },
         },
     },
 }
@@ -140,11 +152,24 @@ def dump_and_load_dataset(self):
                 "start_time": "2021-01-19 00:00:00",
                 "end_time": "2021-01-25 16:00:00",
             },
-            segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),},
+            segment_kwargs={
+                "test": (
+                    "2021-01-19 00:00:00",
+                    "2021-01-25 16:00:00",
+                ),
+            },
         )
         dataset_backtest.init(
-            handler_kwargs={"start_time": "2021-01-19 00:00:00", "end_time": "2021-01-25 16:00:00",},
-            segment_kwargs={"test": ("2021-01-19 00:00:00", "2021-01-25 16:00:00",),},
+            handler_kwargs={
+                "start_time": "2021-01-19 00:00:00",
+                "end_time": "2021-01-25 16:00:00",
+            },
+            segment_kwargs={
+                "test": (
+                    "2021-01-19 00:00:00",
+                    "2021-01-25 16:00:00",
+                ),
+            },
         )

         ##=============get data=============
5 changes: 4 additions & 1 deletion examples/run_all_model.py
@@ -34,7 +34,10 @@
 exp_manager = {
     "class": "MLflowExpManager",
     "module_path": "qlib.workflow.expm",
-    "kwargs": {"uri": "file:" + exp_path, "default_exp_name": "Experiment",},
+    "kwargs": {
+        "uri": "file:" + exp_path,
+        "default_exp_name": "Experiment",
+    },
 }
 if not exists_qlib_data(provider_uri):
     print(f"Qlib data is not found in {provider_uri}")
5 changes: 4 additions & 1 deletion examples/workflow_by_code.py
@@ -81,7 +81,10 @@
     "strategy": {
         "class": "TopkDropoutStrategy",
         "module_path": "qlib.contrib.strategy.strategy",
-        "kwargs": {"topk": 50, "n_drop": 5,},
+        "kwargs": {
+            "topk": 50,
+            "n_drop": 5,
+        },
     },
     "backtest": {
         "verbose": False,
27 changes: 22 additions & 5 deletions scripts/data_collector/yahoo/collector.py
@@ -39,7 +39,13 @@ class YahooData:
     INTERVAL_1d = "1d"

     def __init__(
-        self, timezone: str = None, start=None, end=None, interval="1d", delay=0, show_1min_logging: bool = False,
+        self,
+        timezone: str = None,
+        start=None,
+        end=None,
+        interval="1d",
+        delay=0,
+        show_1min_logging: bool = False,
     ):
         """

@@ -119,7 +125,11 @@ def _get_simple(start_, end_):
             self._sleep()
             _remote_interval = "1m" if self._interval == self.INTERVAL_1min else self._interval
             return self.get_data_from_remote(
-                symbol, interval=_remote_interval, start=start_, end=end_, show_1min_logging=self._show_1min_logging,
+                symbol,
+                interval=_remote_interval,
+                start=start_,
+                end=end_,
+                show_1min_logging=self._show_1min_logging,
             )

         _result = None
@@ -428,7 +438,9 @@ class YahooNormalize:
     DAILY_FORMAT = "%Y-%m-%d"

     def __init__(
-        self, date_field_name: str = "date", symbol_field_name: str = "symbol",
+        self,
+        date_field_name: str = "date",
+        symbol_field_name: str = "symbol",
     ):
         """

@@ -446,7 +458,10 @@ def __init__(

     @staticmethod
     def normalize_yahoo(
-        df: pd.DataFrame, calendar_list: list = None, date_field_name: str = "date", symbol_field_name: str = "symbol",
+        df: pd.DataFrame,
+        calendar_list: list = None,
+        date_field_name: str = "date",
+        symbol_field_name: str = "symbol",
     ):
         if df.empty:
             return df
@@ -551,7 +566,9 @@ class YahooNormalize1min(YahooNormalize, ABC):
     CONSISTENT_1d = False

     def __init__(
-        self, date_field_name: str = "date", symbol_field_name: str = "symbol",
+        self,
+        date_field_name: str = "date",
+        symbol_field_name: str = "symbol",
     ):
         """
13 changes: 11 additions & 2 deletions scripts/dump_bin.py
@@ -153,13 +153,22 @@ def get_dump_fields(self, df_columns: Iterable[str]) -> Iterable[str]:

     @staticmethod
     def _read_calendars(calendar_path: Path) -> List[pd.Timestamp]:
-        return sorted(map(pd.Timestamp, pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(),))
+        return sorted(
+            map(
+                pd.Timestamp,
+                pd.read_csv(calendar_path, header=None).loc[:, 0].tolist(),
+            )
+        )

     def _read_instruments(self, instrument_path: Path) -> pd.DataFrame:
         df = pd.read_csv(
             instrument_path,
             sep=self.INSTRUMENTS_SEP,
-            names=[self.symbol_field_name, self.INSTRUMENTS_START_FIELD, self.INSTRUMENTS_END_FIELD,],
+            names=[
+                self.symbol_field_name,
+                self.INSTRUMENTS_START_FIELD,
+                self.INSTRUMENTS_END_FIELD,
+            ],
         )

         return df
14 changes: 11 additions & 3 deletions setup.py
@@ -70,10 +70,16 @@
 # Cython Extensions
 extensions = [
     Extension(
-        "qlib.data._libs.rolling", ["qlib/data/_libs/rolling.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE],
+        "qlib.data._libs.rolling",
+        ["qlib/data/_libs/rolling.pyx"],
+        language="c++",
+        include_dirs=[NUMPY_INCLUDE],
     ),
     Extension(
-        "qlib.data._libs.expanding", ["qlib/data/_libs/expanding.pyx"], language="c++", include_dirs=[NUMPY_INCLUDE],
+        "qlib.data._libs.expanding",
+        ["qlib/data/_libs/expanding.pyx"],
+        language="c++",
+        include_dirs=[NUMPY_INCLUDE],
     ),
 ]

@@ -92,7 +98,9 @@
     # py_modules=['qlib'],
     entry_points={
         # 'console_scripts': ['mycli=mymodule:cli'],
-        "console_scripts": ["qrun=qlib.workflow.cli:run",],
+        "console_scripts": [
+            "qrun=qlib.workflow.cli:run",
+        ],
     },
     ext_modules=extensions,
     install_requires=REQUIRED,
9 changes: 7 additions & 2 deletions tests/test_all_pipeline.py
@@ -78,7 +78,10 @@
     "strategy": {
         "class": "TopkDropoutStrategy",
         "module_path": "qlib.contrib.strategy.strategy",
-        "kwargs": {"topk": 50, "n_drop": 5,},
+        "kwargs": {
+            "topk": 50,
+            "n_drop": 5,
+        },
     },
     "backtest": {
         "verbose": False,
@@ -173,7 +176,9 @@ def test_0_train(self):
     def test_1_backtest(self):
         analyze_df = backtest_analysis(TestAllFlow.PRED_SCORE, TestAllFlow.RID)
         self.assertGreaterEqual(
-            analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0], 0.10, "backtest failed",
+            analyze_df.loc(axis=0)["excess_return_with_cost", "annualized_return"].values[0],
+            0.10,
+            "backtest failed",
         )


9 changes: 7 additions & 2 deletions tests/test_dump_data.py
@@ -40,7 +40,9 @@ def setUpClass(cls) -> None:
         TestDumpData.STOCK_NAMES = list(map(lambda x: x.name[:-4].upper(), SOURCE_DIR.glob("*.csv")))
         provider_uri = str(QLIB_DIR.resolve())
         qlib.init(
-            provider_uri=provider_uri, expression_cache=None, dataset_cache=None,
+            provider_uri=provider_uri,
+            expression_cache=None,
+            dataset_cache=None,
         )

     @classmethod
@@ -52,7 +54,10 @@ def test_0_dump_bin(self):

     def test_1_dump_calendars(self):
         ori_calendars = set(
-            map(pd.Timestamp, pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,)
+            map(
+                pd.Timestamp,
+                pd.read_csv(QLIB_DIR.joinpath("calendars", "day.txt"), header=None).loc[:, 0].values,
+            )
         )
         res_calendars = set(D.calendar())
         assert len(ori_calendars - res_calendars) == len(res_calendars - ori_calendars) == 0, "dump calendars failed"
4 changes: 3 additions & 1 deletion tests/test_get_data.py
@@ -26,7 +26,9 @@ class TestGetData(unittest.TestCase):
     def setUpClass(cls) -> None:
         provider_uri = str(QLIB_DIR.resolve())
         qlib.init(
-            provider_uri=provider_uri, expression_cache=None, dataset_cache=None,
+            provider_uri=provider_uri,
+            expression_cache=None,
+            dataset_cache=None,
         )

     @classmethod