From c731c5a59b8d50fdc653842be7a1f7c944f11d14 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Fri, 29 Jul 2022 15:44:19 +0800 Subject: [PATCH 01/23] Use dict-like configuration --- qlib/rl/from_neutrader/config.py | 20 --- qlib/rl/from_neutrader/feature.py | 6 +- qlib/rl/order_execution/objects.py | 2 + qlib/rl/order_execution/simulator_qlib.py | 148 ++++++++++++---------- qlib/rl/order_execution/utils.py | 41 +----- tests/rl/test_qlib_simulator.py | 77 +++++------ 6 files changed, 131 insertions(+), 163 deletions(-) delete mode 100644 qlib/rl/from_neutrader/config.py create mode 100644 qlib/rl/order_execution/objects.py diff --git a/qlib/rl/from_neutrader/config.py b/qlib/rl/from_neutrader/config.py deleted file mode 100644 index d9a681b32d..0000000000 --- a/qlib/rl/from_neutrader/config.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from dataclasses import dataclass -from pathlib import Path -from typing import Optional, Tuple, Union - - -# TODO: In the future we should merge the dataclass-based config with Qlib's dict-based config. 
-@dataclass -class ExchangeConfig: - limit_threshold: Union[float, Tuple[str, str]] - deal_price: Union[str, Tuple[str, str]] - volume_threshold: dict - open_cost: float = 0.0005 - close_cost: float = 0.0015 - min_cost: float = 5.0 - trade_unit: Optional[float] = 100.0 - cash_limit: Optional[Union[Path, float]] = None - generate_report: bool = False diff --git a/qlib/rl/from_neutrader/feature.py b/qlib/rl/from_neutrader/feature.py index ca42af24c9..2b4279e1a6 100644 --- a/qlib/rl/from_neutrader/feature.py +++ b/qlib/rl/from_neutrader/feature.py @@ -70,10 +70,10 @@ def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False) -> pd.D return data -def init_qlib(config: dict, part: Optional[str] = None) -> None: +def init_qlib(qlib_config: dict) -> None: provider_uri_map = { - "day": config["provider_uri_day"].as_posix(), - "1min": config["provider_uri_1min"].as_posix(), + "day": qlib_config["provider_uri_day"].as_posix(), + "1min": qlib_config["provider_uri_1min"].as_posix(), } qlib.init( region=REG_CN, diff --git a/qlib/rl/order_execution/objects.py b/qlib/rl/order_execution/objects.py new file mode 100644 index 0000000000..ee6358bcd3 --- /dev/null +++ b/qlib/rl/order_execution/objects.py @@ -0,0 +1,2 @@ +FINEST_GRANULARITY = "1min" +COARSEST_GRANULARITY = "1day" diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index c75793f586..ad415b169a 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -3,22 +3,23 @@ from __future__ import annotations -from typing import Any, Callable, cast, Generator, List, Optional, Tuple +import copy +from typing import Any, cast, Generator, List, Optional, Tuple import numpy as np import pandas as pd +from qlib.backtest import get_strategy_executor from qlib.backtest.decision import BaseTradeDecision, Order, OrderHelper, TradeDecisionWO, TradeRange, TradeRangeByTime -from qlib.backtest.executor import BaseExecutor, 
NestedExecutor +from qlib.backtest.executor import NestedExecutor from qlib.backtest.utils import CommonInfrastructure from qlib.constant import EPS from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData -from qlib.rl.from_neutrader.config import ExchangeConfig from qlib.rl.from_neutrader.feature import init_qlib +from qlib.rl.order_execution.objects import COARSEST_GRANULARITY, FINEST_GRANULARITY from qlib.rl.order_execution.simulator_simple import SAOEMetrics, SAOEState from qlib.rl.order_execution.utils import ( dataframe_append, - get_common_infra, get_portfolio_and_indicator, get_ticks_slice, price_advantage, @@ -28,8 +29,8 @@ class DecomposedStrategy(BaseStrategy): - def __init__(self) -> None: - super().__init__() + def __init__(self, common_infra: CommonInfrastructure = None) -> None: + super().__init__(common_infra=common_infra) self.execute_order: Optional[Order] = None self.execute_result: List[Tuple[Order, float, float, float]] = [] @@ -66,12 +67,12 @@ class SingleOrderStrategy(BaseStrategy): # this logic is copied from FileOrderStrategy def __init__( self, - common_infra: CommonInfrastructure, order: Order, trade_range: TradeRange, instrument: str, ) -> None: - super().__init__(common_infra=common_infra) + super().__init__() + self._order = order self._trade_range = trade_range self._instrument = instrument @@ -91,9 +92,25 @@ def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionW return TradeDecisionWO(order_list, self, self._trade_range) -# TODO: move these to the configuration files -FINEST_GRANULARITY = "1min" -COARSEST_GRANULARITY = "1day" +executor_config_template = { + "class": "NestedExecutor", + "module_path": "qlib.backtest.executor", + "kwargs": { + "time_per_step": COARSEST_GRANULARITY, + "inner_strategy": { + "class": "DecomposedStrategy", + "module_path": "qlib.rl.order_execution.simulator_qlib", + }, + "track_data": True, + }, +} +top_strategy_config_template = { + "class": "SingleOrderStrategy", + 
"module_path": "qlib.rl.order_execution.simulator_qlib", +} +exchange_kwargs_template = { + "freq": FINEST_GRANULARITY, +} class StateMaintainer: @@ -123,11 +140,14 @@ def __init__(self, order: Order, time_per_step: str, tick_index: pd.DatetimeInde def update( self, - inner_executor: BaseExecutor, - inner_strategy: DecomposedStrategy, + executor: NestedExecutor, done: bool, all_indicators: dict, ) -> None: + inner_executor = executor.inner_executor + inner_strategy = executor.inner_strategy + assert isinstance(inner_strategy, DecomposedStrategy) + execute_order = inner_strategy.execute_order execute_result = inner_strategy.execute_result exec_vol = np.array([e[0].deal_amount for e in execute_result]) @@ -272,10 +292,10 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): A string to describe the time granularity of each step. Current support "1min", "30min", and "1day" qlib_config (dict): Configuration used to initialize Qlib. - inner_executor_fn (Callable[[str, CommonInfrastructure], BaseExecutor]): - Function used to get the inner level executor. - exchange_config (ExchangeConfig): - Configuration used to create the Exchange instance. + inner_executor_config (dict): + Inner executor configuration + exchange_config (dict): + Exchange configuration """ def __init__( @@ -283,8 +303,8 @@ def __init__( order: Order, time_per_step: str, # "1min", "30min", "1day" qlib_config: dict, - inner_executor_fn: Callable[[str, CommonInfrastructure], BaseExecutor], - exchange_config: ExchangeConfig, + inner_executor_config: dict, + exchange_config: dict, ) -> None: assert time_per_step in ("1min", "30min", "1day") @@ -292,12 +312,7 @@ def __init__( assert order.start_time.date() == order.end_time.date(), "Start date and end date must be the same." 
- self._order = order - self._order_date = pd.Timestamp(order.start_time.date()) - self._trade_range = TradeRangeByTime(order.start_time.time(), order.end_time.time()) - self._qlib_config = qlib_config - self._inner_executor_fn = inner_executor_fn - self._exchange_config = exchange_config + init_qlib(qlib_config) self._time_per_step = time_per_step self._ticks_per_step = int(pd.Timedelta(time_per_step).total_seconds() // 60) @@ -307,56 +322,58 @@ def __init__( self._done = False - self._inner_strategy = DecomposedStrategy() - - self.reset(self._order) - - def reset(self, order: Order) -> None: - instrument = order.stock_id - - # TODO: Check this logic. Make sure we need to do this every time we reset the simulator. - init_qlib(self._qlib_config, instrument) - - common_infra = get_common_infra( - self._exchange_config, - trade_date=pd.Timestamp(self._order_date), - codes=[instrument], - ) - - # TODO: We can leverage interfaces like (https://tinyurl.com/y8f8fhv4) to create trading environment. - # TODO: By aligning the interface to create environments with Qlib, it will be easier to share the config and - # TODO: code between backtesting and training. 
- self._inner_executor = self._inner_executor_fn(self._time_per_step, common_infra) - self._executor = NestedExecutor( - time_per_step=COARSEST_GRANULARITY, - inner_executor=self._inner_executor, - inner_strategy=self._inner_strategy, - track_data=True, - common_infra=common_infra, + self.reset(order, inner_executor_config, exchange_config) + + def reset(self, order: Order, inner_executor_config: dict, exchange_config: dict) -> None: + order_date = pd.Timestamp(order.start_time.date()) + + top_strategy_config: dict = copy.deepcopy(top_strategy_config_template) + top_strategy_config.update({ + "kwargs": { + "order": order, + "trade_range": TradeRangeByTime(order.start_time.time(), order.end_time.time()), + "instrument": order.stock_id, + } + }) + + executor_config: dict = copy.deepcopy(executor_config_template) + executor_config["kwargs"].update({ + "inner_executor": inner_executor_config, + "start_time": order_date, + "end_time": order_date, + }) + + exchange_kwargs: dict = copy.deepcopy(exchange_kwargs_template) + exchange_kwargs.update({"codes": [order.stock_id], **exchange_config}) + + top_strategy, self._executor = get_strategy_executor( + start_time=order_date, + end_time=order_date + pd.DateOffset(1), + strategy=top_strategy_config, + executor=executor_config, + benchmark=order.stock_id, + account=1e12, + exchange_kwargs=exchange_kwargs, + pos_type="InfPosition", ) + top_strategy.reset(level_infra=self._executor.get_level_infra()) - exchange = self._inner_executor.trade_exchange + exchange = self._executor.trade_exchange self._ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)]) self._ticks_for_order = get_ticks_slice( self._ticks_index, - self._order.start_time, - self._order.end_time, + order.start_time, + order.end_time, include_end=True, ) - self._backtest_data = QlibIntradayBacktestData( - order=self._order, + order=order, exchange=exchange, start_time=self._ticks_for_order[0], end_time=self._ticks_for_order[-1], ) - 
self.twap_price = self._backtest_data.get_deal_price().mean() - top_strategy = SingleOrderStrategy(common_infra, order, self._trade_range, instrument) - self._executor.reset(start_time=pd.Timestamp(self._order_date), end_time=pd.Timestamp(self._order_date)) - top_strategy.reset(level_infra=self._executor.get_level_infra()) - self._collect_data_loop = self._executor.collect_data(top_strategy.generate_trade_decision(), level=0) assert isinstance(self._collect_data_loop, Generator) @@ -364,12 +381,14 @@ def reset(self, order: Order) -> None: self._done = False self._maintainer = StateMaintainer( - order=self._order, + order=order, time_per_step=self._time_per_step, tick_index=self._ticks_index, twap_price=self.twap_price, ) + self._order = order + def _iter_strategy(self, action: float = None) -> DecomposedStrategy: """Iterate the _collect_data_loop until we get the next yield DecomposedStrategy.""" assert self._collect_data_loop is not None @@ -400,8 +419,7 @@ def step(self, action: float) -> None: _, all_indicators = get_portfolio_and_indicator(self._executor) self._maintainer.update( - inner_executor=self._inner_executor, - inner_strategy=self._inner_strategy, + executor=self._executor, done=self._done, all_indicators=all_indicators, ) @@ -409,7 +427,7 @@ def step(self, action: float) -> None: def get_state(self) -> SAOEState: return SAOEState( order=self._order, - cur_time=self._inner_executor.trade_calendar.get_step_time()[0], + cur_time=self._executor.inner_executor.trade_calendar.get_step_time()[0], position=self._maintainer.position, history_exec=self._maintainer.history_exec, history_steps=self._maintainer.history_steps, diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py index e2d0de9812..779f860c33 100644 --- a/qlib/rl/order_execution/utils.py +++ b/qlib/rl/order_execution/utils.py @@ -3,54 +3,17 @@ from __future__ import annotations -from typing import Any, List, Tuple, cast +from typing import Any, cast, Tuple import numpy as 
np import pandas as pd -from qlib.backtest import CommonInfrastructure, get_exchange -from qlib.backtest.account import Account from qlib.backtest.decision import OrderDir from qlib.backtest.executor import BaseExecutor -from qlib.rl.from_neutrader.config import ExchangeConfig -from qlib.rl.order_execution.simulator_simple import ONE_SEC, _float_or_ndarray +from qlib.rl.order_execution.simulator_simple import _float_or_ndarray, ONE_SEC from qlib.utils.time import Freq -def get_common_infra( - config: ExchangeConfig, - trade_date: pd.Timestamp, - codes: List[str], - cash_limit: float = None, -) -> CommonInfrastructure: - # need to specify a range here for acceleration - if cash_limit is None: - trade_account = Account(init_cash=int(1e12), benchmark_config={}, pos_type="InfPosition") - else: - trade_account = Account( - init_cash=cash_limit, - benchmark_config={}, - pos_type="Position", - position_dict={code: {"amount": 1e12, "price": 1.0} for code in codes}, - ) - - exchange = get_exchange( - codes=codes, - freq="1min", - limit_threshold=config.limit_threshold, - deal_price=config.deal_price, - open_cost=config.open_cost, - close_cost=config.close_cost, - min_cost=config.min_cost if config.trade_unit is not None else 0, - start_time=trade_date, - end_time=trade_date + pd.DateOffset(1), - trade_unit=config.trade_unit, - volume_threshold=config.volume_threshold, - ) - - return CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange) - - def get_ticks_slice( ticks_index: pd.DatetimeIndex, start: pd.Timestamp, diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py index ca7820645f..ac94e589e5 100644 --- a/tests/rl/test_qlib_simulator.py +++ b/tests/rl/test_qlib_simulator.py @@ -7,11 +7,10 @@ import pytest from qlib.backtest.decision import Order, OrderDir -from qlib.backtest.executor import NestedExecutor, SimulatorExecutor -from qlib.backtest.utils import CommonInfrastructure -from qlib.contrib.strategy import TWAPStrategy 
+from qlib.backtest.executor import SimulatorExecutor from qlib.rl.order_execution import CategoricalActionInterpreter -from qlib.rl.order_execution.simulator_qlib import ExchangeConfig, SingleAssetOrderExecutionQlib +from qlib.rl.order_execution.objects import FINEST_GRANULARITY +from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecutionQlib TOTAL_POSITION = 2100.0 @@ -32,22 +31,30 @@ def get_order() -> Order: ) -def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib: - def _inner_executor_fn(time_per_step: str, common_infra: CommonInfrastructure) -> NestedExecutor: - return NestedExecutor( - time_per_step=time_per_step, - inner_strategy=TWAPStrategy(), - inner_executor=SimulatorExecutor( - time_per_step="1min", - verbose=False, - trade_type=SimulatorExecutor.TT_SERIAL, - generate_report=False, - common_infra=common_infra, - track_data=True, - ), - common_infra=common_infra, - track_data=True, - ) +def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecutionQlib: + _inner_executor_config = { + "class": "NestedExecutor", + "module_path": "qlib.backtest.executor", + "kwargs": { + "time_per_step": time_per_step, + "inner_strategy": { + "class": "TWAPStrategy", + "module_path": "qlib.contrib.strategy.rule_strategy", + }, + "inner_executor": { + "class": "SimulatorExecutor", + "module_path": "qlib.backtest.executor", + "kwargs": { + "time_per_step": FINEST_GRANULARITY, + "verbose": False, + "trade_type": SimulatorExecutor.TT_SERIAL, + "generate_report": False, + "track_data": True, + } + }, + "track_data": True, + }, + } DATA_ROOT_DIR = Path(__file__).parent.parent / ".data" / "rl" / "qlib_simulator" @@ -67,27 +74,25 @@ def _inner_executor_fn(time_per_step: str, common_infra: CommonInfrastructure) - } # fmt: on - exchange_config = ExchangeConfig( - limit_threshold=("$ask == 0", "$bid == 0"), - deal_price=("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"), - volume_threshold={ + exchange_config = { + 
"limit_threshold": ("$ask == 0", "$bid == 0"), + "deal_price": ("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"), + "volume_threshold": { "all": ("cum", "0.2 * DayCumsum($volume, '9:30', '14:29')"), "buy": ("current", "$askV1"), "sell": ("current", "$bidV1"), }, - open_cost=0.0005, - close_cost=0.0015, - min_cost=5.0, - trade_unit=None, - cash_limit=None, - generate_report=False, - ) + "open_cost": 0.0005, + "close_cost": 0.0015, + "min_cost": 5.0, + "trade_unit": None, + } return SingleAssetOrderExecutionQlib( order=order, - time_per_step="30min", + time_per_step=time_per_step, qlib_config=qlib_config, - inner_executor_fn=_inner_executor_fn, + inner_executor_config=_inner_executor_config, exchange_config=exchange_config, ) @@ -95,7 +100,7 @@ def _inner_executor_fn(time_per_step: str, common_infra: CommonInfrastructure) - @python_version_request def test_simulator_first_step(): order = get_order() - simulator = get_simulator(order) + simulator = get_simulator(order, time_per_step="30min") state = simulator.get_state() assert state.cur_time == pd.Timestamp("2019-03-04 09:30:00") assert state.position == TOTAL_POSITION @@ -130,7 +135,7 @@ def test_simulator_first_step(): @python_version_request def test_simulator_stop_twap() -> None: order = get_order() - simulator = get_simulator(order) + simulator = get_simulator(order, time_per_step="30min") NUM_STEPS = 7 for i in range(NUM_STEPS): simulator.step(TOTAL_POSITION / NUM_STEPS) @@ -157,7 +162,7 @@ def test_simulator_stop_twap() -> None: def test_interpreter() -> None: NUM_EXECUTION = 3 order = get_order() - simulator = get_simulator(order) + simulator = get_simulator(order, time_per_step="30min") interpreter_action = CategoricalActionInterpreter(values=NUM_EXECUTION) NUM_STEPS = 7 From 0490ff41ff7d9bf1335e8f9156347bd02a87ac5e Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Fri, 29 Jul 2022 15:50:51 +0800 Subject: [PATCH 02/23] Rename from_neutrader to integration --- qlib/rl/{from_neutrader => 
integration}/__init__.py | 0 qlib/rl/{from_neutrader => integration}/feature.py | 0 qlib/rl/order_execution/simulator_qlib.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename qlib/rl/{from_neutrader => integration}/__init__.py (100%) rename qlib/rl/{from_neutrader => integration}/feature.py (100%) diff --git a/qlib/rl/from_neutrader/__init__.py b/qlib/rl/integration/__init__.py similarity index 100% rename from qlib/rl/from_neutrader/__init__.py rename to qlib/rl/integration/__init__.py diff --git a/qlib/rl/from_neutrader/feature.py b/qlib/rl/integration/feature.py similarity index 100% rename from qlib/rl/from_neutrader/feature.py rename to qlib/rl/integration/feature.py diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index ad415b169a..7e126d9a25 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -15,7 +15,7 @@ from qlib.backtest.utils import CommonInfrastructure from qlib.constant import EPS from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData -from qlib.rl.from_neutrader.feature import init_qlib +from qlib.rl.integration.feature import init_qlib from qlib.rl.order_execution.objects import COARSEST_GRANULARITY, FINEST_GRANULARITY from qlib.rl.order_execution.simulator_simple import SAOEMetrics, SAOEState from qlib.rl.order_execution.utils import ( From ef73eac5ee6b777035eb2ca67cb77fa18e84befc Mon Sep 17 00:00:00 2001 From: Default Date: Wed, 3 Aug 2022 13:50:43 +0800 Subject: [PATCH 03/23] SAOE strategy --- qlib/backtest/__init__.py | 2 +- qlib/backtest/executor.py | 9 + qlib/rl/order_execution/simulator_qlib.py | 392 +++----------------- qlib/rl/order_execution/state_maintainer.py | 219 +++++++++++ qlib/rl/order_execution/utils.py | 23 +- qlib/rl/strategy/__init__.py | 2 + qlib/rl/strategy/decomposed.py | 44 +++ qlib/rl/strategy/saoe.py | 57 +++ qlib/rl/strategy/single_order.py | 35 ++ qlib/strategy/base.py | 7 + 
tests/rl/test_qlib_simulator.py | 100 +++-- 11 files changed, 492 insertions(+), 398 deletions(-) create mode 100644 qlib/rl/order_execution/state_maintainer.py create mode 100644 qlib/rl/strategy/__init__.py create mode 100644 qlib/rl/strategy/decomposed.py create mode 100644 qlib/rl/strategy/saoe.py create mode 100644 qlib/rl/strategy/single_order.py diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py index d3f4d72402..e8fe73c5a2 100644 --- a/qlib/backtest/__init__.py +++ b/qlib/backtest/__init__.py @@ -345,4 +345,4 @@ def format_decisions( return res -__all__ = ["Order", "backtest"] +__all__ = ["Order", "backtest", "get_strategy_executor"] diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index 13af7aea71..6948faca30 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -124,6 +124,9 @@ def __init__( self.dealt_order_amount: Dict[str, float] = defaultdict(float) self.deal_day = None + # whether the current executor is collecting data + self.is_collecting = False + def reset_common_infra(self, common_infra: CommonInfrastructure, copy_trade_account: bool = False) -> None: """ reset infrastructure for trading @@ -256,6 +259,8 @@ def collect_data( object trade decision """ + self.is_collecting = True + if self.track_data: yield trade_decision @@ -296,6 +301,8 @@ def collect_data( if return_value is not None: return_value.update({"execute_result": res}) + + self.is_collecting = False return res def get_all_executors(self) -> List[BaseExecutor]: @@ -473,6 +480,8 @@ def _collect_data( # do nothing and just step forward sub_cal.step() + self.inner_strategy.post_upper_level_exe_step() + return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list} def post_inner_exe_step(self, inner_exe_res: List[object]) -> None: diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index 7e126d9a25..22ac718e93 100644 --- 
a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -3,282 +3,20 @@ from __future__ import annotations -import copy -from typing import Any, cast, Generator, List, Optional, Tuple +from typing import Generator, Optional -import numpy as np import pandas as pd - from qlib.backtest import get_strategy_executor -from qlib.backtest.decision import BaseTradeDecision, Order, OrderHelper, TradeDecisionWO, TradeRange, TradeRangeByTime +from qlib.backtest.decision import Order from qlib.backtest.executor import NestedExecutor -from qlib.backtest.utils import CommonInfrastructure -from qlib.constant import EPS from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData from qlib.rl.integration.feature import init_qlib -from qlib.rl.order_execution.objects import COARSEST_GRANULARITY, FINEST_GRANULARITY -from qlib.rl.order_execution.simulator_simple import SAOEMetrics, SAOEState +from qlib.rl.order_execution.simulator_simple import SAOEState from qlib.rl.order_execution.utils import ( - dataframe_append, - get_portfolio_and_indicator, get_ticks_slice, - price_advantage, ) from qlib.rl.simulator import Simulator -from qlib.strategy.base import BaseStrategy - - -class DecomposedStrategy(BaseStrategy): - def __init__(self, common_infra: CommonInfrastructure = None) -> None: - super().__init__(common_infra=common_infra) - - self.execute_order: Optional[Order] = None - self.execute_result: List[Tuple[Order, float, float, float]] = [] - - def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]: - # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside - # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`, - # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner - # level strategy through this way. 
- exec_vol = yield self - - oh = self.trade_exchange.get_order_helper() - order = oh.create(self._order.stock_id, exec_vol, self._order.direction) - - self.execute_order = order - - return TradeDecisionWO([order], self) - - def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: - return outer_trade_decision - - def post_exe_step(self, execute_result: list) -> None: - self.execute_result = execute_result - - def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None: - super().reset(outer_trade_decision=outer_trade_decision, **kwargs) - if outer_trade_decision is not None: - order_list = outer_trade_decision.order_list - assert len(order_list) == 1 - self._order = order_list[0] - - -class SingleOrderStrategy(BaseStrategy): - # this logic is copied from FileOrderStrategy - def __init__( - self, - order: Order, - trade_range: TradeRange, - instrument: str, - ) -> None: - super().__init__() - - self._order = order - self._trade_range = trade_range - self._instrument = instrument - - def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: - return outer_trade_decision - - def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO: - oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper() - order_list = [ - oh.create( - code=self._instrument, - amount=self._order.amount, - direction=self._order.direction, - ), - ] - return TradeDecisionWO(order_list, self, self._trade_range) - - -executor_config_template = { - "class": "NestedExecutor", - "module_path": "qlib.backtest.executor", - "kwargs": { - "time_per_step": COARSEST_GRANULARITY, - "inner_strategy": { - "class": "DecomposedStrategy", - "module_path": "qlib.rl.order_execution.simulator_qlib", - }, - "track_data": True, - }, -} -top_strategy_config_template = { - "class": "SingleOrderStrategy", - "module_path": "qlib.rl.order_execution.simulator_qlib", -} 
-exchange_kwargs_template = { - "freq": FINEST_GRANULARITY, -} - - -class StateMaintainer: - """ - Maintain states of the environment. - - Example usage:: - - maintainer = StateMaintainer(...) # in reset - maintainer.update(...) # in step - # get states in get_state from maintainer - """ - - def __init__(self, order: Order, time_per_step: str, tick_index: pd.DatetimeIndex, twap_price: float) -> None: - super().__init__() - - self.position = order.amount - self._order = order - self._time_per_step = time_per_step - self._tick_index = tick_index - self._twap_price = twap_price - - metric_keys = list(SAOEMetrics.__annotations__.keys()) # pylint: disable=no-member - self.history_exec = pd.DataFrame(columns=metric_keys).set_index("datetime") - self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime") - self.metrics: Optional[SAOEMetrics] = None - - def update( - self, - executor: NestedExecutor, - done: bool, - all_indicators: dict, - ) -> None: - inner_executor = executor.inner_executor - inner_strategy = executor.inner_strategy - assert isinstance(inner_strategy, DecomposedStrategy) - - execute_order = inner_strategy.execute_order - execute_result = inner_strategy.execute_result - exec_vol = np.array([e[0].deal_amount for e in execute_result]) - num_step = len(execute_result) - - assert execute_order is not None - - if num_step == 0: - market_volume = np.array([]) - market_price = np.array([]) - datetime_list = pd.DatetimeIndex([]) - else: - market_volume = np.array( - inner_executor.trade_exchange.get_volume( - execute_order.stock_id, - execute_result[0][0].start_time, - execute_result[-1][0].start_time, - method=None, - ), - ) - - trade_value = all_indicators[FINEST_GRANULARITY].iloc[-num_step:]["value"].values - deal_amount = all_indicators[FINEST_GRANULARITY].iloc[-num_step:]["deal_amount"].values - market_price = trade_value / deal_amount - - datetime_list = all_indicators[FINEST_GRANULARITY].index[-num_step:] - - assert market_price.shape == 
market_volume.shape == exec_vol.shape - - self.history_exec = dataframe_append( - self.history_exec, - self._collect_multi_order_metric( - order=self._order, - datetime=datetime_list, - market_vol=market_volume, - market_price=market_price, - exec_vol=exec_vol, - pa=all_indicators[self._time_per_step].iloc[-1]["pa"], - ), - ) - - self.history_steps = dataframe_append( - self.history_steps, - [ - self._collect_single_order_metric( - execute_order, - execute_order.start_time, - market_volume, - market_price, - exec_vol.sum(), - exec_vol, - ), - ], - ) - - if done: - self.metrics = self._collect_single_order_metric( - self._order, - self._tick_index[0], # start time - self.history_exec["market_volume"], - self.history_exec["market_price"], - self.history_steps["amount"].sum(), - self.history_exec["deal_amount"], - ) - - # TODO: check whether we need this. Can we get this information from Account? - # Do this at the end - self.position -= exec_vol.sum() - - def _collect_multi_order_metric( - self, - order: Order, - datetime: pd.Timestamp, - market_vol: np.ndarray, - market_price: np.ndarray, - exec_vol: np.ndarray, - pa: float, - ) -> SAOEMetrics: - return SAOEMetrics( - # It should have the same keys with SAOEMetrics, - # but the values do not necessarily have the annotated type. - # Some values could be vectorized (e.g., exec_vol). 
- stock_id=order.stock_id, - datetime=datetime, - direction=order.direction, - market_volume=market_vol, - market_price=market_price, - amount=exec_vol, - inner_amount=exec_vol, - deal_amount=exec_vol, - trade_price=market_price, - trade_value=market_price * exec_vol, - position=self.position - np.cumsum(exec_vol), - ffr=exec_vol / order.amount, - pa=pa, - ) - - def _collect_single_order_metric( - self, - order: Order, - datetime: pd.Timestamp, - market_vol: np.ndarray, - market_price: np.ndarray, - amount: float, # intended to trade such amount - exec_vol: np.ndarray, - ) -> SAOEMetrics: - assert len(market_vol) == len(market_price) == len(exec_vol) - - if np.abs(np.sum(exec_vol)) < EPS: - exec_avg_price = 0.0 - else: - exec_avg_price = cast(float, np.average(market_price, weights=exec_vol)) # could be nan - if hasattr(exec_avg_price, "item"): # could be numpy scalar - exec_avg_price = exec_avg_price.item() # type: ignore - - exec_sum = exec_vol.sum() - return SAOEMetrics( - stock_id=order.stock_id, - datetime=datetime, - direction=order.direction, - market_volume=market_vol.sum(), - market_price=market_price.mean() if len(market_price) > 0 else np.nan, - amount=amount, - inner_amount=exec_sum, - deal_amount=exec_sum, # in this simulator, there's no other restrictions - trade_price=exec_avg_price, - trade_value=float(np.sum(market_price * exec_vol)), - position=self.position - exec_sum, - ffr=float(exec_sum / order.amount), - pa=price_advantage(exec_avg_price, self._twap_price, order.direction), - ) +from qlib.rl.strategy.saoe import SAOEStrategy class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): @@ -292,8 +30,10 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): A string to describe the time granularity of each step. Current support "1min", "30min", and "1day" qlib_config (dict): Configuration used to initialize Qlib. 
- inner_executor_config (dict): - Inner executor configuration + strategy_config (dict): + Strategy configuration + executor_config (dict): + Executor configuration exchange_config (dict): Exchange configuration """ @@ -303,7 +43,8 @@ def __init__( order: Order, time_per_step: str, # "1min", "30min", "1day" qlib_config: dict, - inner_executor_config: dict, + strategy_config: dict, + executor_config: dict, exchange_config: dict, ) -> None: assert time_per_step in ("1min", "30min", "1day") @@ -314,89 +55,72 @@ def __init__( init_qlib(qlib_config) - self._time_per_step = time_per_step - self._ticks_per_step = int(pd.Timedelta(time_per_step).total_seconds() // 60) - self._executor: Optional[NestedExecutor] = None self._collect_data_loop: Optional[Generator] = None + self.reset(order, time_per_step, strategy_config, executor_config, exchange_config) - self._done = False - - self.reset(order, inner_executor_config, exchange_config) - - def reset(self, order: Order, inner_executor_config: dict, exchange_config: dict) -> None: - order_date = pd.Timestamp(order.start_time.date()) - - top_strategy_config: dict = copy.deepcopy(top_strategy_config_template) - top_strategy_config.update({ - "kwargs": { - "order": order, - "trade_range": TradeRangeByTime(order.start_time.time(), order.end_time.time()), - "instrument": order.stock_id, - } - }) - - executor_config: dict = copy.deepcopy(executor_config_template) - executor_config["kwargs"].update({ - "inner_executor": inner_executor_config, - "start_time": order_date, - "end_time": order_date, - }) - - exchange_kwargs: dict = copy.deepcopy(exchange_kwargs_template) - exchange_kwargs.update({"codes": [order.stock_id], **exchange_config}) - + def reset( + self, + order: Order, + time_per_step: str, + strategy_config: dict, + executor_config: dict, + exchange_config: dict, + ) -> None: top_strategy, self._executor = get_strategy_executor( - start_time=order_date, - end_time=order_date + pd.DateOffset(1), - 
strategy=top_strategy_config, + start_time=pd.Timestamp(order.start_time.date()), + end_time=pd.Timestamp(order.start_time.date()) + pd.DateOffset(1), + strategy=strategy_config, executor=executor_config, benchmark=order.stock_id, account=1e12, - exchange_kwargs=exchange_kwargs, + exchange_kwargs=exchange_config, pos_type="InfPosition", ) + assert isinstance(self._executor, NestedExecutor) top_strategy.reset(level_infra=self._executor.get_level_infra()) exchange = self._executor.trade_exchange - self._ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)]) - self._ticks_for_order = get_ticks_slice( - self._ticks_index, + ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)]) + ticks_for_order = get_ticks_slice( + ticks_index, order.start_time, order.end_time, include_end=True, ) - self._backtest_data = QlibIntradayBacktestData( + backtest_data = QlibIntradayBacktestData( order=order, exchange=exchange, - start_time=self._ticks_for_order[0], - end_time=self._ticks_for_order[-1], + start_time=ticks_for_order[0], + end_time=ticks_for_order[-1], ) - self.twap_price = self._backtest_data.get_deal_price().mean() + + self.twap_price = backtest_data.get_deal_price().mean() self._collect_data_loop = self._executor.collect_data(top_strategy.generate_trade_decision(), level=0) assert isinstance(self._collect_data_loop, Generator) - self._iter_strategy(action=None) - self._done = False + self._last_yielded_saoe_strategy = self._iter_strategy(action=None) - self._maintainer = StateMaintainer( + assert isinstance(self._executor.inner_strategy, SAOEStrategy) + self._executor.inner_strategy.create_saoe_maintainer( order=order, - time_per_step=self._time_per_step, - tick_index=self._ticks_index, + executor=self._executor.inner_executor, + backtest_data=backtest_data, + time_per_step=time_per_step, + ticks_index=ticks_index, twap_price=self.twap_price, + ticks_for_order=ticks_for_order, ) - self._order = order - - def _iter_strategy(self, 
action: float = None) -> DecomposedStrategy: - """Iterate the _collect_data_loop until we get the next yield DecomposedStrategy.""" + def _iter_strategy(self, action: float = None) -> SAOEStrategy: + """Iterate the _collect_data_loop until we get the next yield SAOEStrategy.""" assert self._collect_data_loop is not None strategy = next(self._collect_data_loop) if action is None else self._collect_data_loop.send(action) - while not isinstance(strategy, DecomposedStrategy): + while not isinstance(strategy, SAOEStrategy): strategy = next(self._collect_data_loop) if action is None else self._collect_data_loop.send(action) - assert isinstance(strategy, DecomposedStrategy) + assert isinstance(strategy, SAOEStrategy) return strategy def step(self, action: float) -> None: @@ -408,35 +132,17 @@ def step(self, action: float) -> None: The amount you wish to deal. The simulator doesn't guarantee all the amount to be successfully dealt. """ - assert not self._done, "Simulator has already done!" + assert not self.done(), "Simulator has already done!" 
try: - self._iter_strategy(action=action) + self._last_yielded_saoe_strategy = self._iter_strategy(action=action) except StopIteration: - self._done = True + pass assert self._executor is not None - _, all_indicators = get_portfolio_and_indicator(self._executor) - - self._maintainer.update( - executor=self._executor, - done=self._done, - all_indicators=all_indicators, - ) def get_state(self) -> SAOEState: - return SAOEState( - order=self._order, - cur_time=self._executor.inner_executor.trade_calendar.get_step_time()[0], - position=self._maintainer.position, - history_exec=self._maintainer.history_exec, - history_steps=self._maintainer.history_steps, - metrics=self._maintainer.metrics, - backtest_data=self._backtest_data, - ticks_per_step=self._ticks_per_step, - ticks_index=self._ticks_index, - ticks_for_order=self._ticks_for_order, - ) + return self._last_yielded_saoe_strategy.maintainer.saoe_state def done(self) -> bool: - return self._done + return not self._executor.is_collecting diff --git a/qlib/rl/order_execution/state_maintainer.py b/qlib/rl/order_execution/state_maintainer.py new file mode 100644 index 0000000000..af9ed5e9c0 --- /dev/null +++ b/qlib/rl/order_execution/state_maintainer.py @@ -0,0 +1,219 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from typing import cast, Optional + +import numpy as np +import pandas as pd +from qlib.backtest import Order +from qlib.backtest.executor import BaseExecutor +from qlib.backtest.utils import TradeCalendarManager +from qlib.constant import EPS +from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData +from qlib.rl.order_execution import SAOEMetrics, SAOEState +from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, get_ticks_slice, price_advantage + + +class SAOEStateMaintainer: + """ + Maintain states of the environment. + + Example usage:: + + maintainer = StateMaintainer(...) # in reset + maintainer.update(...) 
# in step + # get states in get_state from maintainer + """ + + def __init__( + self, + order: Order, + executor: BaseExecutor, + backtest_data: QlibIntradayBacktestData, + time_per_step: str, + ticks_index: pd.DatetimeIndex, + twap_price: float, + ticks_for_order: pd.DatetimeIndex, + ) -> None: + super().__init__() + + self.position = order.amount + self.order = order + self.executor = executor + self.backtest_data = backtest_data + self.time_per_step = time_per_step + self.ticks_index = ticks_index + self.ticks_for_order = ticks_for_order + self.twap_price = twap_price + + metric_keys = list(SAOEMetrics.__annotations__.keys()) # pylint: disable=no-member + self.history_exec = pd.DataFrame(columns=metric_keys).set_index("datetime") + self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime") + self.metrics: Optional[SAOEMetrics] = None + + self.cur_time = ticks_for_order[0] + self.ticks_per_step = int(pd.Timedelta(self.time_per_step).total_seconds() // 60) + + def _next_time(self) -> pd.Timestamp: + current_loc = self.ticks_index.get_loc(self.cur_time) + next_loc = current_loc + self.ticks_per_step + next_loc = next_loc - next_loc % self.ticks_per_step + if next_loc < len(self.ticks_index) and self.ticks_index[next_loc] < self.order.end_time: + return self.ticks_index[next_loc] + else: + return self.order.end_time + + def update(self, execute_result: list) -> None: + exec_vol = np.array([e[0].deal_amount for e in execute_result]) + num_step = len(execute_result) + + if num_step == 0: + market_volume = np.array([]) + market_price = np.array([]) + datetime_list = pd.DatetimeIndex([]) + else: + market_volume = np.array( + self.executor.trade_exchange.get_volume( + self.order.stock_id, + execute_result[0][0].start_time, + execute_result[-1][0].start_time, + method=None, + ), + ) + + # Get data from the SimulatorExecutor's (lowest-level executor) indicator + simulator_executor = get_simulator_executor(self.executor) + simulator_trade_account = 
simulator_executor.trade_account + simulator_df = simulator_trade_account.get_trade_indicator().generate_trade_indicators_dataframe() + + trade_value = simulator_df.iloc[-num_step:]["value"].values + deal_amount = simulator_df.iloc[-num_step:]["deal_amount"].values + market_price = trade_value / deal_amount + datetime_list = simulator_df.index[-num_step:] + + assert market_price.shape == market_volume.shape == exec_vol.shape + + # Get data from the current level executor's indicator + current_trade_account = self.executor.trade_account + current_df = current_trade_account.get_trade_indicator().generate_trade_indicators_dataframe() + self.history_exec = dataframe_append( + self.history_exec, + self._collect_multi_order_metric( + order=self.order, + datetime=datetime_list, + market_vol=market_volume, + market_price=market_price, + exec_vol=exec_vol, + pa=current_df.iloc[-1]["pa"], + ), + ) + + self.history_steps = dataframe_append( + self.history_steps, + [ + self._collect_single_order_metric( + self.order, + self.cur_time, + market_volume, + market_price, + exec_vol.sum(), + exec_vol, + ), + ], + ) + + # TODO: check whether we need this. Can we get this information from Account? + # Do this at the end + self.position -= exec_vol.sum() + + self.cur_time = self._next_time() + + def generate_metrics_after_done(self) -> None: + """Generate metrics once the upper level execution is done""" + + self.metrics = self._collect_single_order_metric( + self.order, + self.ticks_index[0], # start time + self.history_exec["market_volume"], + self.history_exec["market_price"], + self.history_steps["amount"].sum(), + self.history_exec["deal_amount"], + ) + + def _collect_multi_order_metric( + self, + order: Order, + datetime: pd.Timestamp, + market_vol: np.ndarray, + market_price: np.ndarray, + exec_vol: np.ndarray, + pa: float, + ) -> SAOEMetrics: + return SAOEMetrics( + # It should have the same keys with SAOEMetrics, + # but the values do not necessarily have the annotated type. 
+ # Some values could be vectorized (e.g., exec_vol). + stock_id=order.stock_id, + datetime=datetime, + direction=order.direction, + market_volume=market_vol, + market_price=market_price, + amount=exec_vol, + inner_amount=exec_vol, + deal_amount=exec_vol, + trade_price=market_price, + trade_value=market_price * exec_vol, + position=self.position - np.cumsum(exec_vol), + ffr=exec_vol / order.amount, + pa=pa, + ) + + def _collect_single_order_metric( + self, + order: Order, + datetime: pd.Timestamp, + market_vol: np.ndarray, + market_price: np.ndarray, + amount: float, # intended to trade such amount + exec_vol: np.ndarray, + ) -> SAOEMetrics: + assert len(market_vol) == len(market_price) == len(exec_vol) + + if np.abs(np.sum(exec_vol)) < EPS: + exec_avg_price = 0.0 + else: + exec_avg_price = cast(float, np.average(market_price, weights=exec_vol)) # could be nan + if hasattr(exec_avg_price, "item"): # could be numpy scalar + exec_avg_price = exec_avg_price.item() # type: ignore + + exec_sum = exec_vol.sum() + return SAOEMetrics( + stock_id=order.stock_id, + datetime=datetime, + direction=order.direction, + market_volume=market_vol.sum(), + market_price=market_price.mean() if len(market_price) > 0 else np.nan, + amount=amount, + inner_amount=exec_sum, + deal_amount=exec_sum, # in this simulator, there's no other restrictions + trade_price=exec_avg_price, + trade_value=float(np.sum(market_price * exec_vol)), + position=self.position - exec_sum, + ffr=float(exec_sum / order.amount), + pa=price_advantage(exec_avg_price, self.twap_price, order.direction), + ) + + @property + def saoe_state(self) -> SAOEState: + return SAOEState( + order=self.order, + cur_time=self.executor.trade_calendar.get_step_time()[0], + position=self.position, + history_exec=self.history_exec, + history_steps=self.history_steps, + metrics=self.metrics, + backtest_data=self.backtest_data, + ticks_per_step=int(pd.Timedelta(self.time_per_step).total_seconds() // 60), + ticks_index=self.ticks_index, + 
ticks_for_order=self.ticks_for_order, + ) diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py index 779f860c33..123cc1a727 100644 --- a/qlib/rl/order_execution/utils.py +++ b/qlib/rl/order_execution/utils.py @@ -9,9 +9,8 @@ import pandas as pd from qlib.backtest.decision import OrderDir -from qlib.backtest.executor import BaseExecutor +from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor from qlib.rl.order_execution.simulator_simple import _float_or_ndarray, ONE_SEC -from qlib.utils.time import Freq def get_ticks_slice( @@ -57,18 +56,8 @@ def price_advantage( return cast(_float_or_ndarray, res_wo_nan) -def get_portfolio_and_indicator(executor: BaseExecutor) -> Tuple[dict, dict]: - all_executors = executor.get_all_executors() - all_portfolio_metrics = { - "{}{}".format(*Freq.parse(_executor.time_per_step)): _executor.trade_account.get_portfolio_metrics() - for _executor in all_executors - if _executor.trade_account.is_port_metr_enabled() - } - - all_indicators = {} - for _executor in all_executors: - key = "{}{}".format(*Freq.parse(_executor.time_per_step)) - all_indicators[key] = _executor.trade_account.get_trade_indicator().generate_trade_indicators_dataframe() - all_indicators[key + "_obj"] = _executor.trade_account.get_trade_indicator() - - return all_portfolio_metrics, all_indicators +def get_simulator_executor(executor: BaseExecutor) -> SimulatorExecutor: + while isinstance(executor, NestedExecutor): + executor = executor.inner_executor + assert isinstance(executor, SimulatorExecutor) + return executor diff --git a/qlib/rl/strategy/__init__.py b/qlib/rl/strategy/__init__.py new file mode 100644 index 0000000000..59e481eb93 --- /dev/null +++ b/qlib/rl/strategy/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
diff --git a/qlib/rl/strategy/decomposed.py b/qlib/rl/strategy/decomposed.py new file mode 100644 index 0000000000..d6fdf1b534 --- /dev/null +++ b/qlib/rl/strategy/decomposed.py @@ -0,0 +1,44 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from typing import Any, Generator, Optional + +from qlib.backtest import CommonInfrastructure +from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO +from qlib.backtest.utils import LevelInfrastructure +from qlib.rl.order_execution.state_maintainer import SAOEStateMaintainer +from qlib.rl.strategy.saoe import SAOEStrategy + + +class DecomposedStrategy(SAOEStrategy): + def __init__( + self, + outer_trade_decision: BaseTradeDecision = None, + level_infra: LevelInfrastructure = None, + common_infra: CommonInfrastructure = None, + **kwargs, + ) -> None: + super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs) + self.maintainer: Optional[SAOEStateMaintainer] = None + + def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]: + # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside + # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`, + # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner + # level strategy through this way. 
+ exec_vol = yield self + + oh = self.trade_exchange.get_order_helper() + order = oh.create(self._order.stock_id, exec_vol, self._order.direction) + + return TradeDecisionWO([order], self) + + def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: + return outer_trade_decision + + def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None: + super().reset(outer_trade_decision=outer_trade_decision, **kwargs) + if outer_trade_decision is not None: + order_list = outer_trade_decision.order_list + assert len(order_list) == 1 + self._order = order_list[0] diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py new file mode 100644 index 0000000000..1528698e7b --- /dev/null +++ b/qlib/rl/strategy/saoe.py @@ -0,0 +1,57 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from abc import ABCMeta +from typing import Optional + +import pandas as pd +from qlib.backtest.decision import BaseTradeDecision, Order +from qlib.backtest.executor import BaseExecutor +from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure +from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData +from qlib.rl.order_execution.state_maintainer import SAOEStateMaintainer +from qlib.strategy.base import RLStrategy + + +class SAOEStrategy(RLStrategy, metaclass=ABCMeta): + """RL-based strategies that use SAOEState as state.""" + + def __init__( + self, + policy, + outer_trade_decision: BaseTradeDecision = None, + level_infra: LevelInfrastructure = None, + common_infra: CommonInfrastructure = None, + **kwargs, + ) -> None: + super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs) + + self.maintainer: Optional[SAOEStateMaintainer] = None + + def create_saoe_maintainer( + self, + order: Order, + executor: BaseExecutor, + backtest_data: QlibIntradayBacktestData, + time_per_step: str, + ticks_index: pd.DatetimeIndex, + twap_price: 
float, + ticks_for_order: pd.DatetimeIndex, + ) -> None: + self.maintainer = SAOEStateMaintainer( + order=order, + executor=executor, + backtest_data=backtest_data, + time_per_step=time_per_step, + ticks_index=ticks_index, + twap_price=twap_price, + ticks_for_order=ticks_for_order, + ) + + def post_upper_level_exe_step(self) -> None: + self.maintainer.generate_metrics_after_done() + + def post_exe_step(self, execute_result: list) -> None: + self.maintainer.update( + execute_result=execute_result, + ) diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py new file mode 100644 index 0000000000..0055781973 --- /dev/null +++ b/qlib/rl/strategy/single_order.py @@ -0,0 +1,35 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from qlib.backtest import Order +from qlib.backtest.decision import BaseTradeDecision, OrderHelper, TradeDecisionWO, TradeRange +from qlib.strategy.base import BaseStrategy + + +class SingleOrderStrategy(BaseStrategy): + # this logic is copied from FileOrderStrategy + def __init__( + self, + order: Order, + trade_range: TradeRange, + instrument: str, + ) -> None: + super().__init__() + + self._order = order + self._trade_range = trade_range + self._instrument = instrument + + def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: + return outer_trade_decision + + def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO: + oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper() + order_list = [ + oh.create( + code=self._instrument, + amount=self._order.amount, + direction=self._order.direction, + ), + ] + return TradeDecisionWO(order_list, self, self._trade_range) diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 27df347fc5..888adfa2ba 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -8,6 +8,7 @@ if TYPE_CHECKING: from qlib.backtest.exchange import Exchange from 
qlib.backtest.position import BasePosition + from qlib.backtest.executor import BaseExecutor from typing import Tuple @@ -207,6 +208,12 @@ def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]: range_limit = self.outer_trade_decision.get_data_cal_range_limit(rtype=rtype) return max(cal_range[0], range_limit[0]), min(cal_range[1], range_limit[1]) + def post_upper_level_exe_step(self) -> None: + """ + A hook for doing sth after the upper level executor finished its execution (for example, finalize + the metrics collection). + """ + def post_exe_step(self, execute_result: list) -> None: """ A hook for doing sth after the corresponding executor finished its execution. diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py index ac94e589e5..28a549eec6 100644 --- a/tests/rl/test_qlib_simulator.py +++ b/tests/rl/test_qlib_simulator.py @@ -1,12 +1,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. + import sys from pathlib import Path +from typing import Tuple import pandas as pd import pytest -from qlib.backtest.decision import Order, OrderDir +from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime from qlib.backtest.executor import SimulatorExecutor from qlib.rl.order_execution import CategoricalActionInterpreter from qlib.rl.order_execution.objects import FINEST_GRANULARITY @@ -31,31 +33,72 @@ def get_order() -> Order: ) -def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecutionQlib: - _inner_executor_config = { +def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]: + strategy_config = { + "class": "SingleOrderStrategy", + "module_path": "qlib.rl.strategy.single_order", + "kwargs": { + "order": order, + "trade_range": TradeRangeByTime(order.start_time.time(), order.end_time.time()), + "instrument": order.stock_id, + }, + } + + executor_config = { "class": "NestedExecutor", "module_path": "qlib.backtest.executor", "kwargs": { - 
"time_per_step": time_per_step, - "inner_strategy": { - "class": "TWAPStrategy", - "module_path": "qlib.contrib.strategy.rule_strategy", - }, + "time_per_step": "1day", + "inner_strategy": {"class": "DecomposedStrategy", "module_path": "qlib.rl.strategy.decomposed"}, + "track_data": True, "inner_executor": { - "class": "SimulatorExecutor", + "class": "NestedExecutor", "module_path": "qlib.backtest.executor", "kwargs": { - "time_per_step": FINEST_GRANULARITY, - "verbose": False, - "trade_type": SimulatorExecutor.TT_SERIAL, - "generate_report": False, + "time_per_step": time_per_step, + "inner_strategy": { + "class": "TWAPStrategy", + "module_path": "qlib.contrib.strategy.rule_strategy", + }, + "inner_executor": { + "class": "SimulatorExecutor", + "module_path": "qlib.backtest.executor", + "kwargs": { + "time_per_step": FINEST_GRANULARITY, + "verbose": False, + "trade_type": SimulatorExecutor.TT_SERIAL, + "generate_report": False, + "track_data": True, + } + }, "track_data": True, - } + }, }, - "track_data": True, + "start_time": pd.Timestamp(order.start_time.date()), + "end_time": pd.Timestamp(order.start_time.date()), }, } + exchange_config = { + "freq": FINEST_GRANULARITY, + "codes": [order.stock_id], + "limit_threshold": ("$ask == 0", "$bid == 0"), + "deal_price": ("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"), + "volume_threshold": { + "all": ("cum", "0.2 * DayCumsum($volume, '9:30', '14:29')"), + "buy": ("current", "$askV1"), + "sell": ("current", "$bidV1"), + }, + "open_cost": 0.0005, + "close_cost": 0.0015, + "min_cost": 5.0, + "trade_unit": None, + } + + return strategy_config, executor_config, exchange_config + + +def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecutionQlib: DATA_ROOT_DIR = Path(__file__).parent.parent / ".data" / "rl" / "qlib_simulator" # fmt: off @@ -74,25 +117,14 @@ def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecution } # fmt: on - exchange_config = { - "limit_threshold": 
("$ask == 0", "$bid == 0"), - "deal_price": ("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"), - "volume_threshold": { - "all": ("cum", "0.2 * DayCumsum($volume, '9:30', '14:29')"), - "buy": ("current", "$askV1"), - "sell": ("current", "$bidV1"), - }, - "open_cost": 0.0005, - "close_cost": 0.0015, - "min_cost": 5.0, - "trade_unit": None, - } + strategy_config, executor_config, exchange_config = get_configs(order, time_per_step) return SingleAssetOrderExecutionQlib( order=order, time_per_step=time_per_step, qlib_config=qlib_config, - inner_executor_config=_inner_executor_config, + strategy_config=strategy_config, + executor_config=executor_config, exchange_config=exchange_config, ) @@ -120,12 +152,12 @@ def test_simulator_first_step(): assert is_close(state.history_exec["trade_price"].iloc[0], 149.566483) assert is_close(state.history_exec["trade_value"].iloc[0], 1495.664825) assert is_close(state.history_exec["position"].iloc[0], TOTAL_POSITION - AMOUNT / 30) - # assert state.history_exec["ffr"].iloc[0] == 1 / 60 # FIXME + assert is_close(state.history_exec["ffr"].iloc[0], AMOUNT / TOTAL_POSITION / 30) assert is_close(state.history_steps["market_volume"].iloc[0], 1254848.5756835938) assert state.history_steps["amount"].iloc[0] == AMOUNT assert state.history_steps["deal_amount"].iloc[0] == AMOUNT - assert state.history_steps["ffr"].iloc[0] == 1.0 + assert state.history_steps["ffr"].iloc[0] == AMOUNT / TOTAL_POSITION assert is_close( state.history_steps["pa"].iloc[0] * (1.0 if order.direction == OrderDir.SELL else -1.0), (state.history_steps["trade_price"].iloc[0] / simulator.twap_price - 1) * 10000, @@ -174,9 +206,3 @@ def test_interpreter() -> None: position_history.append(state.position) assert position_history[-1] == max(TOTAL_POSITION - TOTAL_POSITION / NUM_EXECUTION * (i + 1), 0.0) - - -if __name__ == "__main__": - test_simulator_first_step() - test_simulator_stop_twap() - test_interpreter() From 1e8f0e90c1ad0f5a781f2527c25aa63251eef5a3 Mon Sep 17 
00:00:00 2001 From: Huoran Li Date: Wed, 3 Aug 2022 13:54:05 +0800 Subject: [PATCH 04/23] Optimize file structure --- qlib/rl/order_execution/interpreter.py | 2 +- qlib/rl/order_execution/objects.py | 10 ++ qlib/rl/order_execution/reward.py | 2 +- qlib/rl/order_execution/simulator_qlib.py | 2 +- qlib/rl/order_execution/simulator_simple.py | 104 ++---------------- .../{state_maintainer.py => state.py} | 90 ++++++++++++++- qlib/rl/order_execution/utils.py | 8 +- qlib/rl/strategy/decomposed.py | 2 +- qlib/rl/strategy/saoe.py | 2 +- 9 files changed, 115 insertions(+), 107 deletions(-) rename qlib/rl/order_execution/{state_maintainer.py => state.py} (70%) diff --git a/qlib/rl/order_execution/interpreter.py b/qlib/rl/order_execution/interpreter.py index 602a15e54e..172ad1cff5 100644 --- a/qlib/rl/order_execution/interpreter.py +++ b/qlib/rl/order_execution/interpreter.py @@ -16,7 +16,7 @@ from qlib.rl.interpreter import ActionInterpreter, StateInterpreter from qlib.typehint import TypedDict -from .simulator_simple import SAOEState +from qlib.rl.order_execution.state import SAOEState __all__ = [ "FullHistoryStateInterpreter", diff --git a/qlib/rl/order_execution/objects.py b/qlib/rl/order_execution/objects.py index ee6358bcd3..a4ffb1a4f7 100644 --- a/qlib/rl/order_execution/objects.py +++ b/qlib/rl/order_execution/objects.py @@ -1,2 +1,12 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +from typing import TypeVar + +import numpy as np +import pandas as pd + FINEST_GRANULARITY = "1min" COARSEST_GRANULARITY = "1day" +ONE_SEC = pd.Timedelta("1s") # use 1 second to exclude the right interval point +float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) \ No newline at end of file diff --git a/qlib/rl/order_execution/reward.py b/qlib/rl/order_execution/reward.py index f15a152c66..b4f021f6ad 100644 --- a/qlib/rl/order_execution/reward.py +++ b/qlib/rl/order_execution/reward.py @@ -9,7 +9,7 @@ from qlib.rl.reward import Reward -from .simulator_simple import SAOEMetrics, SAOEState +from qlib.rl.order_execution.state import SAOEMetrics, SAOEState __all__ = ["PAPenaltyReward"] diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index 22ac718e93..a59b61ad81 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -11,7 +11,7 @@ from qlib.backtest.executor import NestedExecutor from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData from qlib.rl.integration.feature import init_qlib -from qlib.rl.order_execution.simulator_simple import SAOEState +from qlib.rl.order_execution.state import SAOEState from qlib.rl.order_execution.utils import ( get_ticks_slice, ) diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py index 6d49457841..11d759cb9b 100644 --- a/qlib/rl/order_execution/simulator_simple.py +++ b/qlib/rl/order_execution/simulator_simple.py @@ -4,107 +4,22 @@ from __future__ import annotations from pathlib import Path -from typing import Any, NamedTuple, Optional, TypeVar, cast +from typing import Any, Optional, cast import numpy as np import pandas as pd from qlib.backtest.decision import Order, OrderDir from qlib.constant import EPS -from qlib.rl.data.pickle_styled import DealPriceType, IntradayBacktestData, load_simple_intraday_backtest_data +from qlib.rl.data.pickle_styled import 
DealPriceType, load_simple_intraday_backtest_data +from qlib.rl.order_execution.objects import float_or_ndarray, ONE_SEC +from qlib.rl.order_execution.state import SAOEMetrics, SAOEState from qlib.rl.simulator import Simulator from qlib.rl.utils import LogLevel -from qlib.typehint import TypedDict # TODO: Integrating Qlib's native data with simulator_simple -__all__ = ["SAOEMetrics", "SAOEState", "SingleAssetOrderExecution"] - -ONE_SEC = pd.Timedelta("1s") # use 1 second to exclude the right interval point - - -class SAOEMetrics(TypedDict): - """Metrics for SAOE accumulated for a "period". - It could be accumulated for a day, or a period of time (e.g., 30min), or calculated separately for every minute. - - Warnings - -------- - The type hints are for single elements. In lots of times, they can be vectorized. - For example, ``market_volume`` could be a list of float (or ndarray) rather tahn a single float. - """ - - stock_id: str - """Stock ID of this record.""" - datetime: pd.Timestamp | pd.DatetimeIndex # TODO: check this - """Datetime of this record (this is index in the dataframe).""" - direction: int - """Direction of the order. 0 for sell, 1 for buy.""" - - # Market information. - market_volume: np.ndarray | float - """(total) market volume traded in the period.""" - market_price: np.ndarray | float - """Deal price. If it's a period of time, this is the average market deal price.""" - - # Strategy records. - - amount: np.ndarray | float - """Total amount (volume) strategy intends to trade.""" - inner_amount: np.ndarray | float - """Total amount that the lower-level strategy intends to trade - (might be larger than amount, e.g., to ensure ffr).""" - - deal_amount: np.ndarray | float - """Amount that successfully takes effect (must be less than inner_amount).""" - trade_price: np.ndarray | float - """The average deal price for this strategy.""" - trade_value: np.ndarray | float - """Total worth of trading. 
In the simple simulation, trade_value = deal_amount * price.""" - position: np.ndarray | float - """Position left after this "period".""" - - # Accumulated metrics - - ffr: np.ndarray | float - """Completed how much percent of the daily order.""" - - pa: np.ndarray | float - """Price advantage compared to baseline (i.e., trade with baseline market price). - The baseline is trade price when using TWAP strategy to execute this order. - Please note that there could be data leak here). - Unit is BP (basis point, 1/10000).""" - - -class SAOEState(NamedTuple): - """Data structure holding a state for SAOE simulator.""" - - order: Order - """The order we are dealing with.""" - cur_time: pd.Timestamp - """Current time, e.g., 9:30.""" - position: float - """Current remaining volume to execute.""" - history_exec: pd.DataFrame - """See :attr:`SingleAssetOrderExecution.history_exec`.""" - history_steps: pd.DataFrame - """See :attr:`SingleAssetOrderExecution.history_steps`.""" - - metrics: Optional[SAOEMetrics] - """Daily metric, only available when the trading is in "done" state.""" - - backtest_data: IntradayBacktestData - """Backtest data is included in the state. - Actually, only the time index of this data is needed, at this moment. - I include the full data so that algorithms (e.g., VWAP) that relies on the raw data can be implemented. - Interpreter can use this as they wish, but they should be careful not to leak future data. - """ - - ticks_per_step: int - """How many ticks for each step.""" - ticks_index: pd.DatetimeIndex - """Trading ticks in all day, NOT sliced by order (defined in data). 
e.g., [9:30, 9:31, ..., 14:59].""" - ticks_for_order: pd.DatetimeIndex - """Trading ticks sliced by order, e.g., [9:45, 9:46, ..., 14:44].""" +__all__ = ["SingleAssetOrderExecution"] class SingleAssetOrderExecution(Simulator[Order, SAOEState, float]): @@ -391,14 +306,11 @@ def _dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame: return pd.concat([df, other_df], axis=0) -_float_or_ndarray = TypeVar("_float_or_ndarray", float, np.ndarray) - - def price_advantage( - exec_price: _float_or_ndarray, + exec_price: float_or_ndarray, baseline_price: float, direction: OrderDir | int, -) -> _float_or_ndarray: +) -> float_or_ndarray: if baseline_price == 0: # something is wrong with data. Should be nan here if isinstance(exec_price, float): return 0.0 @@ -414,4 +326,4 @@ def price_advantage( if res_wo_nan.size == 1: return res_wo_nan.item() else: - return cast(_float_or_ndarray, res_wo_nan) + return cast(float_or_ndarray, res_wo_nan) diff --git a/qlib/rl/order_execution/state_maintainer.py b/qlib/rl/order_execution/state.py similarity index 70% rename from qlib/rl/order_execution/state_maintainer.py rename to qlib/rl/order_execution/state.py index af9ed5e9c0..fd12e1d7f3 100644 --- a/qlib/rl/order_execution/state_maintainer.py +++ b/qlib/rl/order_execution/state.py @@ -1,7 +1,10 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from typing import cast, Optional +from typing import cast, NamedTuple, Optional + +from qlib.rl.data.pickle_styled import IntradayBacktestData +from typing_extensions import TypedDict import numpy as np import pandas as pd @@ -10,7 +13,6 @@ from qlib.backtest.utils import TradeCalendarManager from qlib.constant import EPS from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData -from qlib.rl.order_execution import SAOEMetrics, SAOEState from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, get_ticks_slice, price_advantage @@ -217,3 +219,87 @@ def saoe_state(self) -> SAOEState: ticks_index=self.ticks_index, ticks_for_order=self.ticks_for_order, ) + + +class SAOEMetrics(TypedDict): + """Metrics for SAOE accumulated for a "period". + It could be accumulated for a day, or a period of time (e.g., 30min), or calculated separately for every minute. + + Warnings + -------- + The type hints are for single elements. In lots of times, they can be vectorized. + For example, ``market_volume`` could be a list of float (or ndarray) rather than a single float. + """ + + stock_id: str + """Stock ID of this record.""" + datetime: pd.Timestamp | pd.DatetimeIndex # TODO: check this + """Datetime of this record (this is index in the dataframe).""" + direction: int + """Direction of the order. 0 for sell, 1 for buy.""" + + # Market information. + market_volume: np.ndarray | float + """(total) market volume traded in the period.""" + market_price: np.ndarray | float + """Deal price. If it's a period of time, this is the average market deal price.""" + + # Strategy records.
+ + amount: np.ndarray | float + """Total amount (volume) strategy intends to trade.""" + inner_amount: np.ndarray | float + """Total amount that the lower-level strategy intends to trade + (might be larger than amount, e.g., to ensure ffr).""" + + deal_amount: np.ndarray | float + """Amount that successfully takes effect (must be less than inner_amount).""" + trade_price: np.ndarray | float + """The average deal price for this strategy.""" + trade_value: np.ndarray | float + """Total worth of trading. In the simple simulation, trade_value = deal_amount * price.""" + position: np.ndarray | float + """Position left after this "period".""" + + # Accumulated metrics + + ffr: np.ndarray | float + """Completed how much percent of the daily order.""" + + pa: np.ndarray | float + """Price advantage compared to baseline (i.e., trade with baseline market price). + The baseline is trade price when using TWAP strategy to execute this order. + Please note that there could be data leak here). + Unit is BP (basis point, 1/10000).""" + + +class SAOEState(NamedTuple): + """Data structure holding a state for SAOE simulator.""" + + order: Order + """The order we are dealing with.""" + cur_time: pd.Timestamp + """Current time, e.g., 9:30.""" + position: float + """Current remaining volume to execute.""" + history_exec: pd.DataFrame + """See :attr:`SingleAssetOrderExecution.history_exec`.""" + history_steps: pd.DataFrame + """See :attr:`SingleAssetOrderExecution.history_steps`.""" + + metrics: Optional[SAOEMetrics] + """Daily metric, only available when the trading is in "done" state.""" + + backtest_data: IntradayBacktestData + """Backtest data is included in the state. + Actually, only the time index of this data is needed, at this moment. + I include the full data so that algorithms (e.g., VWAP) that relies on the raw data can be implemented. + Interpreter can use this as they wish, but they should be careful not to leak future data. 
+ """ + + ticks_per_step: int + """How many ticks for each step.""" + ticks_index: pd.DatetimeIndex + """Trading ticks in all day, NOT sliced by order (defined in data). e.g., [9:30, 9:31, ..., 14:59].""" + ticks_for_order: pd.DatetimeIndex + """Trading ticks sliced by order, e.g., [9:45, 9:46, ..., 14:44].""" \ No newline at end of file diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py index 123cc1a727..fe38b94eec 100644 --- a/qlib/rl/order_execution/utils.py +++ b/qlib/rl/order_execution/utils.py @@ -10,7 +10,7 @@ from qlib.backtest.decision import OrderDir from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor -from qlib.rl.order_execution.simulator_simple import _float_or_ndarray, ONE_SEC +from qlib.rl.order_execution.objects import float_or_ndarray, ONE_SEC def get_ticks_slice( @@ -34,10 +34,10 @@ def dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame: def price_advantage( - exec_price: _float_or_ndarray, + exec_price: float_or_ndarray, baseline_price: float, direction: OrderDir | int, -) -> _float_or_ndarray: +) -> float_or_ndarray: if baseline_price == 0: # something is wrong with data. 
Should be nan here if isinstance(exec_price, float): return 0.0 @@ -53,7 +53,7 @@ def price_advantage( if res_wo_nan.size == 1: return res_wo_nan.item() else: - return cast(_float_or_ndarray, res_wo_nan) + return cast(float_or_ndarray, res_wo_nan) def get_simulator_executor(executor: BaseExecutor) -> SimulatorExecutor: diff --git a/qlib/rl/strategy/decomposed.py b/qlib/rl/strategy/decomposed.py index d6fdf1b534..1da1540f4a 100644 --- a/qlib/rl/strategy/decomposed.py +++ b/qlib/rl/strategy/decomposed.py @@ -6,7 +6,7 @@ from qlib.backtest import CommonInfrastructure from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO from qlib.backtest.utils import LevelInfrastructure -from qlib.rl.order_execution.state_maintainer import SAOEStateMaintainer +from qlib.rl.order_execution.state import SAOEStateMaintainer from qlib.rl.strategy.saoe import SAOEStrategy diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py index 1528698e7b..69864fcf46 100644 --- a/qlib/rl/strategy/saoe.py +++ b/qlib/rl/strategy/saoe.py @@ -9,7 +9,7 @@ from qlib.backtest.executor import BaseExecutor from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData -from qlib.rl.order_execution.state_maintainer import SAOEStateMaintainer +from qlib.rl.order_execution.state import SAOEStateMaintainer from qlib.strategy.base import RLStrategy From 8a868a66dbf44b090ee77a6895e94e024f5ecdc8 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Wed, 3 Aug 2022 13:57:37 +0800 Subject: [PATCH 05/23] Optimize code --- qlib/data/dataset/__init__.py | 2 +- qlib/rl/order_execution/interpreter.py | 1 + qlib/rl/order_execution/objects.py | 2 +- qlib/rl/order_execution/policy.py | 1 + qlib/rl/order_execution/state.py | 2 ++ 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/qlib/data/dataset/__init__.py b/qlib/data/dataset/__init__.py index c74092de34..5e98bfc97a 100644 --- a/qlib/data/dataset/__init__.py +++ 
b/qlib/data/dataset/__init__.py @@ -615,4 +615,4 @@ def _prepare_seg(self, slc: slice, **kwargs) -> TSDataSampler: return tsds -__all__ = ["Optional"] +__all__ = ["Optional", "Dataset", "DatasetH"] diff --git a/qlib/rl/order_execution/interpreter.py b/qlib/rl/order_execution/interpreter.py index 172ad1cff5..09f5f343da 100644 --- a/qlib/rl/order_execution/interpreter.py +++ b/qlib/rl/order_execution/interpreter.py @@ -23,6 +23,7 @@ "CurrentStepStateInterpreter", "CategoricalActionInterpreter", "TwapRelativeActionInterpreter", + "FullHistoryObs", ] diff --git a/qlib/rl/order_execution/objects.py b/qlib/rl/order_execution/objects.py index a4ffb1a4f7..2f6c81b825 100644 --- a/qlib/rl/order_execution/objects.py +++ b/qlib/rl/order_execution/objects.py @@ -9,4 +9,4 @@ FINEST_GRANULARITY = "1min" COARSEST_GRANULARITY = "1day" ONE_SEC = pd.Timedelta("1s") # use 1 second to exclude the right interval point -float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) \ No newline at end of file +float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) diff --git a/qlib/rl/order_execution/policy.py b/qlib/rl/order_execution/policy.py index 18c2e4f175..cfd3181ca2 100644 --- a/qlib/rl/order_execution/policy.py +++ b/qlib/rl/order_execution/policy.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. + from __future__ import annotations from pathlib import Path diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index fd12e1d7f3..9379868a8c 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -1,6 +1,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
+from __future__ import annotations + from typing import cast, NamedTuple, Optional from qlib.rl.data.pickle_styled import IntradayBacktestData From 03472eff4429f9af4e02371d5ddb978d0091909d Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Wed, 3 Aug 2022 14:01:23 +0800 Subject: [PATCH 06/23] Format code --- qlib/rl/aux_info.py | 2 +- qlib/rl/data/exchange_wrapper.py | 1 + qlib/rl/integration/feature.py | 2 +- qlib/rl/order_execution/interpreter.py | 3 +-- qlib/rl/order_execution/reward.py | 3 +-- qlib/rl/order_execution/simulator_qlib.py | 5 ++--- qlib/rl/order_execution/simulator_simple.py | 2 +- qlib/rl/order_execution/state.py | 13 ++++++------- qlib/rl/order_execution/utils.py | 4 ++-- qlib/rl/strategy/saoe.py | 1 + qlib/rl/utils/finite_env.py | 3 ++- 11 files changed, 19 insertions(+), 20 deletions(-) diff --git a/qlib/rl/aux_info.py b/qlib/rl/aux_info.py index 9ab0834511..1fd581544e 100644 --- a/qlib/rl/aux_info.py +++ b/qlib/rl/aux_info.py @@ -3,7 +3,7 @@ from __future__ import annotations -from typing import Optional, TYPE_CHECKING, Generic, TypeVar +from typing import TYPE_CHECKING, Generic, Optional, TypeVar from qlib.typehint import final diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py index bc36fa11b8..fb8daabf33 100644 --- a/qlib/rl/data/exchange_wrapper.py +++ b/qlib/rl/data/exchange_wrapper.py @@ -6,6 +6,7 @@ import pandas as pd from qlib.backtest import Exchange, Order + from .pickle_styled import IntradayBacktestData diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py index 2b4279e1a6..347b2775ba 100644 --- a/qlib/rl/integration/feature.py +++ b/qlib/rl/integration/feature.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. 
import collections -from typing import List, Optional +from typing import List import pandas as pd diff --git a/qlib/rl/order_execution/interpreter.py b/qlib/rl/order_execution/interpreter.py index 09f5f343da..089fc553cf 100644 --- a/qlib/rl/order_execution/interpreter.py +++ b/qlib/rl/order_execution/interpreter.py @@ -14,9 +14,8 @@ from qlib.constant import EPS from qlib.rl.data import pickle_styled from qlib.rl.interpreter import ActionInterpreter, StateInterpreter -from qlib.typehint import TypedDict - from qlib.rl.order_execution.state import SAOEState +from qlib.typehint import TypedDict __all__ = [ "FullHistoryStateInterpreter", diff --git a/qlib/rl/order_execution/reward.py b/qlib/rl/order_execution/reward.py index b4f021f6ad..99a88f8e44 100644 --- a/qlib/rl/order_execution/reward.py +++ b/qlib/rl/order_execution/reward.py @@ -7,9 +7,8 @@ import numpy as np -from qlib.rl.reward import Reward - from qlib.rl.order_execution.state import SAOEMetrics, SAOEState +from qlib.rl.reward import Reward __all__ = ["PAPenaltyReward"] diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index a59b61ad81..ff45a84781 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -6,15 +6,14 @@ from typing import Generator, Optional import pandas as pd + from qlib.backtest import get_strategy_executor from qlib.backtest.decision import Order from qlib.backtest.executor import NestedExecutor from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData from qlib.rl.integration.feature import init_qlib from qlib.rl.order_execution.state import SAOEState -from qlib.rl.order_execution.utils import ( - get_ticks_slice, -) +from qlib.rl.order_execution.utils import get_ticks_slice from qlib.rl.simulator import Simulator from qlib.rl.strategy.saoe import SAOEStrategy diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py index 
11d759cb9b..2c4ae822f4 100644 --- a/qlib/rl/order_execution/simulator_simple.py +++ b/qlib/rl/order_execution/simulator_simple.py @@ -12,7 +12,7 @@ from qlib.backtest.decision import Order, OrderDir from qlib.constant import EPS from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data -from qlib.rl.order_execution.objects import float_or_ndarray, ONE_SEC +from qlib.rl.order_execution.objects import ONE_SEC, float_or_ndarray from qlib.rl.order_execution.state import SAOEMetrics, SAOEState from qlib.rl.simulator import Simulator from qlib.rl.utils import LogLevel diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index 9379868a8c..6b52cb39fb 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -3,19 +3,18 @@ from __future__ import annotations -from typing import cast, NamedTuple, Optional - -from qlib.rl.data.pickle_styled import IntradayBacktestData -from typing_extensions import TypedDict +from typing import NamedTuple, Optional, cast import numpy as np import pandas as pd +from typing_extensions import TypedDict + from qlib.backtest import Order from qlib.backtest.executor import BaseExecutor -from qlib.backtest.utils import TradeCalendarManager from qlib.constant import EPS from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData -from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, get_ticks_slice, price_advantage +from qlib.rl.data.pickle_styled import IntradayBacktestData +from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, price_advantage class SAOEStateMaintainer: @@ -304,4 +303,4 @@ class SAOEState(NamedTuple): ticks_index: pd.DatetimeIndex """Trading ticks in all day, NOT sliced by order (defined in data). 
e.g., [9:30, 9:31, ..., 14:59].""" ticks_for_order: pd.DatetimeIndex - """Trading ticks sliced by order, e.g., [9:45, 9:46, ..., 14:44].""" \ No newline at end of file + """Trading ticks sliced by order, e.g., [9:45, 9:46, ..., 14:44].""" diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py index fe38b94eec..44012d9db1 100644 --- a/qlib/rl/order_execution/utils.py +++ b/qlib/rl/order_execution/utils.py @@ -3,14 +3,14 @@ from __future__ import annotations -from typing import Any, cast, Tuple +from typing import Any, cast import numpy as np import pandas as pd from qlib.backtest.decision import OrderDir from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor -from qlib.rl.order_execution.objects import float_or_ndarray, ONE_SEC +from qlib.rl.order_execution.objects import ONE_SEC, float_or_ndarray def get_ticks_slice( diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py index 69864fcf46..e221fab092 100644 --- a/qlib/rl/strategy/saoe.py +++ b/qlib/rl/strategy/saoe.py @@ -5,6 +5,7 @@ from typing import Optional import pandas as pd + from qlib.backtest.decision import BaseTradeDecision, Order from qlib.backtest.executor import BaseExecutor from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure diff --git a/qlib/rl/utils/finite_env.py b/qlib/rl/utils/finite_env.py index 309b34e6dd..87f0900e16 100644 --- a/qlib/rl/utils/finite_env.py +++ b/qlib/rl/utils/finite_env.py @@ -11,13 +11,14 @@ import copy import warnings from contextlib import contextmanager -from typing import Any, Callable, cast, Dict, Generator, List, Optional, Set, Tuple, Type, Union +from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Type, Union, cast import gym import numpy as np from tianshou.env import BaseVectorEnv, DummyVectorEnv, ShmemVectorEnv, SubprocVectorEnv from qlib.typehint import Literal + from .log import LogWriter __all__ = [ From e1beab57a2dfea7a7e1cdc6573c9b7aecf6188d2 Mon Sep 
17 00:00:00 2001 From: Default Date: Wed, 3 Aug 2022 14:52:28 +0800 Subject: [PATCH 07/23] create_state_maintainer_recursive --- qlib/backtest/executor.py | 1 + qlib/rl/integration/__init__.py | 4 - qlib/rl/integration/feature.py | 109 ---------------------- qlib/rl/order_execution/simulator_qlib.py | 94 +++++++++++++++++-- qlib/rl/order_execution/state.py | 6 +- qlib/rl/strategy/decomposed.py | 11 +-- qlib/rl/strategy/saoe.py | 9 +- qlib/strategy/base.py | 1 - tests/rl/test_qlib_simulator.py | 2 +- 9 files changed, 97 insertions(+), 140 deletions(-) delete mode 100644 qlib/rl/integration/__init__.py delete mode 100644 qlib/rl/integration/feature.py diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index 6948faca30..501b1bbb35 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -480,6 +480,7 @@ def _collect_data( # do nothing and just step forward sub_cal.step() + # Let inner strategy know that the outer level execution is done. self.inner_strategy.post_upper_level_exe_step() return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list} diff --git a/qlib/rl/integration/__init__.py b/qlib/rl/integration/__init__.py deleted file mode 100644 index 765bdee0c1..0000000000 --- a/qlib/rl/integration/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -# TODO: find a better way to organize contents under this module. diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py deleted file mode 100644 index 347b2775ba..0000000000 --- a/qlib/rl/integration/feature.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License.
- -import collections -from typing import List - -import pandas as pd - -import qlib -from qlib.config import REG_CN -from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select -from qlib.data.dataset import DatasetH - - -class LRUCache: - def __init__(self, pool_size: int = 200): - self.pool_size = pool_size - self.contents: dict = {} - self.keys: collections.deque = collections.deque() - - def put(self, key, item): - if self.has(key): - self.keys.remove(key) - self.keys.append(key) - self.contents[key] = item - while len(self.contents) > self.pool_size: - self.contents.pop(self.keys.popleft()) - - def get(self, key): - return self.contents[key] - - def has(self, key): - return key in self.contents - - -class DataWrapper: - def __init__( - self, - feature_dataset: DatasetH, - backtest_dataset: DatasetH, - columns_today: List[str], - columns_yesterday: List[str], - _internal: bool = False, - ): - assert _internal, "Init function of data wrapper is for internal use only." - - self.feature_dataset = feature_dataset - self.backtest_dataset = backtest_dataset - self.columns_today = columns_today - self.columns_yesterday = columns_yesterday - - # TODO: We might have the chance to merge them. 
- self.feature_cache = LRUCache() - self.backtest_cache = LRUCache() - - def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False) -> pd.DataFrame: - start_time, end_time = date.replace(hour=0, minute=0, second=0), date.replace(hour=23, minute=59, second=59) - - if backtest: - dataset = self.backtest_dataset - cache = self.backtest_cache - else: - dataset = self.feature_dataset - cache = self.feature_cache - - if cache.has((start_time, end_time, stock_id)): - return cache.get((start_time, end_time, stock_id)) - data = dataset.handler.fetch(pd.IndexSlice[stock_id, start_time:end_time], level=None) - cache.put((start_time, end_time, stock_id), data) - return data - - -def init_qlib(qlib_config: dict) -> None: - provider_uri_map = { - "day": qlib_config["provider_uri_day"].as_posix(), - "1min": qlib_config["provider_uri_1min"].as_posix(), - } - qlib.init( - region=REG_CN, - auto_mount=False, - custom_ops=[DayLast, FFillNan, BFillNan, Date, Select, IsNull, IsInf, Cut, DayCumsum], - expression_cache=None, - calendar_provider={ - "class": "LocalCalendarProvider", - "module_path": "qlib.data.data", - "kwargs": { - "backend": { - "class": "FileCalendarStorage", - "module_path": "qlib.data.storage.file_storage", - "kwargs": {"provider_uri_map": provider_uri_map}, - }, - }, - }, - feature_provider={ - "class": "LocalFeatureProvider", - "module_path": "qlib.data.data", - "kwargs": { - "backend": { - "class": "FileFeatureStorage", - "module_path": "qlib.data.storage.file_storage", - "kwargs": {"provider_uri_map": provider_uri_map}, - }, - }, - }, - provider_uri=provider_uri_map, - kernels=1, - redis_port=-1, - clear_mem_cache=False, # init_qlib will be called for multiple times. 
Keep the cache for improving performance - ) diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index ff45a84781..438caec10b 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -6,18 +6,94 @@ from typing import Generator, Optional import pandas as pd - +import qlib from qlib.backtest import get_strategy_executor from qlib.backtest.decision import Order -from qlib.backtest.executor import NestedExecutor +from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor +from qlib.config import REG_CN +from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData -from qlib.rl.integration.feature import init_qlib from qlib.rl.order_execution.state import SAOEState from qlib.rl.order_execution.utils import get_ticks_slice from qlib.rl.simulator import Simulator from qlib.rl.strategy.saoe import SAOEStrategy +def init_qlib(qlib_config: dict) -> None: + provider_uri_map = { + "day": qlib_config["provider_uri_day"].as_posix(), + "1min": qlib_config["provider_uri_1min"].as_posix(), + } + qlib.init( + region=REG_CN, + auto_mount=False, + custom_ops=[DayLast, FFillNan, BFillNan, Date, Select, IsNull, IsInf, Cut, DayCumsum], + expression_cache=None, + calendar_provider={ + "class": "LocalCalendarProvider", + "module_path": "qlib.data.data", + "kwargs": { + "backend": { + "class": "FileCalendarStorage", + "module_path": "qlib.data.storage.file_storage", + "kwargs": {"provider_uri_map": provider_uri_map}, + }, + }, + }, + feature_provider={ + "class": "LocalFeatureProvider", + "module_path": "qlib.data.data", + "kwargs": { + "backend": { + "class": "FileFeatureStorage", + "module_path": "qlib.data.storage.file_storage", + "kwargs": {"provider_uri_map": provider_uri_map}, + }, + }, + }, + provider_uri=provider_uri_map, + kernels=1, + 
redis_port=-1, + clear_mem_cache=False, # init_qlib will be called for multiple times. Keep the cache for improving performance + ) + + +def create_state_maintainer_recursive( + executor: BaseExecutor, + order: Order, + backtest_data: QlibIntradayBacktestData, + time_per_step: str, + ticks_index: pd.DatetimeIndex, + twap_price: float, + ticks_for_order: pd.DatetimeIndex, +) -> None: + if isinstance(executor, SimulatorExecutor): + return + else: + assert isinstance(executor, NestedExecutor) + + if isinstance(executor.inner_strategy, SAOEStrategy): + executor.inner_strategy.create_saoe_maintainer( + order=order, + executor=executor.inner_executor, + backtest_data=backtest_data, + time_per_step=time_per_step, + ticks_index=ticks_index, + twap_price=twap_price, + ticks_for_order=ticks_for_order, + ) + + create_state_maintainer_recursive( + executor.inner_executor, + order, + backtest_data, + time_per_step, + ticks_index, + twap_price, + ticks_for_order, + ) + + class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools. 
@@ -54,7 +130,6 @@ def __init__( init_qlib(qlib_config) - self._executor: Optional[NestedExecutor] = None self._collect_data_loop: Optional[Generator] = None self.reset(order, time_per_step, strategy_config, executor_config, exchange_config) @@ -66,7 +141,7 @@ def reset( executor_config: dict, exchange_config: dict, ) -> None: - top_strategy, self._executor = get_strategy_executor( + strategy, self._executor = get_strategy_executor( start_time=pd.Timestamp(order.start_time.date()), end_time=pd.Timestamp(order.start_time.date()) + pd.DateOffset(1), strategy=strategy_config, @@ -77,7 +152,7 @@ def reset( pos_type="InfPosition", ) assert isinstance(self._executor, NestedExecutor) - top_strategy.reset(level_infra=self._executor.get_level_infra()) + strategy.reset(level_infra=self._executor.get_level_infra()) exchange = self._executor.trade_exchange ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)]) @@ -96,15 +171,14 @@ def reset( self.twap_price = backtest_data.get_deal_price().mean() - self._collect_data_loop = self._executor.collect_data(top_strategy.generate_trade_decision(), level=0) + self._collect_data_loop = self._executor.collect_data(strategy.generate_trade_decision(), level=0) assert isinstance(self._collect_data_loop, Generator) self._last_yielded_saoe_strategy = self._iter_strategy(action=None) - assert isinstance(self._executor.inner_strategy, SAOEStrategy) - self._executor.inner_strategy.create_saoe_maintainer( + create_state_maintainer_recursive( + executor=self._executor, order=order, - executor=self._executor.inner_executor, backtest_data=backtest_data, time_per_step=time_per_step, ticks_index=ticks_index, diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index 6b52cb39fb..dd66813665 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -23,9 +23,9 @@ class SAOEStateMaintainer: Example usage:: - maintainer = StateMaintainer(...) 
# in reset - maintainer.update(...) # in step - # get states in get_state from maintainer + maintainer = StateMaintainer(...) + maintainer.update(...) + state = maintainer.saoe_state """ def __init__( diff --git a/qlib/rl/strategy/decomposed.py b/qlib/rl/strategy/decomposed.py index 1da1540f4a..7431fe3562 100644 --- a/qlib/rl/strategy/decomposed.py +++ b/qlib/rl/strategy/decomposed.py @@ -1,25 +1,24 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from typing import Any, Generator, Optional +from typing import Any, Generator -from qlib.backtest import CommonInfrastructure from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO -from qlib.backtest.utils import LevelInfrastructure -from qlib.rl.order_execution.state import SAOEStateMaintainer +from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure from qlib.rl.strategy.saoe import SAOEStrategy class DecomposedStrategy(SAOEStrategy): + """Decomposed strategy that needs actions from outside to generate trade decisions.""" + def __init__( self, outer_trade_decision: BaseTradeDecision = None, level_infra: LevelInfrastructure = None, common_infra: CommonInfrastructure = None, - **kwargs, + **kwargs: Any, ) -> None: super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs) - self.maintainer: Optional[SAOEStateMaintainer] = None def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]: # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py index e221fab092..f6f17e5a8d 100644 --- a/qlib/rl/strategy/saoe.py +++ b/qlib/rl/strategy/saoe.py @@ -2,10 +2,9 @@ # Licensed under the MIT License. 
from abc import ABCMeta -from typing import Optional +from typing import Any import pandas as pd - from qlib.backtest.decision import BaseTradeDecision, Order from qlib.backtest.executor import BaseExecutor from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure @@ -19,16 +18,14 @@ class SAOEStrategy(RLStrategy, metaclass=ABCMeta): def __init__( self, - policy, + policy: object, # TODO: add accurate typehint later. outer_trade_decision: BaseTradeDecision = None, level_infra: LevelInfrastructure = None, common_infra: CommonInfrastructure = None, - **kwargs, + **kwargs: Any, ) -> None: super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs) - self.maintainer: Optional[SAOEStateMaintainer] = None - def create_saoe_maintainer( self, order: Order, diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 888adfa2ba..550561cf54 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -8,7 +8,6 @@ if TYPE_CHECKING: from qlib.backtest.exchange import Exchange from qlib.backtest.position import BasePosition - from qlib.backtest.executor import BaseExecutor from typing import Tuple diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py index 28a549eec6..d336ab2b6a 100644 --- a/tests/rl/test_qlib_simulator.py +++ b/tests/rl/test_qlib_simulator.py @@ -69,7 +69,7 @@ def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]: "trade_type": SimulatorExecutor.TT_SERIAL, "generate_report": False, "track_data": True, - } + }, }, "track_data": True, }, From 25aeee583464ca577572b848e65687195d1f9219 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Thu, 4 Aug 2022 13:31:27 +0800 Subject: [PATCH 08/23] Remove explicit time_per_step --- qlib/rl/order_execution/simulator_qlib.py | 13 ++----------- tests/rl/test_qlib_simulator.py | 15 +++++++-------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/qlib/rl/order_execution/simulator_qlib.py 
b/qlib/rl/order_execution/simulator_qlib.py index 438caec10b..04cb143046 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -62,7 +62,6 @@ def create_state_maintainer_recursive( executor: BaseExecutor, order: Order, backtest_data: QlibIntradayBacktestData, - time_per_step: str, ticks_index: pd.DatetimeIndex, twap_price: float, ticks_for_order: pd.DatetimeIndex, @@ -77,7 +76,7 @@ def create_state_maintainer_recursive( order=order, executor=executor.inner_executor, backtest_data=backtest_data, - time_per_step=time_per_step, + time_per_step=executor.inner_executor.time_per_step, ticks_index=ticks_index, twap_price=twap_price, ticks_for_order=ticks_for_order, @@ -87,7 +86,6 @@ def create_state_maintainer_recursive( executor.inner_executor, order, backtest_data, - time_per_step, ticks_index, twap_price, ticks_for_order, @@ -101,8 +99,6 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): ---------- order (Order): The seed to start an SAOE simulator is an order. - time_per_step (str): - A string to describe the time granularity of each step. Current support "1min", "30min", and "1day" qlib_config (dict): Configuration used to initialize Qlib. strategy_config (dict): @@ -116,14 +112,11 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): def __init__( self, order: Order, - time_per_step: str, # "1min", "30min", "1day" qlib_config: dict, strategy_config: dict, executor_config: dict, exchange_config: dict, ) -> None: - assert time_per_step in ("1min", "30min", "1day") - super().__init__(initial=order) assert order.start_time.date() == order.end_time.date(), "Start date and end date must be the same." 
@@ -131,12 +124,11 @@ def __init__( init_qlib(qlib_config) self._collect_data_loop: Optional[Generator] = None - self.reset(order, time_per_step, strategy_config, executor_config, exchange_config) + self.reset(order, strategy_config, executor_config, exchange_config) def reset( self, order: Order, - time_per_step: str, strategy_config: dict, executor_config: dict, exchange_config: dict, @@ -180,7 +172,6 @@ def reset( executor=self._executor, order=order, backtest_data=backtest_data, - time_per_step=time_per_step, ticks_index=ticks_index, twap_price=self.twap_price, ticks_for_order=ticks_for_order, diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py index d336ab2b6a..6a0bd4c329 100644 --- a/tests/rl/test_qlib_simulator.py +++ b/tests/rl/test_qlib_simulator.py @@ -33,7 +33,7 @@ def get_order() -> Order: ) -def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]: +def get_configs(order: Order) -> Tuple[dict, dict, dict]: strategy_config = { "class": "SingleOrderStrategy", "module_path": "qlib.rl.strategy.single_order", @@ -55,7 +55,7 @@ def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]: "class": "NestedExecutor", "module_path": "qlib.backtest.executor", "kwargs": { - "time_per_step": time_per_step, + "time_per_step": "30min", "inner_strategy": { "class": "TWAPStrategy", "module_path": "qlib.contrib.strategy.rule_strategy", @@ -98,7 +98,7 @@ def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]: return strategy_config, executor_config, exchange_config -def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecutionQlib: +def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib: DATA_ROOT_DIR = Path(__file__).parent.parent / ".data" / "rl" / "qlib_simulator" # fmt: off @@ -117,11 +117,10 @@ def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecution } # fmt: on - strategy_config, executor_config, exchange_config = 
get_configs(order, time_per_step) + strategy_config, executor_config, exchange_config = get_configs(order) return SingleAssetOrderExecutionQlib( order=order, - time_per_step=time_per_step, qlib_config=qlib_config, strategy_config=strategy_config, executor_config=executor_config, @@ -132,7 +131,7 @@ def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecution @python_version_request def test_simulator_first_step(): order = get_order() - simulator = get_simulator(order, time_per_step="30min") + simulator = get_simulator(order) state = simulator.get_state() assert state.cur_time == pd.Timestamp("2019-03-04 09:30:00") assert state.position == TOTAL_POSITION @@ -167,7 +166,7 @@ def test_simulator_first_step(): @python_version_request def test_simulator_stop_twap() -> None: order = get_order() - simulator = get_simulator(order, time_per_step="30min") + simulator = get_simulator(order) NUM_STEPS = 7 for i in range(NUM_STEPS): simulator.step(TOTAL_POSITION / NUM_STEPS) @@ -194,7 +193,7 @@ def test_simulator_stop_twap() -> None: def test_interpreter() -> None: NUM_EXECUTION = 3 order = get_order() - simulator = get_simulator(order, time_per_step="30min") + simulator = get_simulator(order) interpreter_action = CategoricalActionInterpreter(values=NUM_EXECUTION) NUM_STEPS = 7 From a84c1f198a3a0b9388ade9f74642a2b179d59d6f Mon Sep 17 00:00:00 2001 From: Default Date: Fri, 5 Aug 2022 13:46:35 +0800 Subject: [PATCH 09/23] CI test passed --- qlib/backtest/executor.py | 2 +- qlib/backtest/utils.py | 9 +++- qlib/rl/order_execution/simulator_qlib.py | 60 +++++---------------- qlib/rl/order_execution/state.py | 23 ++++---- qlib/rl/strategy/saoe.py | 66 ++++++++++++++--------- qlib/strategy/base.py | 5 ++ tests/rl/test_qlib_simulator.py | 4 +- 7 files changed, 83 insertions(+), 86 deletions(-) diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index 501b1bbb35..07da6b57bc 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -114,7 
+114,7 @@ def __init__( self.track_data = track_data self._trade_exchange = trade_exchange self.level_infra = LevelInfrastructure() - self.level_infra.reset_infra(common_infra=common_infra) + self.level_infra.reset_infra(common_infra=common_infra, executor=self) self._settle_type = settle_type self.reset(start_time=start_time, end_time=end_time, common_infra=common_infra) if common_infra is None: diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py index db35dc4820..05691f8228 100644 --- a/qlib/backtest/utils.py +++ b/qlib/backtest/utils.py @@ -21,6 +21,9 @@ from ..data.data import Cal +SAOE_DATA_KEY = "saoe_data" + + class TradeCalendarManager: """ Manager for trading calendar @@ -235,7 +238,9 @@ def update(self, other: BaseInfrastructure) -> None: class CommonInfrastructure(BaseInfrastructure): def get_support_infra(self) -> Set[str]: - return {"trade_account", "trade_exchange"} + # SAOE_DATA_KEY is used to store SAOE (single asset order execution) information that should be shared by + # all strategies. It should be dict. + return {"trade_account", "trade_exchange", SAOE_DATA_KEY} class LevelInfrastructure(BaseInfrastructure): @@ -248,7 +253,7 @@ def get_support_infra(self) -> Set[str]: sub_level_infra: - **NOTE**: this will only work after _init_sub_trading !!! 
""" - return {"trade_calendar", "sub_level_infra", "common_infra"} + return {"trade_calendar", "sub_level_infra", "common_infra", "executor"} def reset_cal( self, diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index 04cb143046..d56ca43f01 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -10,6 +10,7 @@ from qlib.backtest import get_strategy_executor from qlib.backtest.decision import Order from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor +from qlib.backtest.utils import SAOE_DATA_KEY from qlib.config import REG_CN from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData @@ -58,40 +59,6 @@ def init_qlib(qlib_config: dict) -> None: ) -def create_state_maintainer_recursive( - executor: BaseExecutor, - order: Order, - backtest_data: QlibIntradayBacktestData, - ticks_index: pd.DatetimeIndex, - twap_price: float, - ticks_for_order: pd.DatetimeIndex, -) -> None: - if isinstance(executor, SimulatorExecutor): - return - else: - assert isinstance(executor, NestedExecutor) - - if isinstance(executor.inner_strategy, SAOEStrategy): - executor.inner_strategy.create_saoe_maintainer( - order=order, - executor=executor.inner_executor, - backtest_data=backtest_data, - time_per_step=executor.inner_executor.time_per_step, - ticks_index=ticks_index, - twap_price=twap_price, - ticks_for_order=ticks_for_order, - ) - - create_state_maintainer_recursive( - executor.inner_executor, - order, - backtest_data, - ticks_index, - twap_price, - ticks_for_order, - ) - - class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools. 
@@ -134,8 +101,8 @@ def reset( exchange_config: dict, ) -> None: strategy, self._executor = get_strategy_executor( - start_time=pd.Timestamp(order.start_time.date()), - end_time=pd.Timestamp(order.start_time.date()) + pd.DateOffset(1), + start_time=order.start_time.replace(hour=0, minute=0, second=0), + end_time=order.start_time.replace(hour=0, minute=0, second=0) + pd.DateOffset(1), strategy=strategy_config, executor=executor_config, benchmark=order.stock_id, @@ -143,8 +110,9 @@ def reset( exchange_kwargs=exchange_config, pos_type="InfPosition", ) + assert isinstance(self._executor, NestedExecutor) - strategy.reset(level_infra=self._executor.get_level_infra()) + strategy.reset(level_infra=self._executor.get_level_infra()) # TODO: check if we could remove this exchange = self._executor.trade_exchange ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)]) @@ -154,6 +122,7 @@ def reset( order.end_time, include_end=True, ) + backtest_data = QlibIntradayBacktestData( order=order, exchange=exchange, @@ -161,6 +130,12 @@ def reset( end_time=ticks_for_order[-1], ) + # Store ticks_for_order & backtest_data in the common_infra. They will be reused by all strategies. 
+ common_infra = self._executor.common_infra + saoe_data = {} if not common_infra.has(SAOE_DATA_KEY) else common_infra.get(SAOE_DATA_KEY) + saoe_data[(order.stock_id, order.direction)] = (ticks_index, ticks_for_order, backtest_data) + common_infra.reset_infra(**{SAOE_DATA_KEY: saoe_data}) + self.twap_price = backtest_data.get_deal_price().mean() self._collect_data_loop = self._executor.collect_data(strategy.generate_trade_decision(), level=0) @@ -168,14 +143,7 @@ def reset( self._last_yielded_saoe_strategy = self._iter_strategy(action=None) - create_state_maintainer_recursive( - executor=self._executor, - order=order, - backtest_data=backtest_data, - ticks_index=ticks_index, - twap_price=self.twap_price, - ticks_for_order=ticks_for_order, - ) + self._order = order def _iter_strategy(self, action: float = None) -> SAOEStrategy: """Iterate the _collect_data_loop until we get the next yield SAOEStrategy.""" @@ -206,7 +174,7 @@ def step(self, action: float) -> None: assert self._executor is not None def get_state(self) -> SAOEState: - return self._last_yielded_saoe_strategy.maintainer.saoe_state + return self._last_yielded_saoe_strategy.get_saoe_state_by_order(self._order) def done(self) -> bool: return not self._executor.is_collecting diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index dd66813665..5824d0d090 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -9,7 +9,7 @@ import pandas as pd from typing_extensions import TypedDict -from qlib.backtest import Order +from qlib.backtest import Exchange, Order from qlib.backtest.executor import BaseExecutor from qlib.constant import EPS from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData @@ -32,30 +32,31 @@ def __init__( self, order: Order, executor: BaseExecutor, - backtest_data: QlibIntradayBacktestData, - time_per_step: str, + exchange: Exchange, + ticks_per_step: int, ticks_index: pd.DatetimeIndex, - twap_price: float, 
ticks_for_order: pd.DatetimeIndex, + backtest_data: QlibIntradayBacktestData, ) -> None: super().__init__() self.position = order.amount self.order = order self.executor = executor - self.backtest_data = backtest_data - self.time_per_step = time_per_step + self.exchange = exchange self.ticks_index = ticks_index self.ticks_for_order = ticks_for_order - self.twap_price = twap_price + self.backtest_data = backtest_data + + self.twap_price = self.backtest_data.get_deal_price().mean() metric_keys = list(SAOEMetrics.__annotations__.keys()) # pylint: disable=no-member self.history_exec = pd.DataFrame(columns=metric_keys).set_index("datetime") self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime") self.metrics: Optional[SAOEMetrics] = None - self.cur_time = ticks_for_order[0] - self.ticks_per_step = int(pd.Timedelta(self.time_per_step).total_seconds() // 60) + self.cur_time = self.ticks_for_order[0] + self.ticks_per_step = ticks_per_step def _next_time(self) -> pd.Timestamp: current_loc = self.ticks_index.get_loc(self.cur_time) @@ -76,7 +77,7 @@ def update(self, execute_result: list) -> None: datetime_list = pd.DatetimeIndex([]) else: market_volume = np.array( - self.executor.trade_exchange.get_volume( + self.exchange.get_volume( self.order.stock_id, execute_result[0][0].start_time, execute_result[-1][0].start_time, @@ -216,7 +217,7 @@ def saoe_state(self) -> SAOEState: history_steps=self.history_steps, metrics=self.metrics, backtest_data=self.backtest_data, - ticks_per_step=int(pd.Timedelta(self.time_per_step).total_seconds() // 60), + ticks_per_step=self.ticks_per_step, ticks_index=self.ticks_index, ticks_for_order=self.ticks_for_order, ) diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py index f6f17e5a8d..aae60d4802 100644 --- a/qlib/rl/strategy/saoe.py +++ b/qlib/rl/strategy/saoe.py @@ -1,15 +1,13 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
- +import collections from abc import ABCMeta -from typing import Any +from typing import Any, cast, Dict, Tuple import pandas as pd from qlib.backtest.decision import BaseTradeDecision, Order -from qlib.backtest.executor import BaseExecutor -from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure -from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData -from qlib.rl.order_execution.state import SAOEStateMaintainer +from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure, SAOE_DATA_KEY +from qlib.rl.order_execution.state import SAOEState, SAOEStateMaintainer from qlib.strategy.base import RLStrategy @@ -26,30 +24,48 @@ def __init__( ) -> None: super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs) - def create_saoe_maintainer( - self, - order: Order, - executor: BaseExecutor, - backtest_data: QlibIntradayBacktestData, - time_per_step: str, - ticks_index: pd.DatetimeIndex, - twap_price: float, - ticks_for_order: pd.DatetimeIndex, - ) -> None: - self.maintainer = SAOEStateMaintainer( + self.maintainer_dict: Dict[Tuple[str, int], SAOEStateMaintainer] = {} + + def _create_saoe_maintainer(self, order: Order) -> SAOEStateMaintainer: + saoe_data = self.common_infra.get(SAOE_DATA_KEY) + ticks_index, ticks_for_order, backtest_data = saoe_data[(order.stock_id, order.direction)] + + return SAOEStateMaintainer( order=order, - executor=executor, - backtest_data=backtest_data, - time_per_step=time_per_step, + executor=self.executor, + exchange=self.trade_exchange, + ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")), ticks_index=ticks_index, - twap_price=twap_price, ticks_for_order=ticks_for_order, + backtest_data=backtest_data, ) + def reset( + self, + level_infra: LevelInfrastructure = None, + common_infra: CommonInfrastructure = None, + outer_trade_decision: BaseTradeDecision = None, + **kwargs, + ) -> None: + super(SAOEStrategy, 
self).reset(level_infra, common_infra, outer_trade_decision, **kwargs) + + self.maintainer_dict = {} + for decision in outer_trade_decision.get_decision(): + order = cast(Order, decision) + self.maintainer_dict[(order.stock_id, order.direction)] = self._create_saoe_maintainer(order) + + def get_saoe_state_by_order(self, order: Order) -> SAOEState: + return self.maintainer_dict[(order.stock_id, order.direction)].saoe_state + def post_upper_level_exe_step(self) -> None: - self.maintainer.generate_metrics_after_done() + for maintainer in self.maintainer_dict.values(): + maintainer.generate_metrics_after_done() def post_exe_step(self, execute_result: list) -> None: - self.maintainer.update( - execute_result=execute_result, - ) + results = collections.defaultdict(list) + if execute_result is not None: + for e in execute_result: + results[(e[0].stock_id, e[0].direction)].append(e) + + for (stock_id, direction), maintainer in self.maintainer_dict.items(): + maintainer.update(results[(stock_id, direction)]) diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 550561cf54..c6294eea3e 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -8,6 +8,7 @@ if TYPE_CHECKING: from qlib.backtest.exchange import Exchange from qlib.backtest.position import BasePosition + from qlib.backtest.executor import BaseExecutor from typing import Tuple @@ -55,6 +56,10 @@ def __init__( self._reset(level_infra=level_infra, common_infra=common_infra, outer_trade_decision=outer_trade_decision) self._trade_exchange = trade_exchange + @property + def executor(self) -> BaseExecutor: + return self.level_infra.get("executor") + @property def trade_calendar(self) -> TradeCalendarManager: return self.level_infra.get("trade_calendar") diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py index 6a0bd4c329..db41be3d61 100644 --- a/tests/rl/test_qlib_simulator.py +++ b/tests/rl/test_qlib_simulator.py @@ -7,7 +7,6 @@ import pandas as pd import pytest - from 
qlib.backtest.decision import Order, OrderDir, TradeRangeByTime from qlib.backtest.executor import SimulatorExecutor from qlib.rl.order_execution import CategoricalActionInterpreter @@ -205,3 +204,6 @@ def test_interpreter() -> None: position_history.append(state.position) assert position_history[-1] == max(TOTAL_POSITION - TOTAL_POSITION / NUM_EXECUTION * (i + 1), 0.0) + + +test_simulator_stop_twap() From fefad581d605ca1539ec25031c0b277984613403 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Fri, 5 Aug 2022 14:03:18 +0800 Subject: [PATCH 10/23] Resolve PR comments --- .../{objects.py => constants.py} | 0 qlib/rl/order_execution/simulator_qlib.py | 35 +++++- qlib/rl/order_execution/simulator_simple.py | 2 +- qlib/rl/order_execution/state.py | 2 +- qlib/rl/order_execution/strategy.py | 108 ++++++++++++++++++ qlib/rl/order_execution/utils.py | 2 +- qlib/rl/strategy/decomposed.py | 43 ------- qlib/rl/strategy/saoe.py | 71 ------------ tests/rl/test_qlib_simulator.py | 4 +- 9 files changed, 142 insertions(+), 125 deletions(-) rename qlib/rl/order_execution/{objects.py => constants.py} (100%) create mode 100644 qlib/rl/order_execution/strategy.py delete mode 100644 qlib/rl/strategy/decomposed.py delete mode 100644 qlib/rl/strategy/saoe.py diff --git a/qlib/rl/order_execution/objects.py b/qlib/rl/order_execution/constants.py similarity index 100% rename from qlib/rl/order_execution/objects.py rename to qlib/rl/order_execution/constants.py diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index d56ca43f01..244c55dfb1 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -17,10 +17,33 @@ from qlib.rl.order_execution.state import SAOEState from qlib.rl.order_execution.utils import get_ticks_slice from qlib.rl.simulator import Simulator -from qlib.rl.strategy.saoe import SAOEStrategy +from qlib.rl.order_execution.strategy import SAOEStrategy def init_qlib(qlib_config: dict) 
-> None: + """Initialize necessary resource to launch the workflow, including data direction, feature columns, etc.. + + Parameters + ---------- + qlib_config: + Qlib configuration. + + Example: + { + "provider_uri_day": DATA_ROOT_DIR / "qlib_1d", + "provider_uri_1min": DATA_ROOT_DIR / "qlib_1min", + "feature_root_dir": DATA_ROOT_DIR / "qlib_handler_stock", + "feature_columns_today": [ + "$open", "$high", "$low", "$close", "$vwap", "$bid", "$ask", "$volume", + "$bidV", "$bidV1", "$bidV3", "$bidV5", "$askV", "$askV1", "$askV3", "$askV5", + ], + "feature_columns_yesterday": [ + "$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1", "$bid_1", "$ask_1", "$volume_1", + "$bidV_1", "$bidV1_1", "$bidV3_1", "$bidV5_1", "$askV_1", "$askV1_1", "$askV3_1", "$askV5_1", + ], + } + """ + provider_uri_map = { "day": qlib_config["provider_uri_day"].as_posix(), "1min": qlib_config["provider_uri_1min"].as_posix(), @@ -64,15 +87,15 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): Parameters ---------- - order (Order): + order The seed to start an SAOE simulator is an order. - qlib_config (dict): + qlib_config Configuration used to initialize Qlib. 
- strategy_config (dict): + strategy_config Strategy configuration - executor_config (dict): + executor_config Executor configuration - exchange_config (dict): + exchange_config Exchange configuration """ diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py index 2c4ae822f4..5754d34c2f 100644 --- a/qlib/rl/order_execution/simulator_simple.py +++ b/qlib/rl/order_execution/simulator_simple.py @@ -12,7 +12,7 @@ from qlib.backtest.decision import Order, OrderDir from qlib.constant import EPS from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data -from qlib.rl.order_execution.objects import ONE_SEC, float_or_ndarray +from qlib.rl.order_execution.constants import ONE_SEC, float_or_ndarray from qlib.rl.order_execution.state import SAOEMetrics, SAOEState from qlib.rl.simulator import Simulator from qlib.rl.utils import LogLevel diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index 5824d0d090..dbeab8dd3f 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -17,7 +17,7 @@ from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, price_advantage -class SAOEStateMaintainer: +class QlibBacktestAdapter: """ Maintain states of the environment. diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py new file mode 100644 index 0000000000..d36fadef13 --- /dev/null +++ b/qlib/rl/order_execution/strategy.py @@ -0,0 +1,108 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +import collections +from abc import ABCMeta +from typing import Any, cast, Dict, Generator, Tuple + +import pandas as pd +from qlib.backtest import CommonInfrastructure, Order +from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO +from qlib.backtest.utils import LevelInfrastructure, SAOE_DATA_KEY +from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState +from qlib.strategy.base import RLStrategy + + +class SAOEStrategy(RLStrategy, metaclass=ABCMeta): + """RL-based strategies that use SAOEState as state.""" + + def __init__( + self, + policy: object, # TODO: add accurate typehint later. + outer_trade_decision: BaseTradeDecision = None, + level_infra: LevelInfrastructure = None, + common_infra: CommonInfrastructure = None, + **kwargs: Any, + ) -> None: + super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs) + + self.adapter_dict: Dict[Tuple[str, int], QlibBacktestAdapter] = {} + + def _create_qlib_backtest_adapter(self, order: Order) -> QlibBacktestAdapter: + saoe_data = self.common_infra.get(SAOE_DATA_KEY) + ticks_index, ticks_for_order, backtest_data = saoe_data[(order.stock_id, order.direction)] + + return QlibBacktestAdapter( + order=order, + executor=self.executor, + exchange=self.trade_exchange, + ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")), + ticks_index=ticks_index, + ticks_for_order=ticks_for_order, + backtest_data=backtest_data, + ) + + def reset( + self, + level_infra: LevelInfrastructure = None, + common_infra: CommonInfrastructure = None, + outer_trade_decision: BaseTradeDecision = None, + **kwargs, + ) -> None: + super(SAOEStrategy, self).reset(level_infra, common_infra, outer_trade_decision, **kwargs) + + self.adapter_dict = {} + for decision in outer_trade_decision.get_decision(): + order = cast(Order, decision) + self.adapter_dict[(order.stock_id, order.direction)] = self._create_qlib_backtest_adapter(order) + + def 
get_saoe_state_by_order(self, order: Order) -> SAOEState: + return self.adapter_dict[(order.stock_id, order.direction)].saoe_state + + def post_upper_level_exe_step(self) -> None: + for maintainer in self.adapter_dict.values(): + maintainer.generate_metrics_after_done() + + def post_exe_step(self, execute_result: list) -> None: + results = collections.defaultdict(list) + if execute_result is not None: + for e in execute_result: + results[(e[0].stock_id, e[0].direction)].append(e) + + for (stock_id, direction), maintainer in self.adapter_dict.items(): + maintainer.update(results[(stock_id, direction)]) + + +class DecomposedStrategy(SAOEStrategy): + """Decomposed strategy that needs actions from outside to generate trade decisions.""" + + def __init__( + self, + outer_trade_decision: BaseTradeDecision = None, + level_infra: LevelInfrastructure = None, + common_infra: CommonInfrastructure = None, + **kwargs: Any, + ) -> None: + super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs) + + def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]: + # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside + # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`, + # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner + # level strategy through this way. 
+ exec_vol = yield self + + oh = self.trade_exchange.get_order_helper() + order = oh.create(self._order.stock_id, exec_vol, self._order.direction) + + return TradeDecisionWO([order], self) + + def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: + return outer_trade_decision + + def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None: + super().reset(outer_trade_decision=outer_trade_decision, **kwargs) + if outer_trade_decision is not None: + order_list = outer_trade_decision.order_list + assert len(order_list) == 1 + self._order = order_list[0] diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py index 44012d9db1..f861237398 100644 --- a/qlib/rl/order_execution/utils.py +++ b/qlib/rl/order_execution/utils.py @@ -10,7 +10,7 @@ from qlib.backtest.decision import OrderDir from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor -from qlib.rl.order_execution.objects import ONE_SEC, float_or_ndarray +from qlib.rl.order_execution.constants import ONE_SEC, float_or_ndarray def get_ticks_slice( diff --git a/qlib/rl/strategy/decomposed.py b/qlib/rl/strategy/decomposed.py deleted file mode 100644 index 7431fe3562..0000000000 --- a/qlib/rl/strategy/decomposed.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from typing import Any, Generator - -from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO -from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure -from qlib.rl.strategy.saoe import SAOEStrategy - - -class DecomposedStrategy(SAOEStrategy): - """Decomposed strategy that needs actions from outside to generate trade decisions.""" - - def __init__( - self, - outer_trade_decision: BaseTradeDecision = None, - level_infra: LevelInfrastructure = None, - common_infra: CommonInfrastructure = None, - **kwargs: Any, - ) -> None: - super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs) - - def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]: - # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside - # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`, - # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner - # level strategy through this way. - exec_vol = yield self - - oh = self.trade_exchange.get_order_helper() - order = oh.create(self._order.stock_id, exec_vol, self._order.direction) - - return TradeDecisionWO([order], self) - - def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: - return outer_trade_decision - - def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None: - super().reset(outer_trade_decision=outer_trade_decision, **kwargs) - if outer_trade_decision is not None: - order_list = outer_trade_decision.order_list - assert len(order_list) == 1 - self._order = order_list[0] diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py deleted file mode 100644 index aae60d4802..0000000000 --- a/qlib/rl/strategy/saoe.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
-import collections -from abc import ABCMeta -from typing import Any, cast, Dict, Tuple - -import pandas as pd -from qlib.backtest.decision import BaseTradeDecision, Order -from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure, SAOE_DATA_KEY -from qlib.rl.order_execution.state import SAOEState, SAOEStateMaintainer -from qlib.strategy.base import RLStrategy - - -class SAOEStrategy(RLStrategy, metaclass=ABCMeta): - """RL-based strategies that use SAOEState as state.""" - - def __init__( - self, - policy: object, # TODO: add accurate typehint later. - outer_trade_decision: BaseTradeDecision = None, - level_infra: LevelInfrastructure = None, - common_infra: CommonInfrastructure = None, - **kwargs: Any, - ) -> None: - super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs) - - self.maintainer_dict: Dict[Tuple[str, int], SAOEStateMaintainer] = {} - - def _create_saoe_maintainer(self, order: Order) -> SAOEStateMaintainer: - saoe_data = self.common_infra.get(SAOE_DATA_KEY) - ticks_index, ticks_for_order, backtest_data = saoe_data[(order.stock_id, order.direction)] - - return SAOEStateMaintainer( - order=order, - executor=self.executor, - exchange=self.trade_exchange, - ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")), - ticks_index=ticks_index, - ticks_for_order=ticks_for_order, - backtest_data=backtest_data, - ) - - def reset( - self, - level_infra: LevelInfrastructure = None, - common_infra: CommonInfrastructure = None, - outer_trade_decision: BaseTradeDecision = None, - **kwargs, - ) -> None: - super(SAOEStrategy, self).reset(level_infra, common_infra, outer_trade_decision, **kwargs) - - self.maintainer_dict = {} - for decision in outer_trade_decision.get_decision(): - order = cast(Order, decision) - self.maintainer_dict[(order.stock_id, order.direction)] = self._create_saoe_maintainer(order) - - def get_saoe_state_by_order(self, order: Order) -> SAOEState: - return 
self.maintainer_dict[(order.stock_id, order.direction)].saoe_state - - def post_upper_level_exe_step(self) -> None: - for maintainer in self.maintainer_dict.values(): - maintainer.generate_metrics_after_done() - - def post_exe_step(self, execute_result: list) -> None: - results = collections.defaultdict(list) - if execute_result is not None: - for e in execute_result: - results[(e[0].stock_id, e[0].direction)].append(e) - - for (stock_id, direction), maintainer in self.maintainer_dict.items(): - maintainer.update(results[(stock_id, direction)]) diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py index db41be3d61..305cf16391 100644 --- a/tests/rl/test_qlib_simulator.py +++ b/tests/rl/test_qlib_simulator.py @@ -10,7 +10,7 @@ from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime from qlib.backtest.executor import SimulatorExecutor from qlib.rl.order_execution import CategoricalActionInterpreter -from qlib.rl.order_execution.objects import FINEST_GRANULARITY +from qlib.rl.order_execution.constants import FINEST_GRANULARITY from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecutionQlib TOTAL_POSITION = 2100.0 @@ -48,7 +48,7 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]: "module_path": "qlib.backtest.executor", "kwargs": { "time_per_step": "1day", - "inner_strategy": {"class": "DecomposedStrategy", "module_path": "qlib.rl.strategy.decomposed"}, + "inner_strategy": {"class": "DecomposedStrategy", "module_path": "qlib.rl.order_execution.strategy"}, "track_data": True, "inner_executor": { "class": "NestedExecutor", From d697381f43af7109e89dcfc3f60ba61bbdef3aa3 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Fri, 5 Aug 2022 14:19:44 +0800 Subject: [PATCH 11/23] Pass all CI --- qlib/rl/order_execution/simulator_qlib.py | 2 +- qlib/rl/order_execution/strategy.py | 23 ++++++++++------------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/qlib/rl/order_execution/simulator_qlib.py 
b/qlib/rl/order_execution/simulator_qlib.py index 244c55dfb1..53e63f709a 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -9,7 +9,7 @@ import qlib from qlib.backtest import get_strategy_executor from qlib.backtest.decision import Order -from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor +from qlib.backtest.executor import NestedExecutor from qlib.backtest.utils import SAOE_DATA_KEY from qlib.config import REG_CN from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index d36fadef13..20eef6c848 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -42,19 +42,14 @@ def _create_qlib_backtest_adapter(self, order: Order) -> QlibBacktestAdapter: backtest_data=backtest_data, ) - def reset( - self, - level_infra: LevelInfrastructure = None, - common_infra: CommonInfrastructure = None, - outer_trade_decision: BaseTradeDecision = None, - **kwargs, - ) -> None: - super(SAOEStrategy, self).reset(level_infra, common_infra, outer_trade_decision, **kwargs) + def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None: + super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs) - self.adapter_dict = {} - for decision in outer_trade_decision.get_decision(): - order = cast(Order, decision) - self.adapter_dict[(order.stock_id, order.direction)] = self._create_qlib_backtest_adapter(order) + if outer_trade_decision is not None: + self.adapter_dict = {} + for decision in outer_trade_decision.get_decision(): + order = cast(Order, decision) + self.adapter_dict[(order.stock_id, order.direction)] = self._create_qlib_backtest_adapter(order) def get_saoe_state_by_order(self, order: Order) -> SAOEState: return self.adapter_dict[(order.stock_id, 
order.direction)].saoe_state @@ -100,8 +95,10 @@ def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: return outer_trade_decision - def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None: + def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None: super().reset(outer_trade_decision=outer_trade_decision, **kwargs) + + assert isinstance(outer_trade_decision, TradeDecisionWO) if outer_trade_decision is not None: order_list = outer_trade_decision.order_list assert len(order_list) == 1 From edd62fd7d1b7e3c97eb3dfbc2b334525051a8b12 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Fri, 5 Aug 2022 14:47:37 +0800 Subject: [PATCH 12/23] Minor test issue --- tests/rl/test_qlib_simulator.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py index 305cf16391..df78976654 100644 --- a/tests/rl/test_qlib_simulator.py +++ b/tests/rl/test_qlib_simulator.py @@ -204,6 +204,3 @@ def test_interpreter() -> None: position_history.append(state.position) assert position_history[-1] == max(TOTAL_POSITION - TOTAL_POSITION / NUM_EXECUTION * (i + 1), 0.0) - - -test_simulator_stop_twap() From 4049bfda2121b2557e9368ef07796d872e925875 Mon Sep 17 00:00:00 2001 From: Default Date: Mon, 8 Aug 2022 11:51:56 +0800 Subject: [PATCH 13/23] Refine SAOE adapter logic --- qlib/backtest/decision.py | 5 ++ qlib/rl/order_execution/simulator_qlib.py | 42 +++++----------- qlib/rl/order_execution/state.py | 2 +- qlib/rl/order_execution/strategy.py | 59 ++++++++++++++++++----- qlib/strategy/base.py | 6 +++ 5 files changed, 72 insertions(+), 42 deletions(-) diff --git a/qlib/backtest/decision.py b/qlib/backtest/decision.py index 4828478c7e..d41fa66f60 100644 --- a/qlib/backtest/decision.py +++ b/qlib/backtest/decision.py @@ -135,6 +135,11 @@ def parse_dir(direction: 
Union[str, int, np.integer, OrderDir, np.ndarray]) -> U else: raise NotImplementedError(f"This type of input is not supported") + @property + def key(self) -> tuple: + """A hashable & unique key to identify this order. Usually used as the key in a dict.""" + return self.stock_id, self.start_time.replace(hour=0, minute=0, second=0), self.direction + class OrderHelper: """ diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index 53e63f709a..15d0edf128 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -89,32 +89,30 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): ---------- order The seed to start an SAOE simulator is an order. - qlib_config - Configuration used to initialize Qlib. strategy_config Strategy configuration executor_config Executor configuration exchange_config Exchange configuration + qlib_config + Configuration used to initialize Qlib. If it is None, Qlib will not be initialized. """ def __init__( self, order: Order, - qlib_config: dict, strategy_config: dict, executor_config: dict, exchange_config: dict, + qlib_config: dict = None, ) -> None: super().__init__(initial=order) assert order.start_time.date() == order.end_time.date(), "Start date and end date must be the same." 
- init_qlib(qlib_config) - self._collect_data_loop: Optional[Generator] = None - self.reset(order, strategy_config, executor_config, exchange_config) + self.reset(order, strategy_config, executor_config, exchange_config, qlib_config) def reset( self, @@ -122,7 +120,11 @@ def reset( strategy_config: dict, executor_config: dict, exchange_config: dict, + qlib_config: dict = None, ) -> None: + if qlib_config is not None: + init_qlib(qlib_config) + strategy, self._executor = get_strategy_executor( start_time=order.start_time.replace(hour=0, minute=0, second=0), end_time=order.start_time.replace(hour=0, minute=0, second=0) + pd.DateOffset(1), @@ -137,30 +139,6 @@ def reset( assert isinstance(self._executor, NestedExecutor) strategy.reset(level_infra=self._executor.get_level_infra()) # TODO: check if we could remove this - exchange = self._executor.trade_exchange - ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)]) - ticks_for_order = get_ticks_slice( - ticks_index, - order.start_time, - order.end_time, - include_end=True, - ) - - backtest_data = QlibIntradayBacktestData( - order=order, - exchange=exchange, - start_time=ticks_for_order[0], - end_time=ticks_for_order[-1], - ) - - # Store ticks_for_order & backtest_data in the common_infra. They will be reused by all strategies. 
- common_infra = self._executor.common_infra - saoe_data = {} if not common_infra.has(SAOE_DATA_KEY) else common_infra.get(SAOE_DATA_KEY) - saoe_data[(order.stock_id, order.direction)] = (ticks_index, ticks_for_order, backtest_data) - common_infra.reset_infra(**{SAOE_DATA_KEY: saoe_data}) - - self.twap_price = backtest_data.get_deal_price().mean() - self._collect_data_loop = self._executor.collect_data(strategy.generate_trade_decision(), level=0) assert isinstance(self._collect_data_loop, Generator) @@ -168,6 +146,10 @@ def reset( self._order = order + @property + def twap_price(self) -> float: + return self._last_yielded_saoe_strategy.adapter_dict[self._order.key].twap_price + def _iter_strategy(self, action: float = None) -> SAOEStrategy: """Iterate the _collect_data_loop until we get the next yield SAOEStrategy.""" assert self._collect_data_loop is not None diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index dbeab8dd3f..02885bce6b 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -55,7 +55,7 @@ def __init__( self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime") self.metrics: Optional[SAOEMetrics] = None - self.cur_time = self.ticks_for_order[0] + self.cur_time = max(ticks_for_order[0], order.start_time) self.ticks_per_step = ticks_per_step def _next_time(self) -> pd.Timestamp: diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index 20eef6c848..c6d1faa436 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -7,9 +7,11 @@ import pandas as pd from qlib.backtest import CommonInfrastructure, Order -from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO +from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange, TradeRangeByTime from qlib.backtest.utils import LevelInfrastructure, SAOE_DATA_KEY +from qlib.rl.data.exchange_wrapper import 
QlibIntradayBacktestData from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState +from qlib.rl.order_execution.utils import get_ticks_slice from qlib.strategy.base import RLStrategy @@ -26,17 +28,51 @@ def __init__( ) -> None: super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs) - self.adapter_dict: Dict[Tuple[str, int], QlibBacktestAdapter] = {} + self.adapter_dict: Dict[tuple, QlibBacktestAdapter] = {} - def _create_qlib_backtest_adapter(self, order: Order) -> QlibBacktestAdapter: - saoe_data = self.common_infra.get(SAOE_DATA_KEY) - ticks_index, ticks_for_order, backtest_data = saoe_data[(order.stock_id, order.direction)] + def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter: + if not self.common_infra.has(SAOE_DATA_KEY): + self.common_infra.reset_infra(**{SAOE_DATA_KEY: {}}) + # saoe_data can be considered as some type of cache. Use it to avoid unnecessary data reload. + # The data for one order would be loaded only once. All strategies will reuse this data. 
+ saoe_data = self.common_infra.get(SAOE_DATA_KEY) + if order.key not in saoe_data: + data = self.trade_exchange.get_deal_price( + stock_id=order.stock_id, + start_time=order.start_time.replace(hour=0, minute=0, second=0), + end_time=order.start_time.replace(hour=23, minute=59, second=59), + direction=order.direction, + method=None + ) + + ticks_index = pd.DatetimeIndex(data.index) + if isinstance(trade_range, TradeRangeByTime): + ticks_for_order = get_ticks_slice( + ticks_index, + trade_range.start_time, + trade_range.end_time, + include_end=True, + ) + else: + ticks_for_order = None # FIXME: implement this logic + start_time = None # FIXME: implement this logic + + backtest_data = QlibIntradayBacktestData( + order=order, + exchange=self.trade_exchange, + start_time=ticks_for_order[0], + end_time=ticks_for_order[-1], + ) + + saoe_data[order.key] = (ticks_index, ticks_for_order, backtest_data) + + ticks_index, ticks_for_order, backtest_data = saoe_data[order.key] return QlibBacktestAdapter( order=order, executor=self.executor, exchange=self.trade_exchange, - ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")), + ticks_per_step=self.ticks_per_step, ticks_index=ticks_index, ticks_for_order=ticks_for_order, backtest_data=backtest_data, @@ -45,14 +81,15 @@ def _create_qlib_backtest_adapter(self, order: Order) -> QlibBacktestAdapter: def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None: super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs) + trade_range = outer_trade_decision.trade_range if outer_trade_decision is not None: self.adapter_dict = {} for decision in outer_trade_decision.get_decision(): order = cast(Order, decision) - self.adapter_dict[(order.stock_id, order.direction)] = self._create_qlib_backtest_adapter(order) + self.adapter_dict[order.key] = self._create_qlib_backtest_adapter(order, trade_range) def get_saoe_state_by_order(self, order: Order) -> 
SAOEState: - return self.adapter_dict[(order.stock_id, order.direction)].saoe_state + return self.adapter_dict[order.key].saoe_state def post_upper_level_exe_step(self) -> None: for maintainer in self.adapter_dict.values(): @@ -62,10 +99,10 @@ def post_exe_step(self, execute_result: list) -> None: results = collections.defaultdict(list) if execute_result is not None: for e in execute_result: - results[(e[0].stock_id, e[0].direction)].append(e) + results[e[0].key].append(e) - for (stock_id, direction), maintainer in self.adapter_dict.items(): - maintainer.update(results[(stock_id, direction)]) + for key, maintainer in self.adapter_dict.items(): + maintainer.update(results[key]) class DecomposedStrategy(SAOEStrategy): diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index c6294eea3e..6c173a9ec3 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -5,6 +5,8 @@ from abc import ABCMeta, abstractmethod from typing import Any, Generator, Optional, TYPE_CHECKING, Union +import pandas as pd + if TYPE_CHECKING: from qlib.backtest.exchange import Exchange from qlib.backtest.position import BasePosition @@ -64,6 +66,10 @@ def executor(self) -> BaseExecutor: def trade_calendar(self) -> TradeCalendarManager: return self.level_infra.get("trade_calendar") + @property + def ticks_per_step(self) -> int: + return int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")) + @property def trade_position(self) -> BasePosition: return self.common_infra.get("trade_account").current_position From bbf500ca5ea9df743d18ceee47c9f1ea6d332df5 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Mon, 8 Aug 2022 14:07:29 +0800 Subject: [PATCH 14/23] Minor bugfix --- qlib/rl/order_execution/simulator_qlib.py | 5 +---- qlib/rl/order_execution/state.py | 2 +- qlib/rl/order_execution/strategy.py | 9 +++++---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index 
15d0edf128..35e829fb7f 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -10,14 +10,11 @@ from qlib.backtest import get_strategy_executor from qlib.backtest.decision import Order from qlib.backtest.executor import NestedExecutor -from qlib.backtest.utils import SAOE_DATA_KEY from qlib.config import REG_CN from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select -from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData from qlib.rl.order_execution.state import SAOEState -from qlib.rl.order_execution.utils import get_ticks_slice -from qlib.rl.simulator import Simulator from qlib.rl.order_execution.strategy import SAOEStrategy +from qlib.rl.simulator import Simulator def init_qlib(qlib_config: dict) -> None: diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index 02885bce6b..9e93562701 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -211,7 +211,7 @@ def _collect_single_order_metric( def saoe_state(self) -> SAOEState: return SAOEState( order=self.order, - cur_time=self.executor.trade_calendar.get_step_time()[0], + cur_time=self.cur_time, position=self.position, history_exec=self.history_exec, history_steps=self.history_steps, diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index c6d1faa436..7b4ced2309 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -3,7 +3,7 @@ import collections from abc import ABCMeta -from typing import Any, cast, Dict, Generator, Tuple +from typing import Any, cast, Dict, Generator import pandas as pd from qlib.backtest import CommonInfrastructure, Order @@ -43,7 +43,7 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) - start_time=order.start_time.replace(hour=0, minute=0, second=0), end_time=order.start_time.replace(hour=23, minute=59, 
second=59), direction=order.direction, - method=None + method=None, ) ticks_index = pd.DatetimeIndex(data.index) @@ -56,7 +56,6 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) - ) else: ticks_for_order = None # FIXME: implement this logic - start_time = None # FIXME: implement this logic backtest_data = QlibIntradayBacktestData( order=order, @@ -81,8 +80,10 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) - def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None: super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs) - trade_range = outer_trade_decision.trade_range if outer_trade_decision is not None: + trade_range = outer_trade_decision.trade_range + assert trade_range is not None + self.adapter_dict = {} for decision in outer_trade_decision.get_decision(): order = cast(Order, decision) From 0824ced4196b98f8458a2f342d4fa51896a6bfc0 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Tue, 16 Aug 2022 13:01:53 +0800 Subject: [PATCH 15/23] Cherry pick updates --- qlib/backtest/executor.py | 2 + qlib/backtest/utils.py | 4 +- qlib/rl/integration/__init__.py | 2 + qlib/rl/integration/feature.py | 182 ++++++++++++++++++++++ qlib/rl/order_execution/simulator_qlib.py | 68 +------- qlib/rl/order_execution/state.py | 95 +++++++---- qlib/rl/order_execution/strategy.py | 31 +++- qlib/strategy/base.py | 3 +- 8 files changed, 279 insertions(+), 108 deletions(-) create mode 100644 qlib/rl/integration/__init__.py create mode 100644 qlib/rl/integration/feature.py diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index 07da6b57bc..c53f2b2fdf 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -137,6 +137,8 @@ def reset_common_infra(self, common_infra: CommonInfrastructure, copy_trade_acco else: self.common_infra.update(common_infra) + self.level_infra.reset_infra(common_infra=self.common_infra) + if 
common_infra.has("trade_account"): # NOTE: there is a trick in the code. # shallow copy is used instead of deepcopy. diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py index 05691f8228..a1470a3398 100644 --- a/qlib/backtest/utils.py +++ b/qlib/backtest/utils.py @@ -187,8 +187,8 @@ def get_range_idx(self, start_time: pd.Timestamp, end_time: pd.Timestamp) -> Tup Tuple[int, int]: the index of the range. **the left and right are closed** """ - left = bisect.bisect_right(list(self._calendar), start_time) - 1 - right = bisect.bisect_right(list(self._calendar), end_time) - 1 + left = bisect.bisect_right(self._calendar, start_time) - 1 + right = bisect.bisect_right(self._calendar, end_time) - 1 left -= self.start_index right -= self.start_index diff --git a/qlib/rl/integration/__init__.py b/qlib/rl/integration/__init__.py new file mode 100644 index 0000000000..59e481eb93 --- /dev/null +++ b/qlib/rl/integration/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py new file mode 100644 index 0000000000..8a5b653ece --- /dev/null +++ b/qlib/rl/integration/feature.py @@ -0,0 +1,182 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +from __future__ import annotations + +import collections +import pickle +from pathlib import Path +from typing import List + +import numpy as np +import pandas as pd +import qlib +from qlib.constant import REG_CN +from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select +from qlib.data.dataset import DatasetH + +dataset = None + + +class LRUCache: + def __init__(self, pool_size: int = 200): + self.pool_size = pool_size + self.contents = dict() + self.keys = collections.deque() + + def put(self, key, item): + if self.has(key): + self.keys.remove(key) + self.keys.append(key) + self.contents[key] = item + while len(self.contents) > self.pool_size: + self.contents.pop(self.keys.popleft()) + + def get(self, key): + return self.contents[key] + + def has(self, key): + return key in self.contents + + +class DataWrapper: + def __init__(self, feature_dataset: DatasetH, backtest_dataset: DatasetH, + columns_today: List[str], columns_yesterday: List[str], _internal: bool = False): + assert _internal, 'Init function of data wrapper is for internal use only.' 
+ + self.feature_dataset = feature_dataset + self.backtest_dataset = backtest_dataset + self.columns_today = columns_today + self.columns_yesterday = columns_yesterday + + self.feature_cache = LRUCache() + self.backtest_cache = LRUCache() + + def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False): + start_time, end_time = date.replace(hour=0, minute=0, second=0), date.replace(hour=23, minute=59, second=59) + + if backtest: + dataset = self.backtest_dataset + cache = self.backtest_cache + else: + dataset = self.feature_dataset + cache = self.feature_cache + + if cache.has((start_time, end_time, stock_id)): + return cache.get((start_time, end_time, stock_id)) + data = dataset.handler.fetch(pd.IndexSlice[stock_id, start_time:end_time], level=None) + cache.put((start_time, end_time, stock_id), data) + return data + + +def init_qlib(qlib_config: dict, part: str = None) -> None: + """Initialize necessary resource to launch the workflow, including data direction, feature columns, etc.. + + Parameters + ---------- + qlib_config: + Qlib configuration. + + Example: + { + "provider_uri_day": DATA_ROOT_DIR / "qlib_1d", + "provider_uri_1min": DATA_ROOT_DIR / "qlib_1min", + "feature_root_dir": DATA_ROOT_DIR / "qlib_handler_stock", + "feature_columns_today": [ + "$open", "$high", "$low", "$close", "$vwap", "$bid", "$ask", "$volume", + "$bidV", "$bidV1", "$bidV3", "$bidV5", "$askV", "$askV1", "$askV3", "$askV5", + ], + "feature_columns_yesterday": [ + "$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1", "$bid_1", "$ask_1", "$volume_1", + "$bidV_1", "$bidV1_1", "$bidV3_1", "$bidV5_1", "$askV_1", "$askV1_1", "$askV3_1", "$askV5_1", + ], + } + part + Identifying which part (stock / date) to load. 
+ """ + + global dataset + + def _convert_to_path(path: str | Path) -> Path: + return path if isinstance(path, Path) else Path(path) + + provider_uri_map = { + "day": _convert_to_path(qlib_config["provider_uri_day"]).as_posix(), + "1min": _convert_to_path(qlib_config["provider_uri_1min"]).as_posix(), + } + qlib.init( + region=REG_CN, + auto_mount=False, + custom_ops=[DayLast, FFillNan, BFillNan, Date, Select, IsNull, IsInf, Cut, DayCumsum], + expression_cache=None, + calendar_provider={ + "class": "LocalCalendarProvider", + "module_path": "qlib.data.data", + "kwargs": { + "backend": { + "class": "FileCalendarStorage", + "module_path": "qlib.data.storage.file_storage", + "kwargs": {"provider_uri_map": provider_uri_map}, + }, + }, + }, + feature_provider={ + "class": "LocalFeatureProvider", + "module_path": "qlib.data.data", + "kwargs": { + "backend": { + "class": "FileFeatureStorage", + "module_path": "qlib.data.storage.file_storage", + "kwargs": {"provider_uri_map": provider_uri_map}, + }, + }, + }, + provider_uri=provider_uri_map, + kernels=1, + redis_port=-1, + clear_mem_cache=False, # init_qlib will be called for multiple times. 
Keep the cache for improving performance + ) + + if part == "skip": + return + + # this won't work if it's put outside in case of multiprocessing + from qlib.data import D + + if part is None: + feature_path = Path(qlib_config["feature_root_dir"]) / 'feature.pkl' + backtest_path = Path(qlib_config["feature_root_dir"]) / 'backtest.pkl' + else: + feature_path = Path(qlib_config["feature_root_dir"]) / 'feature' / (part + '.pkl') + backtest_path = Path(qlib_config["feature_root_dir"]) / 'backtest' / (part + '.pkl') + + with feature_path.open('rb') as f: + feature_dataset = pickle.load(f) + with backtest_path.open('rb') as f: + backtest_dataset = pickle.load(f) + + dataset = DataWrapper( + feature_dataset, + backtest_dataset, + qlib_config["feature_columns_today"], + qlib_config["feature_columns_yesterday"], + _internal=True + ) + + +def fetch_features(stock_id: str, date: pd.Timestamp, yesterday: bool = False, backtest: bool = False): + assert dataset is not None, 'You must call init_qlib() before doing this.' 
+ + if backtest: + fields = ['$close', '$volume'] + else: + fields = dataset.columns_yesterday if yesterday else dataset.columns_today + + data = dataset.get(stock_id, date, backtest) + if data is None or len(data) == 0: + # create a fake index, but RL doesn't care about index + data = pd.DataFrame(0., index=np.arange(240), columns=fields, dtype=np.float32) # FIXME: hardcode here + else: + data = data.rename(columns={c: c.rstrip('0') for c in data.columns}) + data = data[fields] + return data diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index 35e829fb7f..e7638ffb65 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -6,79 +6,15 @@ from typing import Generator, Optional import pandas as pd -import qlib from qlib.backtest import get_strategy_executor from qlib.backtest.decision import Order from qlib.backtest.executor import NestedExecutor -from qlib.config import REG_CN -from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select +from qlib.rl.integration.feature import init_qlib from qlib.rl.order_execution.state import SAOEState from qlib.rl.order_execution.strategy import SAOEStrategy from qlib.rl.simulator import Simulator -def init_qlib(qlib_config: dict) -> None: - """Initialize necessary resource to launch the workflow, including data direction, feature columns, etc.. - - Parameters - ---------- - qlib_config: - Qlib configuration. 
- - Example: - { - "provider_uri_day": DATA_ROOT_DIR / "qlib_1d", - "provider_uri_1min": DATA_ROOT_DIR / "qlib_1min", - "feature_root_dir": DATA_ROOT_DIR / "qlib_handler_stock", - "feature_columns_today": [ - "$open", "$high", "$low", "$close", "$vwap", "$bid", "$ask", "$volume", - "$bidV", "$bidV1", "$bidV3", "$bidV5", "$askV", "$askV1", "$askV3", "$askV5", - ], - "feature_columns_yesterday": [ - "$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1", "$bid_1", "$ask_1", "$volume_1", - "$bidV_1", "$bidV1_1", "$bidV3_1", "$bidV5_1", "$askV_1", "$askV1_1", "$askV3_1", "$askV5_1", - ], - } - """ - - provider_uri_map = { - "day": qlib_config["provider_uri_day"].as_posix(), - "1min": qlib_config["provider_uri_1min"].as_posix(), - } - qlib.init( - region=REG_CN, - auto_mount=False, - custom_ops=[DayLast, FFillNan, BFillNan, Date, Select, IsNull, IsInf, Cut, DayCumsum], - expression_cache=None, - calendar_provider={ - "class": "LocalCalendarProvider", - "module_path": "qlib.data.data", - "kwargs": { - "backend": { - "class": "FileCalendarStorage", - "module_path": "qlib.data.storage.file_storage", - "kwargs": {"provider_uri_map": provider_uri_map}, - }, - }, - }, - feature_provider={ - "class": "LocalFeatureProvider", - "module_path": "qlib.data.data", - "kwargs": { - "backend": { - "class": "FileFeatureStorage", - "module_path": "qlib.data.storage.file_storage", - "kwargs": {"provider_uri_map": provider_uri_map}, - }, - }, - }, - provider_uri=provider_uri_map, - kernels=1, - redis_port=-1, - clear_mem_cache=False, # init_qlib will be called for multiple times. Keep the cache for improving performance - ) - - class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools. 
@@ -120,7 +56,7 @@ def reset( qlib_config: dict = None, ) -> None: if qlib_config is not None: - init_qlib(qlib_config) + init_qlib(qlib_config, part="skip") strategy, self._executor = get_strategy_executor( start_time=order.start_time.replace(hour=0, minute=0, second=0), diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index 9e93562701..97c2ea942a 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -3,18 +3,36 @@ from __future__ import annotations -from typing import NamedTuple, Optional, cast +from typing import cast, NamedTuple, Optional, Tuple import numpy as np import pandas as pd -from typing_extensions import TypedDict - from qlib.backtest import Exchange, Order from qlib.backtest.executor import BaseExecutor -from qlib.constant import EPS +from qlib.constant import EPS, REG_CN from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData from qlib.rl.data.pickle_styled import IntradayBacktestData -from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, price_advantage +from qlib.rl.order_execution.utils import dataframe_append, price_advantage +from qlib.utils.time import get_day_min_idx_range +from typing_extensions import TypedDict + + +def _get_all_timestamps( + start: pd.Timestamp, + end: pd.Timestamp, + granularity: pd.Timedelta = pd.Timedelta("1min"), + include_end: bool = True, +) -> pd.DatetimeIndex: + ret = [] + while start <= end: + ret.append(start) + start += granularity + + if ret[-1] > end: + ret.pop() + if ret[-1] == end and not include_end: + ret.pop() + return pd.DatetimeIndex(ret) class QlibBacktestAdapter: @@ -67,33 +85,42 @@ def _next_time(self) -> pd.Timestamp: else: return self.order.end_time - def update(self, execute_result: list) -> None: - exec_vol = np.array([e[0].deal_amount for e in execute_result]) - num_step = len(execute_result) - - if num_step == 0: - market_volume = np.array([]) - market_price = np.array([]) - datetime_list 
= pd.DatetimeIndex([]) - else: - market_volume = np.array( - self.exchange.get_volume( - self.order.stock_id, - execute_result[0][0].start_time, - execute_result[-1][0].start_time, - method=None, - ), - ) - - # Get data from the SimulatorExecutor's (lowest-level executor) indicator - simulator_executor = get_simulator_executor(self.executor) - simulator_trade_account = simulator_executor.trade_account - simulator_df = simulator_trade_account.get_trade_indicator().generate_trade_indicators_dataframe() - - trade_value = simulator_df.iloc[-num_step:]["value"].values - deal_amount = simulator_df.iloc[-num_step:]["deal_amount"].values - market_price = trade_value / deal_amount - datetime_list = simulator_df.index[-num_step:] + def update( + self, + execute_result: list, + last_step_range: Tuple[int, int], + ) -> None: + last_step_size = last_step_range[1] - last_step_range[0] + 1 + start_time = self.ticks_index[last_step_range[0]] + end_time = self.ticks_index[last_step_range[1]] + + exec_vol = np.zeros(last_step_size) + for order, _, __, ___ in execute_result: + idx, _ = get_day_min_idx_range(order.start_time, order.end_time, '1min', REG_CN) + exec_vol[idx - last_step_range[0]] = order.deal_amount + + if exec_vol.sum() > self.position and exec_vol.sum() > 0.0: + assert exec_vol.sum() < self.position + 1, f'{exec_vol} too large' + exec_vol *= self.position / (exec_vol.sum()) + + market_volume = np.array( + self.exchange.get_volume( + self.order.stock_id, + pd.Timestamp(start_time), + pd.Timestamp(end_time), + method=None, + ), + ).reshape(-1) + + market_price = np.array( + self.exchange.get_deal_price( + self.order.stock_id, + pd.Timestamp(start_time), + pd.Timestamp(end_time), + method=None, + direction=self.order.direction, + ), + ).reshape(-1) assert market_price.shape == market_volume.shape == exec_vol.shape @@ -104,7 +131,7 @@ def update(self, execute_result: list) -> None: self.history_exec, self._collect_multi_order_metric( order=self.order, - 
datetime=datetime_list, + datetime=_get_all_timestamps(start_time, end_time, include_end=True), market_vol=market_volume, market_price=market_price, exec_vol=exec_vol, @@ -147,7 +174,7 @@ def generate_metrics_after_done(self) -> None: def _collect_multi_order_metric( self, order: Order, - datetime: pd.Timestamp, + datetime: pd.DatetimeIndex, market_vol: np.ndarray, market_price: np.ndarray, exec_vol: np.ndarray, diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index 7b4ced2309..8f63bec5cf 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -1,11 +1,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +from __future__ import annotations + import collections from abc import ABCMeta -from typing import Any, cast, Dict, Generator +from typing import Any, Dict, Generator, Tuple, cast import pandas as pd + from qlib.backtest import CommonInfrastructure, Order from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange, TradeRangeByTime from qlib.backtest.utils import LevelInfrastructure, SAOE_DATA_KEY @@ -26,9 +29,16 @@ def __init__( common_infra: CommonInfrastructure = None, **kwargs: Any, ) -> None: - super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs) + super(SAOEStrategy, self).__init__( + policy=policy, + outer_trade_decision=outer_trade_decision, + level_infra=level_infra, + common_infra=common_infra, + **kwargs, + ) self.adapter_dict: Dict[tuple, QlibBacktestAdapter] = {} + self._last_step_range = (0, 0) def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter: if not self.common_infra.has(SAOE_DATA_KEY): @@ -67,6 +77,7 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) - saoe_data[order.key] = (ticks_index, ticks_for_order, backtest_data) ticks_index, ticks_for_order, backtest_data = saoe_data[order.key] + return 
QlibBacktestAdapter( order=order, executor=self.executor, @@ -77,10 +88,16 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) - backtest_data=backtest_data, ) + def _update_last_step_range(self, step_range: Tuple[int, int]) -> None: + self._last_step_range = step_range + def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None: super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs) - if outer_trade_decision is not None: + self.adapter_dict = {} + self._last_step_range = (0, 0) + + if outer_trade_decision is not None and not outer_trade_decision.empty(): trade_range = outer_trade_decision.trade_range assert trade_range is not None @@ -97,13 +114,18 @@ def post_upper_level_exe_step(self) -> None: maintainer.generate_metrics_after_done() def post_exe_step(self, execute_result: list) -> None: + last_step_length = self._last_step_range[1] - self._last_step_range[0] + if last_step_length <= 0: + assert not execute_result + return + results = collections.defaultdict(list) if execute_result is not None: for e in execute_result: results[e[0].key].append(e) for key, maintainer in self.adapter_dict.items(): - maintainer.update(results[key]) + maintainer.update(results[key], self._last_step_range) class DecomposedStrategy(SAOEStrategy): @@ -127,6 +149,7 @@ def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, oh = self.trade_exchange.get_order_helper() order = oh.create(self._order.stock_id, exec_vol, self._order.direction) + self._update_last_step_range(self.get_data_cal_avail_range(rtype="step")) return TradeDecisionWO([order], self) diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 6c173a9ec3..615ddcbc38 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -169,7 +169,6 @@ def update_trade_decision( # default to return None, which indicates that the trade decision is not changed return None - # FIXME: do not define this method as 
an abstract one since it is never implemented def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: """ A method for updating the outer_trade_decision. @@ -186,7 +185,7 @@ def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> """ # default to reset the decision directly # NOTE: normally, user should do something to the strategy due to the change of outer decision - raise NotImplementedError(f"Please implement the `alter_outer_trade_decision` method") + pass # helper methods: not necessary but for convenience def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]: From 48d5a1da3297959628d4ecc1834542f3a74224e9 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Tue, 16 Aug 2022 14:14:50 +0800 Subject: [PATCH 16/23] Resolve PR comments --- qlib/constant.py | 9 +++++++ qlib/rl/integration/feature.py | 26 +++------------------ qlib/rl/order_execution/constants.py | 12 ---------- qlib/rl/order_execution/simulator_simple.py | 3 +-- qlib/rl/order_execution/strategy.py | 14 +++++------ qlib/rl/order_execution/utils.py | 2 +- qlib/rl/strategy/single_order.py | 3 ++- qlib/rl/utils/cache.py | 24 +++++++++++++++++++ qlib/strategy/base.py | 4 +++- tests/rl/test_qlib_simulator.py | 2 +- 10 files changed, 51 insertions(+), 48 deletions(-) delete mode 100644 qlib/rl/order_execution/constants.py create mode 100644 qlib/rl/utils/cache.py diff --git a/qlib/constant.py b/qlib/constant.py index 458890957d..cad1a7e6b3 100644 --- a/qlib/constant.py +++ b/qlib/constant.py @@ -2,6 +2,11 @@ # Licensed under the MIT License. 
# REGION CONST +from typing import TypeVar + +import numpy as np +import pandas as pd + REG_CN = "cn" REG_US = "us" REG_TW = "tw" @@ -11,3 +16,7 @@ # Infinity in integer INF = 10**18 +FINEST_GRANULARITY = "1min" +COARSEST_GRANULARITY = "1day" +ONE_SEC = pd.Timedelta("1s") # use 1 second to exclude the right interval point +float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) \ No newline at end of file diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py index 8a5b653ece..07e4c0a2b8 100644 --- a/qlib/rl/integration/feature.py +++ b/qlib/rl/integration/feature.py @@ -3,7 +3,6 @@ from __future__ import annotations -import collections import pickle from pathlib import Path from typing import List @@ -14,31 +13,11 @@ from qlib.constant import REG_CN from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select from qlib.data.dataset import DatasetH +from qlib.rl.utils.cache import LRUCache dataset = None -class LRUCache: - def __init__(self, pool_size: int = 200): - self.pool_size = pool_size - self.contents = dict() - self.keys = collections.deque() - - def put(self, key, item): - if self.has(key): - self.keys.remove(key) - self.keys.append(key) - self.contents[key] = item - while len(self.contents) > self.pool_size: - self.contents.pop(self.keys.popleft()) - - def get(self, key): - return self.contents[key] - - def has(self, key): - return key in self.contents - - class DataWrapper: def __init__(self, feature_dataset: DatasetH, backtest_dataset: DatasetH, columns_today: List[str], columns_yesterday: List[str], _internal: bool = False): @@ -77,7 +56,8 @@ def init_qlib(qlib_config: dict, part: str = None) -> None: qlib_config: Qlib configuration. 
- Example: + Example:: + { "provider_uri_day": DATA_ROOT_DIR / "qlib_1d", "provider_uri_1min": DATA_ROOT_DIR / "qlib_1min", diff --git a/qlib/rl/order_execution/constants.py b/qlib/rl/order_execution/constants.py deleted file mode 100644 index 2f6c81b825..0000000000 --- a/qlib/rl/order_execution/constants.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from typing import TypeVar - -import numpy as np -import pandas as pd - -FINEST_GRANULARITY = "1min" -COARSEST_GRANULARITY = "1day" -ONE_SEC = pd.Timedelta("1s") # use 1 second to exclude the right interval point -float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py index 5754d34c2f..59cd92b4fe 100644 --- a/qlib/rl/order_execution/simulator_simple.py +++ b/qlib/rl/order_execution/simulator_simple.py @@ -10,9 +10,8 @@ import pandas as pd from qlib.backtest.decision import Order, OrderDir -from qlib.constant import EPS +from qlib.constant import EPS, ONE_SEC, float_or_ndarray from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data -from qlib.rl.order_execution.constants import ONE_SEC, float_or_ndarray from qlib.rl.order_execution.state import SAOEMetrics, SAOEState from qlib.rl.simulator import Simulator from qlib.rl.utils import LogLevel diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index 8f63bec5cf..b5be3f9879 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -4,7 +4,6 @@ from __future__ import annotations import collections -from abc import ABCMeta from typing import Any, Dict, Generator, Tuple, cast import pandas as pd @@ -15,10 +14,11 @@ from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState from qlib.rl.order_execution.utils import 
get_ticks_slice +from qlib.rl.utils.cache import LRUCache from qlib.strategy.base import RLStrategy -class SAOEStrategy(RLStrategy, metaclass=ABCMeta): +class SAOEStrategy(RLStrategy): """RL-based strategies that use SAOEState as state.""" def __init__( @@ -42,12 +42,12 @@ def __init__( def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter: if not self.common_infra.has(SAOE_DATA_KEY): - self.common_infra.reset_infra(**{SAOE_DATA_KEY: {}}) + self.common_infra.reset_infra(**{SAOE_DATA_KEY: LRUCache(pool_size=100)}) # saoe_data can be considered as some type of cache. Use it to avoid unnecessary data reload. # The data for one order would be loaded only once. All strategies will reuse this data. - saoe_data = self.common_infra.get(SAOE_DATA_KEY) - if order.key not in saoe_data: + saoe_data = cast(LRUCache, self.common_infra.get(SAOE_DATA_KEY)) + if not saoe_data.has(order.key): data = self.trade_exchange.get_deal_price( stock_id=order.stock_id, start_time=order.start_time.replace(hour=0, minute=0, second=0), @@ -74,9 +74,9 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) - end_time=ticks_for_order[-1], ) - saoe_data[order.key] = (ticks_index, ticks_for_order, backtest_data) + saoe_data.put(key=order.key, item=(ticks_index, ticks_for_order, backtest_data)) - ticks_index, ticks_for_order, backtest_data = saoe_data[order.key] + ticks_index, ticks_for_order, backtest_data = saoe_data.get(order.key) return QlibBacktestAdapter( order=order, diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py index f861237398..a498037ad0 100644 --- a/qlib/rl/order_execution/utils.py +++ b/qlib/rl/order_execution/utils.py @@ -10,7 +10,7 @@ from qlib.backtest.decision import OrderDir from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor -from qlib.rl.order_execution.constants import ONE_SEC, float_or_ndarray +from qlib.constant import ONE_SEC, 
float_or_ndarray def get_ticks_slice( diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py index 0055781973..d5b5915709 100644 --- a/qlib/rl/strategy/single_order.py +++ b/qlib/rl/strategy/single_order.py @@ -7,7 +7,8 @@ class SingleOrderStrategy(BaseStrategy): - # this logic is copied from FileOrderStrategy + """Strategy used to generate a trade decision with exactly one order. + """ def __init__( self, order: Order, diff --git a/qlib/rl/utils/cache.py b/qlib/rl/utils/cache.py new file mode 100644 index 0000000000..abe35afa1f --- /dev/null +++ b/qlib/rl/utils/cache.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +import collections + + +class LRUCache: + def __init__(self, pool_size: int = 200): + self.pool_size = pool_size + self.contents = dict() + self.keys = collections.deque() + + def put(self, key, item): + if self.has(key): + self.keys.remove(key) + self.keys.append(key) + self.contents[key] = item + while len(self.contents) > self.pool_size: + self.contents.pop(self.keys.popleft()) + + def get(self, key): + return self.contents[key] + + def has(self, key): + return key in self.contents diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 615ddcbc38..41103b8bde 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -7,6 +7,8 @@ import pandas as pd +from ..constant import FINEST_GRANULARITY + if TYPE_CHECKING: from qlib.backtest.exchange import Exchange from qlib.backtest.position import BasePosition @@ -68,7 +70,7 @@ def trade_calendar(self) -> TradeCalendarManager: @property def ticks_per_step(self) -> int: - return int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")) + return int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta(FINEST_GRANULARITY)) @property def trade_position(self) -> BasePosition: diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py index df78976654..230611fa66 100644 --- a/tests/rl/test_qlib_simulator.py +++ 
b/tests/rl/test_qlib_simulator.py @@ -10,7 +10,7 @@ from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime from qlib.backtest.executor import SimulatorExecutor from qlib.rl.order_execution import CategoricalActionInterpreter -from qlib.rl.order_execution.constants import FINEST_GRANULARITY +from qlib.constant import FINEST_GRANULARITY from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecutionQlib TOTAL_POSITION = 2100.0 From 4e7d3db06b4e09afdfaca6220454010c0e21277c Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Tue, 16 Aug 2022 14:49:05 +0800 Subject: [PATCH 17/23] CI issues --- qlib/backtest/utils.py | 5 ++-- qlib/constant.py | 2 +- qlib/rl/integration/feature.py | 42 ++++++++++++++++++-------------- qlib/rl/order_execution/state.py | 4 +-- qlib/rl/strategy/single_order.py | 4 +-- qlib/rl/utils/cache.py | 6 ++--- qlib/strategy/base.py | 1 - 7 files changed, 34 insertions(+), 30 deletions(-) diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py index a1470a3398..f2a4851684 100644 --- a/qlib/backtest/utils.py +++ b/qlib/backtest/utils.py @@ -3,7 +3,6 @@ from __future__ import annotations -import bisect from abc import abstractmethod from typing import TYPE_CHECKING, Any, Set, Tuple, Union @@ -187,8 +186,8 @@ def get_range_idx(self, start_time: pd.Timestamp, end_time: pd.Timestamp) -> Tup Tuple[int, int]: the index of the range. 
**the left and right are closed** """ - left = bisect.bisect_right(self._calendar, start_time) - 1 - right = bisect.bisect_right(self._calendar, end_time) - 1 + left = np.searchsorted(self._calendar, start_time, side="right") - 1 + right = np.searchsorted(self._calendar, end_time, side="right") - 1 left -= self.start_index right -= self.start_index diff --git a/qlib/constant.py b/qlib/constant.py index cad1a7e6b3..607f0bcf51 100644 --- a/qlib/constant.py +++ b/qlib/constant.py @@ -19,4 +19,4 @@ FINEST_GRANULARITY = "1min" COARSEST_GRANULARITY = "1day" ONE_SEC = pd.Timedelta("1s") # use 1 second to exclude the right interval point -float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) \ No newline at end of file +float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py index 07e4c0a2b8..3c35e9c47b 100644 --- a/qlib/rl/integration/feature.py +++ b/qlib/rl/integration/feature.py @@ -19,9 +19,15 @@ class DataWrapper: - def __init__(self, feature_dataset: DatasetH, backtest_dataset: DatasetH, - columns_today: List[str], columns_yesterday: List[str], _internal: bool = False): - assert _internal, 'Init function of data wrapper is for internal use only.' + def __init__( + self, + feature_dataset: DatasetH, + backtest_dataset: DatasetH, + columns_today: List[str], + columns_yesterday: List[str], + _internal: bool = False, + ): + assert _internal, "Init function of data wrapper is for internal use only." 
self.feature_dataset = feature_dataset self.backtest_dataset = backtest_dataset @@ -31,7 +37,7 @@ def __init__(self, feature_dataset: DatasetH, backtest_dataset: DatasetH, self.feature_cache = LRUCache() self.backtest_cache = LRUCache() - def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False): + def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False) -> pd.DataFrame: start_time, end_time = date.replace(hour=0, minute=0, second=0), date.replace(hour=23, minute=59, second=59) if backtest: @@ -75,7 +81,7 @@ def init_qlib(qlib_config: dict, part: str = None) -> None: Identifying which part (stock / date) to load. """ - global dataset + global dataset # pylint: disable=W0603 def _convert_to_path(path: str | Path) -> Path: return path if isinstance(path, Path) else Path(path) @@ -121,18 +127,18 @@ def _convert_to_path(path: str | Path) -> Path: return # this won't work if it's put outside in case of multiprocessing - from qlib.data import D + from qlib.data import D # noqa pylint: disable=C0415,W0611 if part is None: - feature_path = Path(qlib_config["feature_root_dir"]) / 'feature.pkl' - backtest_path = Path(qlib_config["feature_root_dir"]) / 'backtest.pkl' + feature_path = Path(qlib_config["feature_root_dir"]) / "feature.pkl" + backtest_path = Path(qlib_config["feature_root_dir"]) / "backtest.pkl" else: - feature_path = Path(qlib_config["feature_root_dir"]) / 'feature' / (part + '.pkl') - backtest_path = Path(qlib_config["feature_root_dir"]) / 'backtest' / (part + '.pkl') + feature_path = Path(qlib_config["feature_root_dir"]) / "feature" / (part + ".pkl") + backtest_path = Path(qlib_config["feature_root_dir"]) / "backtest" / (part + ".pkl") - with feature_path.open('rb') as f: + with feature_path.open("rb") as f: feature_dataset = pickle.load(f) - with backtest_path.open('rb') as f: + with backtest_path.open("rb") as f: backtest_dataset = pickle.load(f) dataset = DataWrapper( @@ -140,23 +146,23 @@ def _convert_to_path(path: str | 
Path) -> Path: backtest_dataset, qlib_config["feature_columns_today"], qlib_config["feature_columns_yesterday"], - _internal=True + _internal=True, ) -def fetch_features(stock_id: str, date: pd.Timestamp, yesterday: bool = False, backtest: bool = False): - assert dataset is not None, 'You must call init_qlib() before doing this.' +def fetch_features(stock_id: str, date: pd.Timestamp, yesterday: bool = False, backtest: bool = False) -> pd.DataFrame: + assert dataset is not None, "You must call init_qlib() before doing this." if backtest: - fields = ['$close', '$volume'] + fields = ["$close", "$volume"] else: fields = dataset.columns_yesterday if yesterday else dataset.columns_today data = dataset.get(stock_id, date, backtest) if data is None or len(data) == 0: # create a fake index, but RL doesn't care about index - data = pd.DataFrame(0., index=np.arange(240), columns=fields, dtype=np.float32) # FIXME: hardcode here + data = pd.DataFrame(0.0, index=np.arange(240), columns=fields, dtype=np.float32) # FIXME: hardcode here else: - data = data.rename(columns={c: c.rstrip('0') for c in data.columns}) + data = data.rename(columns={c: c.rstrip("0") for c in data.columns}) data = data[fields] return data diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index 97c2ea942a..8e8067fec3 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -96,11 +96,11 @@ def update( exec_vol = np.zeros(last_step_size) for order, _, __, ___ in execute_result: - idx, _ = get_day_min_idx_range(order.start_time, order.end_time, '1min', REG_CN) + idx, _ = get_day_min_idx_range(order.start_time, order.end_time, "1min", REG_CN) exec_vol[idx - last_step_range[0]] = order.deal_amount if exec_vol.sum() > self.position and exec_vol.sum() > 0.0: - assert exec_vol.sum() < self.position + 1, f'{exec_vol} too large' + assert exec_vol.sum() < self.position + 1, f"{exec_vol} too large" exec_vol *= self.position / (exec_vol.sum()) market_volume = 
np.array( diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py index d5b5915709..042c88aa3c 100644 --- a/qlib/rl/strategy/single_order.py +++ b/qlib/rl/strategy/single_order.py @@ -7,8 +7,8 @@ class SingleOrderStrategy(BaseStrategy): - """Strategy used to generate a trade decision with exactly one order. - """ + """Strategy used to generate a trade decision with exactly one order.""" + def __init__( self, order: Order, diff --git a/qlib/rl/utils/cache.py b/qlib/rl/utils/cache.py index abe35afa1f..b0b3b43fb4 100644 --- a/qlib/rl/utils/cache.py +++ b/qlib/rl/utils/cache.py @@ -4,10 +4,10 @@ class LRUCache: - def __init__(self, pool_size: int = 200): + def __init__(self, pool_size: int = 200) -> None: self.pool_size = pool_size - self.contents = dict() - self.keys = collections.deque() + self.contents: dict = {} + self.keys: collections.deque = collections.deque() def put(self, key, item): if self.has(key): diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 41103b8bde..3725223778 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -187,7 +187,6 @@ def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> """ # default to reset the decision directly # NOTE: normally, user should do something to the strategy due to the change of outer decision - pass # helper methods: not necessary but for convenience def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]: From 422163ae1e141625039c1ff33d76210f8dbd24b7 Mon Sep 17 00:00:00 2001 From: Default Date: Wed, 17 Aug 2022 09:47:16 +0800 Subject: [PATCH 18/23] Refine adapter & saoe_data logic --- qlib/backtest/utils.py | 9 +---- qlib/rl/data/exchange_wrapper.py | 59 +++++++++++++++++++++++++++-- qlib/rl/order_execution/state.py | 31 ++++++++------- qlib/rl/order_execution/strategy.py | 59 +++++------------------------ 4 files changed, 81 insertions(+), 77 deletions(-) diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py index 
f2a4851684..f815d10554 100644 --- a/qlib/backtest/utils.py +++ b/qlib/backtest/utils.py @@ -4,7 +4,7 @@ from __future__ import annotations from abc import abstractmethod -from typing import TYPE_CHECKING, Any, Set, Tuple, Union +from typing import Any, Set, Tuple, TYPE_CHECKING, Union import numpy as np @@ -20,9 +20,6 @@ from ..data.data import Cal -SAOE_DATA_KEY = "saoe_data" - - class TradeCalendarManager: """ Manager for trading calendar @@ -237,9 +234,7 @@ def update(self, other: BaseInfrastructure) -> None: class CommonInfrastructure(BaseInfrastructure): def get_support_infra(self) -> Set[str]: - # SAOE_DATA_KEY is used to store SAOE (single asset order execution) information that should be shared by - # all strategies. It should be dict. - return {"trade_account", "trade_exchange", SAOE_DATA_KEY} + return {"trade_account", "trade_exchange"} class LevelInfrastructure(BaseInfrastructure): diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py index fb8daabf33..62305dad6f 100644 --- a/qlib/rl/data/exchange_wrapper.py +++ b/qlib/rl/data/exchange_wrapper.py @@ -3,22 +3,33 @@ from typing import cast +import cachetools import pandas as pd from qlib.backtest import Exchange, Order - +from qlib.backtest.decision import TradeRange, TradeRangeByTime +from qlib.rl.order_execution.utils import get_ticks_slice from .pickle_styled import IntradayBacktestData +from ...utils.index_data import IndexData class QlibIntradayBacktestData(IntradayBacktestData): """Backtest data for Qlib simulator""" - def __init__(self, order: Order, exchange: Exchange, start_time: pd.Timestamp, end_time: pd.Timestamp) -> None: + def __init__( + self, + order: Order, + exchange: Exchange, + ticks_index: pd.DatetimeIndex, + ticks_for_order: pd.DatetimeIndex, + ) -> None: super(QlibIntradayBacktestData, self).__init__() self._order = order self._exchange = exchange - self._start_time = start_time - self._end_time = end_time + self._start_time = ticks_for_order[0] + 
self._end_time = ticks_for_order[-1] + self.ticks_index = ticks_index + self.ticks_for_order = ticks_for_order self._deal_price = cast( pd.Series, @@ -57,3 +68,43 @@ def get_volume(self) -> pd.Series: def get_time_index(self) -> pd.DatetimeIndex: return pd.DatetimeIndex([e[1] for e in list(self._exchange.quote_df.index)]) + + +@cachetools.cached( # type: ignore + cache=cachetools.LRUCache(100), + key=lambda order, _, __: order.key, +) +def load_qlib_backtest_data( + order: Order, + trade_exchange: Exchange, + trade_range: TradeRange, +) -> QlibIntradayBacktestData: + data = cast( + IndexData, + trade_exchange.get_deal_price( + stock_id=order.stock_id, + start_time=order.start_time.replace(hour=0, minute=0, second=0), + end_time=order.start_time.replace(hour=23, minute=59, second=59), + direction=order.direction, + method=None, + ), + ) + + ticks_index = pd.DatetimeIndex(data.index) + if isinstance(trade_range, TradeRangeByTime): + ticks_for_order = get_ticks_slice( + ticks_index, + trade_range.start_time, + trade_range.end_time, + include_end=True, + ) + else: + ticks_for_order = None # FIXME: implement this logic + + backtest_data = QlibIntradayBacktestData( + order=order, + exchange=trade_exchange, + ticks_index=ticks_index, + ticks_for_order=ticks_for_order, + ) + return backtest_data diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index 8e8067fec3..639cc95089 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -41,9 +41,9 @@ class QlibBacktestAdapter: Example usage:: - maintainer = StateMaintainer(...) - maintainer.update(...) - state = maintainer.saoe_state + adapter = QlibBacktestAdapter(...) + adapter.update(...) 
+ state = adapter.saoe_state """ def __init__( @@ -52,8 +52,6 @@ def __init__( executor: BaseExecutor, exchange: Exchange, ticks_per_step: int, - ticks_index: pd.DatetimeIndex, - ticks_for_order: pd.DatetimeIndex, backtest_data: QlibIntradayBacktestData, ) -> None: super().__init__() @@ -62,8 +60,6 @@ def __init__( self.order = order self.executor = executor self.exchange = exchange - self.ticks_index = ticks_index - self.ticks_for_order = ticks_for_order self.backtest_data = backtest_data self.twap_price = self.backtest_data.get_deal_price().mean() @@ -73,15 +69,18 @@ def __init__( self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime") self.metrics: Optional[SAOEMetrics] = None - self.cur_time = max(ticks_for_order[0], order.start_time) + self.cur_time = max(backtest_data.ticks_for_order[0], order.start_time) self.ticks_per_step = ticks_per_step def _next_time(self) -> pd.Timestamp: - current_loc = self.ticks_index.get_loc(self.cur_time) + current_loc = self.backtest_data.ticks_index.get_loc(self.cur_time) next_loc = current_loc + self.ticks_per_step next_loc = next_loc - next_loc % self.ticks_per_step - if next_loc < len(self.ticks_index) and self.ticks_index[next_loc] < self.order.end_time: - return self.ticks_index[next_loc] + if ( + next_loc < len(self.backtest_data.ticks_index) + and self.backtest_data.ticks_index[next_loc] < self.order.end_time + ): + return self.backtest_data.ticks_index[next_loc] else: return self.order.end_time @@ -91,8 +90,8 @@ def update( last_step_range: Tuple[int, int], ) -> None: last_step_size = last_step_range[1] - last_step_range[0] + 1 - start_time = self.ticks_index[last_step_range[0]] - end_time = self.ticks_index[last_step_range[1]] + start_time = self.backtest_data.ticks_index[last_step_range[0]] + end_time = self.backtest_data.ticks_index[last_step_range[1]] exec_vol = np.zeros(last_step_size) for order, _, __, ___ in execute_result: @@ -164,7 +163,7 @@ def generate_metrics_after_done(self) -> None: 
self.metrics = self._collect_single_order_metric( self.order, - self.ticks_index[0], # start time + self.backtest_data.ticks_index[0], # start time self.history_exec["market_volume"], self.history_exec["market_price"], self.history_steps["amount"].sum(), @@ -245,8 +244,8 @@ def saoe_state(self) -> SAOEState: metrics=self.metrics, backtest_data=self.backtest_data, ticks_per_step=self.ticks_per_step, - ticks_index=self.ticks_index, - ticks_for_order=self.ticks_for_order, + ticks_index=self.backtest_data.ticks_index, + ticks_for_order=self.backtest_data.ticks_for_order, ) diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index b5be3f9879..8726d371ba 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -4,17 +4,13 @@ from __future__ import annotations import collections -from typing import Any, Dict, Generator, Tuple, cast - -import pandas as pd +from typing import Any, cast, Dict, Generator, Tuple from qlib.backtest import CommonInfrastructure, Order -from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange, TradeRangeByTime -from qlib.backtest.utils import LevelInfrastructure, SAOE_DATA_KEY -from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData +from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange +from qlib.backtest.utils import LevelInfrastructure +from qlib.rl.data.exchange_wrapper import load_qlib_backtest_data from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState -from qlib.rl.order_execution.utils import get_ticks_slice -from qlib.rl.utils.cache import LRUCache from qlib.strategy.base import RLStrategy @@ -41,50 +37,13 @@ def __init__( self._last_step_range = (0, 0) def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter: - if not self.common_infra.has(SAOE_DATA_KEY): - self.common_infra.reset_infra(**{SAOE_DATA_KEY: LRUCache(pool_size=100)}) - - # 
saoe_data can be considered as some type of cache. Use it to avoid unnecessary data reload. - # The data for one order would be loaded only once. All strategies will reuse this data. - saoe_data = cast(LRUCache, self.common_infra.get(SAOE_DATA_KEY)) - if not saoe_data.has(order.key): - data = self.trade_exchange.get_deal_price( - stock_id=order.stock_id, - start_time=order.start_time.replace(hour=0, minute=0, second=0), - end_time=order.start_time.replace(hour=23, minute=59, second=59), - direction=order.direction, - method=None, - ) - - ticks_index = pd.DatetimeIndex(data.index) - if isinstance(trade_range, TradeRangeByTime): - ticks_for_order = get_ticks_slice( - ticks_index, - trade_range.start_time, - trade_range.end_time, - include_end=True, - ) - else: - ticks_for_order = None # FIXME: implement this logic - - backtest_data = QlibIntradayBacktestData( - order=order, - exchange=self.trade_exchange, - start_time=ticks_for_order[0], - end_time=ticks_for_order[-1], - ) - - saoe_data.put(key=order.key, item=(ticks_index, ticks_for_order, backtest_data)) - - ticks_index, ticks_for_order, backtest_data = saoe_data.get(order.key) + backtest_data = load_qlib_backtest_data(order, self.trade_exchange, trade_range) return QlibBacktestAdapter( order=order, executor=self.executor, exchange=self.trade_exchange, ticks_per_step=self.ticks_per_step, - ticks_index=ticks_index, - ticks_for_order=ticks_for_order, backtest_data=backtest_data, ) @@ -110,8 +69,8 @@ def get_saoe_state_by_order(self, order: Order) -> SAOEState: return self.adapter_dict[order.key].saoe_state def post_upper_level_exe_step(self) -> None: - for maintainer in self.adapter_dict.values(): - maintainer.generate_metrics_after_done() + for adapter in self.adapter_dict.values(): + adapter.generate_metrics_after_done() def post_exe_step(self, execute_result: list) -> None: last_step_length = self._last_step_range[1] - self._last_step_range[0] @@ -124,8 +83,8 @@ def post_exe_step(self, execute_result: list) -> 
None: for e in execute_result: results[e[0].key].append(e) - for key, maintainer in self.adapter_dict.items(): - maintainer.update(results[key], self._last_step_range) + for key, adapter in self.adapter_dict.items(): + adapter.update(results[key], self._last_step_range) class DecomposedStrategy(SAOEStrategy): From fe3b02f1523373489faee37ae64ac5cec05d4d21 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Tue, 23 Aug 2022 13:04:57 +0800 Subject: [PATCH 19/23] Resolve PR comments --- qlib/backtest/decision.py | 14 +++++++++++-- qlib/backtest/executor.py | 9 ++------ qlib/rl/data/exchange_wrapper.py | 6 +++--- qlib/rl/integration/feature.py | 24 +++++++--------------- qlib/rl/order_execution/simulator_qlib.py | 25 +++++++++++++++-------- qlib/rl/order_execution/state.py | 2 -- qlib/rl/order_execution/strategy.py | 9 +++----- qlib/rl/strategy/single_order.py | 5 +---- qlib/rl/utils/cache.py | 24 ---------------------- qlib/strategy/base.py | 7 ++++++- 10 files changed, 50 insertions(+), 75 deletions(-) delete mode 100644 qlib/rl/utils/cache.py diff --git a/qlib/backtest/decision.py b/qlib/backtest/decision.py index d41fa66f60..042b73fea8 100644 --- a/qlib/backtest/decision.py +++ b/qlib/backtest/decision.py @@ -135,10 +135,20 @@ def parse_dir(direction: Union[str, int, np.integer, OrderDir, np.ndarray]) -> U else: raise NotImplementedError(f"This type of input is not supported") + @property + def key_by_day(self) -> tuple: + """A hashable & unique key to identify this order, under the granularity in day.""" + return self.stock_id, self.date, self.direction + @property def key(self) -> tuple: - """A hashable & unique key to identify this order. 
Usually used as the key in a dict.""" - return self.stock_id, self.start_time.replace(hour=0, minute=0, second=0), self.direction + """A hashable & unique key to identify this order.""" + return self.stock_id, self.start_time, self.end_time, self.direction + + @property + def date(self) -> pd.Timestamp: + """Date of the order.""" + return pd.Timestamp(self.start_time.replace(hour=0, minute=0, second=0)) class OrderHelper: diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py index c53f2b2fdf..664f33a3cd 100644 --- a/qlib/backtest/executor.py +++ b/qlib/backtest/executor.py @@ -124,9 +124,6 @@ def __init__( self.dealt_order_amount: Dict[str, float] = defaultdict(float) self.deal_day = None - # whether the current executor is collecting data - self.is_collecting = False - def reset_common_infra(self, common_infra: CommonInfrastructure, copy_trade_account: bool = False) -> None: """ reset infrastructure for trading @@ -261,7 +258,6 @@ def collect_data( object trade decision """ - self.is_collecting = True if self.track_data: yield trade_decision @@ -304,7 +300,6 @@ def collect_data( if return_value is not None: return_value.update({"execute_result": res}) - self.is_collecting = False return res def get_all_executors(self) -> List[BaseExecutor]: @@ -405,7 +400,7 @@ def _update_trade_decision(self, trade_decision: BaseTradeDecision) -> BaseTrade trade_decision = updated_trade_decision # NEW UPDATE # create a hook for inner strategy to update outer decision - self.inner_strategy.alter_outer_trade_decision(trade_decision) + trade_decision = self.inner_strategy.alter_outer_trade_decision(trade_decision) return trade_decision def _collect_data( @@ -482,7 +477,7 @@ def _collect_data( # do nothing and just step forward sub_cal.step() - # Lef inner strategy know that the outer level execution is done. + # Let inner strategy know that the outer level execution is done. 
self.inner_strategy.post_upper_level_exe_step() return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list} diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py index 62305dad6f..6a657fc6f1 100644 --- a/qlib/rl/data/exchange_wrapper.py +++ b/qlib/rl/data/exchange_wrapper.py @@ -72,7 +72,7 @@ def get_time_index(self) -> pd.DatetimeIndex: @cachetools.cached( # type: ignore cache=cachetools.LRUCache(100), - key=lambda order, _, __: order.key, + key=lambda order, _, __: order.key_by_day, ) def load_qlib_backtest_data( order: Order, @@ -83,8 +83,8 @@ def load_qlib_backtest_data( IndexData, trade_exchange.get_deal_price( stock_id=order.stock_id, - start_time=order.start_time.replace(hour=0, minute=0, second=0), - end_time=order.start_time.replace(hour=23, minute=59, second=59), + start_time=order.date, + end_time=order.date + pd.Timedelta("1day") - pd.Timedelta("1s"), direction=order.direction, method=None, ), diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py index 3c35e9c47b..0b3a27159a 100644 --- a/qlib/rl/integration/feature.py +++ b/qlib/rl/integration/feature.py @@ -7,13 +7,13 @@ from pathlib import Path from typing import List +import cachetools import numpy as np import pandas as pd import qlib from qlib.constant import REG_CN from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select from qlib.data.dataset import DatasetH -from qlib.rl.utils.cache import LRUCache dataset = None @@ -34,24 +34,14 @@ def __init__( self.columns_today = columns_today self.columns_yesterday = columns_yesterday - self.feature_cache = LRUCache() - self.backtest_cache = LRUCache() - + @cachetools.cached( # type: ignore + cache=cachetools.LRUCache(100), + key=lambda stock_id, date, backtest: (stock_id, date.replace(hour=0, minute=0, second=0), backtest), + ) def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False) -> 
pd.DataFrame: start_time, end_time = date.replace(hour=0, minute=0, second=0), date.replace(hour=23, minute=59, second=59) - - if backtest: - dataset = self.backtest_dataset - cache = self.backtest_cache - else: - dataset = self.feature_dataset - cache = self.feature_cache - - if cache.has((start_time, end_time, stock_id)): - return cache.get((start_time, end_time, stock_id)) - data = dataset.handler.fetch(pd.IndexSlice[stock_id, start_time:end_time], level=None) - cache.put((start_time, end_time, stock_id), data) - return data + dataset = self.backtest_dataset if backtest else self.feature_dataset + return dataset.handler.fetch(pd.IndexSlice[stock_id, start_time:end_time], level=None) def init_qlib(qlib_config: dict, part: str = None) -> None: diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index e7638ffb65..808310dbaa 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -6,11 +6,11 @@ from typing import Generator, Optional import pandas as pd -from qlib.backtest import get_strategy_executor +from qlib.backtest import collect_data_loop, get_strategy_executor from qlib.backtest.decision import Order from qlib.backtest.executor import NestedExecutor from qlib.rl.integration.feature import init_qlib -from qlib.rl.order_execution.state import SAOEState +from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState from qlib.rl.order_execution.strategy import SAOEStrategy from qlib.rl.simulator import Simulator @@ -59,8 +59,8 @@ def reset( init_qlib(qlib_config, part="skip") strategy, self._executor = get_strategy_executor( - start_time=order.start_time.replace(hour=0, minute=0, second=0), - end_time=order.start_time.replace(hour=0, minute=0, second=0) + pd.DateOffset(1), + start_time=order.date, + end_time=order.date + pd.DateOffset(1), strategy=strategy_config, executor=executor_config, benchmark=order.stock_id, @@ -70,18 +70,25 @@ def reset( ) assert 
isinstance(self._executor, NestedExecutor) - strategy.reset(level_infra=self._executor.get_level_infra()) # TODO: check if we could remove this - self._collect_data_loop = self._executor.collect_data(strategy.generate_trade_decision(), level=0) + self._collect_data_loop = collect_data_loop( + start_time=order.date, + end_time=order.date, + trade_strategy=strategy, + trade_executor=self._executor, + ) assert isinstance(self._collect_data_loop, Generator) self._last_yielded_saoe_strategy = self._iter_strategy(action=None) self._order = order + def _get_adapter(self) -> QlibBacktestAdapter: + return self._last_yielded_saoe_strategy.adapter_dict[self._order.key_by_day] + @property def twap_price(self) -> float: - return self._last_yielded_saoe_strategy.adapter_dict[self._order.key].twap_price + return self._get_adapter().twap_price def _iter_strategy(self, action: float = None) -> SAOEStrategy: """Iterate the _collect_data_loop until we get the next yield SAOEStrategy.""" @@ -112,7 +119,7 @@ def step(self, action: float) -> None: assert self._executor is not None def get_state(self) -> SAOEState: - return self._last_yielded_saoe_strategy.get_saoe_state_by_order(self._order) + return self._get_adapter().saoe_state def done(self) -> bool: - return not self._executor.is_collecting + return self._executor.finished() diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index 639cc95089..bbdab40b94 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -54,8 +54,6 @@ def __init__( ticks_per_step: int, backtest_data: QlibIntradayBacktestData, ) -> None: - super().__init__() - self.position = order.amount self.order = order self.executor = executor diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index 8726d371ba..246ac841d3 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -63,10 +63,10 @@ def reset(self, outer_trade_decision: 
BaseTradeDecision = None, **kwargs: Any) - self.adapter_dict = {} for decision in outer_trade_decision.get_decision(): order = cast(Order, decision) - self.adapter_dict[order.key] = self._create_qlib_backtest_adapter(order, trade_range) + self.adapter_dict[order.key_by_day] = self._create_qlib_backtest_adapter(order, trade_range) def get_saoe_state_by_order(self, order: Order) -> SAOEState: - return self.adapter_dict[order.key].saoe_state + return self.adapter_dict[order.key_by_day].saoe_state def post_upper_level_exe_step(self) -> None: for adapter in self.adapter_dict.values(): @@ -81,7 +81,7 @@ def post_exe_step(self, execute_result: list) -> None: results = collections.defaultdict(list) if execute_result is not None: for e in execute_result: - results[e[0].key].append(e) + results[e[0].key_by_day].append(e) for key, adapter in self.adapter_dict.items(): adapter.update(results[key], self._last_step_range) @@ -112,9 +112,6 @@ def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, return TradeDecisionWO([order], self) - def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: - return outer_trade_decision - def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None: super().reset(outer_trade_decision=outer_trade_decision, **kwargs) diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py index 042c88aa3c..6ae17f1d1f 100644 --- a/qlib/rl/strategy/single_order.py +++ b/qlib/rl/strategy/single_order.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. 
from qlib.backtest import Order -from qlib.backtest.decision import BaseTradeDecision, OrderHelper, TradeDecisionWO, TradeRange +from qlib.backtest.decision import OrderHelper, TradeDecisionWO, TradeRange from qlib.strategy.base import BaseStrategy @@ -21,9 +21,6 @@ def __init__( self._trade_range = trade_range self._instrument = instrument - def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision: - return outer_trade_decision - def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO: oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper() order_list = [ diff --git a/qlib/rl/utils/cache.py b/qlib/rl/utils/cache.py deleted file mode 100644 index b0b3b43fb4..0000000000 --- a/qlib/rl/utils/cache.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import annotations - -import collections - - -class LRUCache: - def __init__(self, pool_size: int = 200) -> None: - self.pool_size = pool_size - self.contents: dict = {} - self.keys: collections.deque = collections.deque() - - def put(self, key, item): - if self.has(key): - self.keys.remove(key) - self.keys.append(key) - self.contents[key] = item - while len(self.contents) > self.pool_size: - self.contents.pop(self.keys.popleft()) - - def get(self, key): - return self.contents[key] - - def has(self, key): - return key in self.contents diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 3725223778..78ec75a21f 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -187,6 +187,7 @@ def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> """ # default to reset the decision directly # NOTE: normally, user should do something to the strategy due to the change of outer decision + return outer_trade_decision # helper methods: not necessary but for convenience def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]: @@ -221,7 +222,11 @@ def get_data_cal_avail_range(self, rtype: 
str = "full") -> Tuple[int, int]: def post_upper_level_exe_step(self) -> None: """ A hook for doing sth after the upper level executor finished its execution (for example, finalize - the metrics collection). + the metrics collection). This is used in the nested execution scenario. You do not need to care about + this method if your strategy is not used in nested execution. + + TODO: Group the nested-execution-related methods together and try to keep the the framework simple at the doc + TODO: and code level. """ def post_exe_step(self, execute_result: list) -> None: From 52747e76ec78998b9149a261cb090287dc68a91a Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Tue, 23 Aug 2022 16:40:38 +0800 Subject: [PATCH 20/23] Resolve PR comments --- qlib/constant.py | 6 +- qlib/rl/data/exchange_wrapper.py | 14 ++-- qlib/rl/data/pickle_styled.py | 4 +- qlib/rl/integration/feature.py | 5 ++ qlib/rl/order_execution/simulator_qlib.py | 6 +- qlib/rl/order_execution/state.py | 16 ++--- qlib/rl/order_execution/strategy.py | 58 +++++++++++----- qlib/rl/strategy/single_order.py | 4 +- qlib/strategy/base.py | 81 +++++++++++------------ tests/rl/test_qlib_simulator.py | 14 ++-- 10 files changed, 114 insertions(+), 94 deletions(-) diff --git a/qlib/constant.py b/qlib/constant.py index 607f0bcf51..d91ecd803a 100644 --- a/qlib/constant.py +++ b/qlib/constant.py @@ -15,8 +15,8 @@ EPS = 1e-12 # Infinity in integer -INF = 10**18 -FINEST_GRANULARITY = "1min" -COARSEST_GRANULARITY = "1day" +INF = int(1e18) +ONE_DAY = pd.Timedelta("1day") +ONE_MIN = pd.Timedelta("1min") ONE_SEC = pd.Timedelta("1s") # use 1 second to exclude the right interval point float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py index 6a657fc6f1..3ed3f6904b 100644 --- a/qlib/rl/data/exchange_wrapper.py +++ b/qlib/rl/data/exchange_wrapper.py @@ -8,12 +8,13 @@ from qlib.backtest import Exchange, Order from qlib.backtest.decision import 
TradeRange, TradeRangeByTime +from qlib.constant import ONE_DAY, ONE_SEC from qlib.rl.order_execution.utils import get_ticks_slice -from .pickle_styled import IntradayBacktestData -from ...utils.index_data import IndexData +from qlib.utils.index_data import IndexData +from .pickle_styled import BaseIntradayBacktestData -class QlibIntradayBacktestData(IntradayBacktestData): +class IntradayBacktestData(BaseIntradayBacktestData): """Backtest data for Qlib simulator""" def __init__( @@ -23,7 +24,6 @@ def __init__( ticks_index: pd.DatetimeIndex, ticks_for_order: pd.DatetimeIndex, ) -> None: - super(QlibIntradayBacktestData, self).__init__() self._order = order self._exchange = exchange self._start_time = ticks_for_order[0] @@ -78,13 +78,13 @@ def load_qlib_backtest_data( order: Order, trade_exchange: Exchange, trade_range: TradeRange, -) -> QlibIntradayBacktestData: +) -> IntradayBacktestData: data = cast( IndexData, trade_exchange.get_deal_price( stock_id=order.stock_id, start_time=order.date, - end_time=order.date + pd.Timedelta("1day") - pd.Timedelta("1s"), + end_time=order.date + ONE_DAY - ONE_SEC, direction=order.direction, method=None, ), @@ -101,7 +101,7 @@ def load_qlib_backtest_data( else: ticks_for_order = None # FIXME: implement this logic - backtest_data = QlibIntradayBacktestData( + backtest_data = IntradayBacktestData( order=order, exchange=trade_exchange, ticks_index=ticks_index, diff --git a/qlib/rl/data/pickle_styled.py b/qlib/rl/data/pickle_styled.py index aa0ba38fff..43fe9dd5ad 100644 --- a/qlib/rl/data/pickle_styled.py +++ b/qlib/rl/data/pickle_styled.py @@ -86,7 +86,7 @@ def _read_pickle(filename_without_suffix: Path) -> pd.DataFrame: return pd.read_pickle(_find_pickle(filename_without_suffix)) -class IntradayBacktestData: +class BaseIntradayBacktestData: """ Raw market data that is often used in backtesting (thus called BacktestData). 
@@ -115,7 +115,7 @@ def get_time_index(self) -> pd.DatetimeIndex: raise NotImplementedError -class SimpleIntradayBacktestData(IntradayBacktestData): +class SimpleIntradayBacktestData(BaseIntradayBacktestData): """Backtest data for simple simulator""" def __init__( diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py index 0b3a27159a..07ca381613 100644 --- a/qlib/rl/integration/feature.py +++ b/qlib/rl/integration/feature.py @@ -1,6 +1,11 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +""" +TODO: This file is used to integrate NeuTrader with Qlib to run the existing projects. +TODO: The implementation here is kind of adhoc. It is better to design a more uniformed & general implementation. +""" + from __future__ import annotations import pickle diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index 808310dbaa..8f48c24228 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -10,12 +10,12 @@ from qlib.backtest.decision import Order from qlib.backtest.executor import NestedExecutor from qlib.rl.integration.feature import init_qlib -from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState +from qlib.rl.order_execution.state import SAOEStateAdapter, SAOEState from qlib.rl.order_execution.strategy import SAOEStrategy from qlib.rl.simulator import Simulator -class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]): +class SingleAssetOrderExecution(Simulator[Order, SAOEState, float]): """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools. 
Parameters @@ -83,7 +83,7 @@ def reset( self._order = order - def _get_adapter(self) -> QlibBacktestAdapter: + def _get_adapter(self) -> SAOEStateAdapter: return self._last_yielded_saoe_strategy.adapter_dict[self._order.key_by_day] @property diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index bbdab40b94..9b2ae76d7e 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -9,9 +9,9 @@ import pandas as pd from qlib.backtest import Exchange, Order from qlib.backtest.executor import BaseExecutor -from qlib.constant import EPS, REG_CN -from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData -from qlib.rl.data.pickle_styled import IntradayBacktestData +from qlib.constant import EPS, ONE_MIN, REG_CN +from qlib.rl.data.exchange_wrapper import IntradayBacktestData +from qlib.rl.data.pickle_styled import BaseIntradayBacktestData from qlib.rl.order_execution.utils import dataframe_append, price_advantage from qlib.utils.time import get_day_min_idx_range from typing_extensions import TypedDict @@ -20,7 +20,7 @@ def _get_all_timestamps( start: pd.Timestamp, end: pd.Timestamp, - granularity: pd.Timedelta = pd.Timedelta("1min"), + granularity: pd.Timedelta = ONE_MIN, include_end: bool = True, ) -> pd.DatetimeIndex: ret = [] @@ -35,13 +35,13 @@ def _get_all_timestamps( return pd.DatetimeIndex(ret) -class QlibBacktestAdapter: +class SAOEStateAdapter: """ Maintain states of the environment. Example usage:: - adapter = QlibBacktestAdapter(...) + adapter = SAOEStateAdapter(...) adapter.update(...) 
state = adapter.saoe_state """ @@ -52,7 +52,7 @@ def __init__( executor: BaseExecutor, exchange: Exchange, ticks_per_step: int, - backtest_data: QlibIntradayBacktestData, + backtest_data: IntradayBacktestData, ) -> None: self.position = order.amount self.order = order @@ -316,7 +316,7 @@ class SAOEState(NamedTuple): metrics: Optional[SAOEMetrics] """Daily metric, only available when the trading is in "done" state.""" - backtest_data: IntradayBacktestData + backtest_data: BaseIntradayBacktestData """Backtest data is included in the state. Actually, only the time index of this data is needed, at this moment. I include the full data so that algorithms (e.g., VWAP) that relies on the raw data can be implemented. diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index 246ac841d3..5c280b6576 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -4,13 +4,17 @@ from __future__ import annotations import collections -from typing import Any, cast, Dict, Generator, Tuple +from types import GeneratorType +from typing import Any, Union, cast, Dict, Generator + +import pandas as pd from qlib.backtest import CommonInfrastructure, Order from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange from qlib.backtest.utils import LevelInfrastructure +from qlib.constant import ONE_MIN from qlib.rl.data.exchange_wrapper import load_qlib_backtest_data -from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState +from qlib.rl.order_execution.state import SAOEStateAdapter, SAOEState from qlib.strategy.base import RLStrategy @@ -33,23 +37,20 @@ def __init__( **kwargs, ) - self.adapter_dict: Dict[tuple, QlibBacktestAdapter] = {} + self.adapter_dict: Dict[tuple, SAOEStateAdapter] = {} self._last_step_range = (0, 0) - def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter: + def _create_qlib_backtest_adapter(self, order: Order, 
trade_range: TradeRange) -> SAOEStateAdapter: backtest_data = load_qlib_backtest_data(order, self.trade_exchange, trade_range) - return QlibBacktestAdapter( + return SAOEStateAdapter( order=order, executor=self.executor, exchange=self.trade_exchange, - ticks_per_step=self.ticks_per_step, + ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / ONE_MIN), backtest_data=backtest_data, ) - def _update_last_step_range(self, step_range: Tuple[int, int]) -> None: - self._last_step_range = step_range - def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None: super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs) @@ -86,9 +87,35 @@ def post_exe_step(self, execute_result: list) -> None: for key, adapter in self.adapter_dict.items(): adapter.update(results[key], self._last_step_range) - -class DecomposedStrategy(SAOEStrategy): - """Decomposed strategy that needs actions from outside to generate trade decisions.""" + def generate_trade_decision( + self, + execute_result: list = None, + ) -> Union[BaseTradeDecision, Generator[Any, Any, BaseTradeDecision]]: + """ + For SAOEStrategy, we need to update the `self._last_step_range` every time a decision is generated. + This operation should be invisible to developers, so we implement it in `generate_trade_decision()` + The concrete logic to generate decisions should be implemented in `_generate_trade_decision()`. + In other words, all subclass of `SAOEStrategy` should overwrite `_generate_trade_decision()` instead of + `generate_trade_decision()`. 
+ """ + self._last_step_range = self.get_data_cal_avail_range(rtype="step") + + decision = self._generate_trade_decision(execute_result) + if isinstance(decision, GeneratorType): + decision = yield from decision + + return decision + + def _generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]: + raise NotImplementedError + + +class ProxySAOEStrategy(SAOEStrategy): + """Proxy strategy that uses SAOEState. It is called a 'proxy' strategy because it does not make any decisions + by itself. Instead, when the strategy is required to generate a decision, it will yield the environment's + information and let the outside agents to make the decision. Please refer to `_generate_trade_decision` for + more details. + """ def __init__( self, @@ -99,16 +126,15 @@ def __init__( ) -> None: super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs) - def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]: - # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside + def _generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]: + # Once the following line is executed, this ProxySAOEStrategy (self) will be yielded to the outside # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`, - # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner + # the item will be captured by `exec_vol`. The outside policy could communicate with the inner # level strategy through this way. 
exec_vol = yield self oh = self.trade_exchange.get_order_helper() order = oh.create(self._order.stock_id, exec_vol, self._order.direction) - self._update_last_step_range(self.get_data_cal_avail_range(rtype="step")) return TradeDecisionWO([order], self) diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py index 6ae17f1d1f..8972d20a1b 100644 --- a/qlib/rl/strategy/single_order.py +++ b/qlib/rl/strategy/single_order.py @@ -13,19 +13,17 @@ def __init__( self, order: Order, trade_range: TradeRange, - instrument: str, ) -> None: super().__init__() self._order = order self._trade_range = trade_range - self._instrument = instrument def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO: oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper() order_list = [ oh.create( - code=self._instrument, + code=self._order.stock_id, amount=self._order.amount, direction=self._order.direction, ), diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 78ec75a21f..7844d25b30 100644 --- a/qlib/strategy/base.py +++ b/qlib/strategy/base.py @@ -5,10 +5,6 @@ from abc import ABCMeta, abstractmethod from typing import Any, Generator, Optional, TYPE_CHECKING, Union -import pandas as pd - -from ..constant import FINEST_GRANULARITY - if TYPE_CHECKING: from qlib.backtest.exchange import Exchange from qlib.backtest.position import BasePosition @@ -68,10 +64,6 @@ def executor(self) -> BaseExecutor: def trade_calendar(self) -> TradeCalendarManager: return self.level_infra.get("trade_calendar") - @property - def ticks_per_step(self) -> int: - return int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta(FINEST_GRANULARITY)) - @property def trade_position(self) -> BasePosition: return self.common_infra.get("trade_account").current_position @@ -98,7 +90,7 @@ def reset( level_infra: LevelInfrastructure = None, common_infra: CommonInfrastructure = None, outer_trade_decision: BaseTradeDecision = None, - **kwargs, # 
TODO: remove this? + **kwargs, ) -> None: """ - reset `level_infra`, used to reset trade calendar, .etc @@ -149,6 +141,41 @@ def generate_trade_decision( """ raise NotImplementedError("generate_trade_decision is not implemented!") + # helper methods: not necessary but for convenience + def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]: + """ + return data calendar's available decision range for `self` strategy + the range consider following factors + - data calendar in the charge of `self` strategy + - trading range limitation from the decision of outer strategy + + + related methods + - TradeCalendarManager.get_data_cal_range + - BaseTradeDecision.get_data_cal_range_limit + + Parameters + ---------- + rtype: str + - "full": return the available data index range of the strategy from `start_time` to `end_time` + - "step": return the available data index range of the strategy of current step + + Returns + ------- + Tuple[int, int]: + the available range both sides are closed + """ + cal_range = self.trade_calendar.get_data_cal_range(rtype=rtype) + if self.outer_trade_decision is None: + raise ValueError(f"There is not limitation for strategy {self}") + range_limit = self.outer_trade_decision.get_data_cal_range_limit(rtype=rtype) + return max(cal_range[0], range_limit[0]), min(cal_range[1], range_limit[1]) + + """ + The following methods are used to do cross-level communications in nested execution. + You do not need to care about them if you are implementing a single-level execution. 
+ """ + @staticmethod def update_trade_decision( trade_decision: BaseTradeDecision, @@ -189,44 +216,10 @@ def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> # NOTE: normally, user should do something to the strategy due to the change of outer decision return outer_trade_decision - # helper methods: not necessary but for convenience - def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]: - """ - return data calendar's available decision range for `self` strategy - the range consider following factors - - data calendar in the charge of `self` strategy - - trading range limitation from the decision of outer strategy - - - related methods - - TradeCalendarManager.get_data_cal_range - - BaseTradeDecision.get_data_cal_range_limit - - Parameters - ---------- - rtype: str - - "full": return the available data index range of the strategy from `start_time` to `end_time` - - "step": return the available data index range of the strategy of current step - - Returns - ------- - Tuple[int, int]: - the available range both sides are closed - """ - cal_range = self.trade_calendar.get_data_cal_range(rtype=rtype) - if self.outer_trade_decision is None: - raise ValueError(f"There is not limitation for strategy {self}") - range_limit = self.outer_trade_decision.get_data_cal_range_limit(rtype=rtype) - return max(cal_range[0], range_limit[0]), min(cal_range[1], range_limit[1]) - def post_upper_level_exe_step(self) -> None: """ A hook for doing sth after the upper level executor finished its execution (for example, finalize - the metrics collection). This is used in the nested execution scenario. You do not need to care about - this method if your strategy is not used in nested execution. - - TODO: Group the nested-execution-related methods together and try to keep the the framework simple at the doc - TODO: and code level. + the metrics collection). 
""" def post_exe_step(self, execute_result: list) -> None: diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py index 230611fa66..b7d548e9ea 100644 --- a/tests/rl/test_qlib_simulator.py +++ b/tests/rl/test_qlib_simulator.py @@ -10,8 +10,7 @@ from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime from qlib.backtest.executor import SimulatorExecutor from qlib.rl.order_execution import CategoricalActionInterpreter -from qlib.constant import FINEST_GRANULARITY -from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecutionQlib +from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecution TOTAL_POSITION = 2100.0 @@ -39,7 +38,6 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]: "kwargs": { "order": order, "trade_range": TradeRangeByTime(order.start_time.time(), order.end_time.time()), - "instrument": order.stock_id, }, } @@ -48,7 +46,7 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]: "module_path": "qlib.backtest.executor", "kwargs": { "time_per_step": "1day", - "inner_strategy": {"class": "DecomposedStrategy", "module_path": "qlib.rl.order_execution.strategy"}, + "inner_strategy": {"class": "ProxySAOEStrategy", "module_path": "qlib.rl.order_execution.strategy"}, "track_data": True, "inner_executor": { "class": "NestedExecutor", @@ -63,7 +61,7 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]: "class": "SimulatorExecutor", "module_path": "qlib.backtest.executor", "kwargs": { - "time_per_step": FINEST_GRANULARITY, + "time_per_step": "1min", "verbose": False, "trade_type": SimulatorExecutor.TT_SERIAL, "generate_report": False, @@ -79,7 +77,7 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]: } exchange_config = { - "freq": FINEST_GRANULARITY, + "freq": "1min", "codes": [order.stock_id], "limit_threshold": ("$ask == 0", "$bid == 0"), "deal_price": ("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"), @@ -97,7 +95,7 @@ def get_configs(order: Order) -> 
Tuple[dict, dict, dict]: return strategy_config, executor_config, exchange_config -def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib: +def get_simulator(order: Order) -> SingleAssetOrderExecution: DATA_ROOT_DIR = Path(__file__).parent.parent / ".data" / "rl" / "qlib_simulator" # fmt: off @@ -118,7 +116,7 @@ def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib: strategy_config, executor_config, exchange_config = get_configs(order) - return SingleAssetOrderExecutionQlib( + return SingleAssetOrderExecution( order=order, qlib_config=qlib_config, strategy_config=strategy_config, From 2e5a61bf1519c3ff7f89928cb0541d4fe2415e38 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Wed, 24 Aug 2022 10:07:18 +0800 Subject: [PATCH 21/23] Rename ONE_SEC to EPS_T; complete backtest loop --- qlib/backtest/backtest.py | 2 ++ qlib/constant.py | 2 +- qlib/rl/data/exchange_wrapper.py | 4 ++-- qlib/rl/order_execution/simulator_simple.py | 8 ++++---- qlib/rl/order_execution/strategy.py | 2 +- qlib/rl/order_execution/utils.py | 4 ++-- qlib/strategy/base.py | 2 +- 7 files changed, 13 insertions(+), 11 deletions(-) diff --git a/qlib/backtest/backtest.py b/qlib/backtest/backtest.py index e476550691..f79622bff6 100644 --- a/qlib/backtest/backtest.py +++ b/qlib/backtest/backtest.py @@ -83,7 +83,9 @@ def collect_data_loop( while not trade_executor.finished(): _trade_decision: BaseTradeDecision = trade_strategy.generate_trade_decision(_execute_result) _execute_result = yield from trade_executor.collect_data(_trade_decision, level=0) + trade_strategy.post_exe_step(_execute_result) bar.update(1) + trade_strategy.post_upper_level_exe_step() if return_value is not None: all_executors = trade_executor.get_all_executors() diff --git a/qlib/constant.py b/qlib/constant.py index d91ecd803a..ac6c76ae22 100644 --- a/qlib/constant.py +++ b/qlib/constant.py @@ -18,5 +18,5 @@ INF = int(1e18) ONE_DAY = pd.Timedelta("1day") ONE_MIN = pd.Timedelta("1min") -ONE_SEC = pd.Timedelta("1s") # use 
1 second to exclude the right interval point +EPS_T = pd.Timedelta("1s") # use 1 second to exclude the right interval point float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray) diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py index 3ed3f6904b..94bb1dcbbd 100644 --- a/qlib/rl/data/exchange_wrapper.py +++ b/qlib/rl/data/exchange_wrapper.py @@ -8,7 +8,7 @@ from qlib.backtest import Exchange, Order from qlib.backtest.decision import TradeRange, TradeRangeByTime -from qlib.constant import ONE_DAY, ONE_SEC +from qlib.constant import ONE_DAY, EPS_T from qlib.rl.order_execution.utils import get_ticks_slice from qlib.utils.index_data import IndexData from .pickle_styled import BaseIntradayBacktestData @@ -84,7 +84,7 @@ def load_qlib_backtest_data( trade_exchange.get_deal_price( stock_id=order.stock_id, start_time=order.date, - end_time=order.date + ONE_DAY - ONE_SEC, + end_time=order.date + ONE_DAY - EPS_T, direction=order.direction, method=None, ), diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py index 59cd92b4fe..93b57c3a03 100644 --- a/qlib/rl/order_execution/simulator_simple.py +++ b/qlib/rl/order_execution/simulator_simple.py @@ -10,7 +10,7 @@ import pandas as pd from qlib.backtest.decision import Order, OrderDir -from qlib.constant import EPS, ONE_SEC, float_or_ndarray +from qlib.constant import EPS, EPS_T, float_or_ndarray from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data from qlib.rl.order_execution.state import SAOEMetrics, SAOEState from qlib.rl.simulator import Simulator @@ -240,8 +240,8 @@ def _split_exec_vol(self, exec_vol_sum: float) -> np.ndarray: next_time = self._next_time() # get the backtest data for next interval - self.market_vol = self.backtest_data.get_volume().loc[self.cur_time : next_time - ONE_SEC].to_numpy() - self.market_price = self.backtest_data.get_deal_price().loc[self.cur_time : next_time - 
ONE_SEC].to_numpy() + self.market_vol = self.backtest_data.get_volume().loc[self.cur_time : next_time - EPS_T].to_numpy() + self.market_price = self.backtest_data.get_deal_price().loc[self.cur_time : next_time - EPS_T].to_numpy() assert self.market_vol is not None and self.market_price is not None @@ -294,7 +294,7 @@ def _metrics_collect( def _get_ticks_slice(self, start: pd.Timestamp, end: pd.Timestamp, include_end: bool = False) -> pd.DatetimeIndex: if not include_end: - end = end - ONE_SEC + end = end - EPS_T return self.ticks_index[self.ticks_index.slice_indexer(start, end)] @staticmethod diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index 5c280b6576..f6e3ae9faf 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -73,7 +73,7 @@ def post_upper_level_exe_step(self) -> None: for adapter in self.adapter_dict.values(): adapter.generate_metrics_after_done() - def post_exe_step(self, execute_result: list) -> None: + def post_exe_step(self, execute_result: Optional[list]) -> None: last_step_length = self._last_step_range[1] - self._last_step_range[0] if last_step_length <= 0: assert not execute_result diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py index a498037ad0..43517fe744 100644 --- a/qlib/rl/order_execution/utils.py +++ b/qlib/rl/order_execution/utils.py @@ -10,7 +10,7 @@ from qlib.backtest.decision import OrderDir from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor -from qlib.constant import ONE_SEC, float_or_ndarray +from qlib.constant import EPS_T, float_or_ndarray def get_ticks_slice( @@ -20,7 +20,7 @@ def get_ticks_slice( include_end: bool = False, ) -> pd.DatetimeIndex: if not include_end: - end = end - ONE_SEC + end = end - EPS_T return ticks_index[ticks_index.slice_indexer(start, end)] diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py index 7844d25b30..532e88452e 100644 --- a/qlib/strategy/base.py +++ 
b/qlib/strategy/base.py @@ -222,7 +222,7 @@ def post_upper_level_exe_step(self) -> None: the metrics collection). """ - def post_exe_step(self, execute_result: list) -> None: + def post_exe_step(self, execute_result: Optional[list]) -> None: """ A hook for doing sth after the corresponding executor finished its execution. From 1e72a3642346ce27e66b194c92523ecd784dc808 Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Wed, 24 Aug 2022 10:14:52 +0800 Subject: [PATCH 22/23] CI issue --- qlib/rl/order_execution/strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py index f6e3ae9faf..4a85bc76ed 100644 --- a/qlib/rl/order_execution/strategy.py +++ b/qlib/rl/order_execution/strategy.py @@ -5,7 +5,7 @@ import collections from types import GeneratorType -from typing import Any, Union, cast, Dict, Generator +from typing import Any, Optional, Union, cast, Dict, Generator import pandas as pd From cb2b214126abac506d9e064951a905dad643af7a Mon Sep 17 00:00:00 2001 From: Huoran Li Date: Wed, 24 Aug 2022 10:36:41 +0800 Subject: [PATCH 23/23] Resolve Yuge's PR comments --- qlib/rl/integration/__init__.py | 2 -- .../feature.py => order_execution/integration.py} | 0 qlib/rl/order_execution/simulator_qlib.py | 7 ++++--- qlib/rl/order_execution/simulator_simple.py | 6 +++--- qlib/rl/order_execution/state.py | 5 ++++- qlib/rl/strategy/single_order.py | 2 +- 6 files changed, 12 insertions(+), 10 deletions(-) delete mode 100644 qlib/rl/integration/__init__.py rename qlib/rl/{integration/feature.py => order_execution/integration.py} (100%) diff --git a/qlib/rl/integration/__init__.py b/qlib/rl/integration/__init__.py deleted file mode 100644 index 59e481eb93..0000000000 --- a/qlib/rl/integration/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
diff --git a/qlib/rl/integration/feature.py b/qlib/rl/order_execution/integration.py similarity index 100% rename from qlib/rl/integration/feature.py rename to qlib/rl/order_execution/integration.py diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py index 8f48c24228..3002fd333e 100644 --- a/qlib/rl/order_execution/simulator_qlib.py +++ b/qlib/rl/order_execution/simulator_qlib.py @@ -9,11 +9,12 @@ from qlib.backtest import collect_data_loop, get_strategy_executor from qlib.backtest.decision import Order from qlib.backtest.executor import NestedExecutor -from qlib.rl.integration.feature import init_qlib -from qlib.rl.order_execution.state import SAOEStateAdapter, SAOEState -from qlib.rl.order_execution.strategy import SAOEStrategy from qlib.rl.simulator import Simulator +from .integration import init_qlib +from .state import SAOEState, SAOEStateAdapter +from .strategy import SAOEStrategy + class SingleAssetOrderExecution(Simulator[Order, SAOEState, float]): """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools. 
diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py index 93b57c3a03..f95aeebad0 100644 --- a/qlib/rl/order_execution/simulator_simple.py +++ b/qlib/rl/order_execution/simulator_simple.py @@ -4,18 +4,18 @@ from __future__ import annotations from pathlib import Path -from typing import Any, Optional, cast +from typing import Any, cast, Optional import numpy as np import pandas as pd - from qlib.backtest.decision import Order, OrderDir from qlib.constant import EPS, EPS_T, float_or_ndarray from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data -from qlib.rl.order_execution.state import SAOEMetrics, SAOEState from qlib.rl.simulator import Simulator from qlib.rl.utils import LogLevel +from .state import SAOEMetrics, SAOEState + # TODO: Integrating Qlib's native data with simulator_simple __all__ = ["SingleAssetOrderExecution"] diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py index 9b2ae76d7e..d6bbeaea5a 100644 --- a/qlib/rl/order_execution/state.py +++ b/qlib/rl/order_execution/state.py @@ -37,7 +37,10 @@ def _get_all_timestamps( class SAOEStateAdapter: """ - Maintain states of the environment. + Maintain states of the environment. SAOEStateAdapter accepts execution results and update its internal state + according to the execution results with additional information acquired from executors & exchange. For example, + it gets the dealt order amount from execution results, and get the corresponding market price / volume from + exchange. Example usage:: diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py index 8972d20a1b..9d8e396ce0 100644 --- a/qlib/rl/strategy/single_order.py +++ b/qlib/rl/strategy/single_order.py @@ -12,7 +12,7 @@ class SingleOrderStrategy(BaseStrategy): def __init__( self, order: Order, - trade_range: TradeRange, + trade_range: TradeRange = None, ) -> None: super().__init__()