From c731c5a59b8d50fdc653842be7a1f7c944f11d14 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Fri, 29 Jul 2022 15:44:19 +0800
Subject: [PATCH 01/23] Use dict-like configuration

---
 qlib/rl/from_neutrader/config.py          |  20 ---
 qlib/rl/from_neutrader/feature.py         |   6 +-
 qlib/rl/order_execution/objects.py        |   2 +
 qlib/rl/order_execution/simulator_qlib.py | 148 ++++++++++++----------
 qlib/rl/order_execution/utils.py          |  41 +-----
 tests/rl/test_qlib_simulator.py           |  77 +++++------
 6 files changed, 131 insertions(+), 163 deletions(-)
 delete mode 100644 qlib/rl/from_neutrader/config.py
 create mode 100644 qlib/rl/order_execution/objects.py

diff --git a/qlib/rl/from_neutrader/config.py b/qlib/rl/from_neutrader/config.py
deleted file mode 100644
index d9a681b32d..0000000000
--- a/qlib/rl/from_neutrader/config.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Optional, Tuple, Union
-
-
-# TODO: In the future we should merge the dataclass-based config with Qlib's dict-based config.
-@dataclass
-class ExchangeConfig:
-    limit_threshold: Union[float, Tuple[str, str]]
-    deal_price: Union[str, Tuple[str, str]]
-    volume_threshold: dict
-    open_cost: float = 0.0005
-    close_cost: float = 0.0015
-    min_cost: float = 5.0
-    trade_unit: Optional[float] = 100.0
-    cash_limit: Optional[Union[Path, float]] = None
-    generate_report: bool = False
diff --git a/qlib/rl/from_neutrader/feature.py b/qlib/rl/from_neutrader/feature.py
index ca42af24c9..2b4279e1a6 100644
--- a/qlib/rl/from_neutrader/feature.py
+++ b/qlib/rl/from_neutrader/feature.py
@@ -70,10 +70,10 @@ def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False) -> pd.D
         return data
 
 
-def init_qlib(config: dict, part: Optional[str] = None) -> None:
+def init_qlib(qlib_config: dict) -> None:
     provider_uri_map = {
-        "day": config["provider_uri_day"].as_posix(),
-        "1min": config["provider_uri_1min"].as_posix(),
+        "day": qlib_config["provider_uri_day"].as_posix(),
+        "1min": qlib_config["provider_uri_1min"].as_posix(),
     }
     qlib.init(
         region=REG_CN,
diff --git a/qlib/rl/order_execution/objects.py b/qlib/rl/order_execution/objects.py
new file mode 100644
index 0000000000..ee6358bcd3
--- /dev/null
+++ b/qlib/rl/order_execution/objects.py
@@ -0,0 +1,2 @@
+FINEST_GRANULARITY = "1min"
+COARSEST_GRANULARITY = "1day"
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index c75793f586..ad415b169a 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -3,22 +3,23 @@
 
 from __future__ import annotations
 
-from typing import Any, Callable, cast, Generator, List, Optional, Tuple
+import copy
+from typing import Any, cast, Generator, List, Optional, Tuple
 
 import numpy as np
 import pandas as pd
 
+from qlib.backtest import get_strategy_executor
 from qlib.backtest.decision import BaseTradeDecision, Order, OrderHelper, TradeDecisionWO, TradeRange, TradeRangeByTime
-from qlib.backtest.executor import BaseExecutor, NestedExecutor
+from qlib.backtest.executor import NestedExecutor
 from qlib.backtest.utils import CommonInfrastructure
 from qlib.constant import EPS
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
-from qlib.rl.from_neutrader.config import ExchangeConfig
 from qlib.rl.from_neutrader.feature import init_qlib
+from qlib.rl.order_execution.objects import COARSEST_GRANULARITY, FINEST_GRANULARITY
 from qlib.rl.order_execution.simulator_simple import SAOEMetrics, SAOEState
 from qlib.rl.order_execution.utils import (
     dataframe_append,
-    get_common_infra,
     get_portfolio_and_indicator,
     get_ticks_slice,
     price_advantage,
@@ -28,8 +29,8 @@
 
 
 class DecomposedStrategy(BaseStrategy):
-    def __init__(self) -> None:
-        super().__init__()
+    def __init__(self, common_infra: CommonInfrastructure = None) -> None:
+        super().__init__(common_infra=common_infra)
 
         self.execute_order: Optional[Order] = None
         self.execute_result: List[Tuple[Order, float, float, float]] = []
@@ -66,12 +67,12 @@ class SingleOrderStrategy(BaseStrategy):
     # this logic is copied from FileOrderStrategy
     def __init__(
         self,
-        common_infra: CommonInfrastructure,
         order: Order,
         trade_range: TradeRange,
         instrument: str,
     ) -> None:
-        super().__init__(common_infra=common_infra)
+        super().__init__()
+
         self._order = order
         self._trade_range = trade_range
         self._instrument = instrument
@@ -91,9 +92,25 @@ def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionW
         return TradeDecisionWO(order_list, self, self._trade_range)
 
 
-# TODO: move these to the configuration files
-FINEST_GRANULARITY = "1min"
-COARSEST_GRANULARITY = "1day"
+executor_config_template = {
+    "class": "NestedExecutor",
+    "module_path": "qlib.backtest.executor",
+    "kwargs": {
+        "time_per_step": COARSEST_GRANULARITY,
+        "inner_strategy": {
+            "class": "DecomposedStrategy",
+            "module_path": "qlib.rl.order_execution.simulator_qlib",
+        },
+        "track_data": True,
+    },
+}
+top_strategy_config_template = {
+    "class": "SingleOrderStrategy",
+    "module_path": "qlib.rl.order_execution.simulator_qlib",
+}
+exchange_kwargs_template = {
+    "freq": FINEST_GRANULARITY,
+}
 
 
 class StateMaintainer:
@@ -123,11 +140,14 @@ def __init__(self, order: Order, time_per_step: str, tick_index: pd.DatetimeInde
 
     def update(
         self,
-        inner_executor: BaseExecutor,
-        inner_strategy: DecomposedStrategy,
+        executor: NestedExecutor,
         done: bool,
         all_indicators: dict,
     ) -> None:
+        inner_executor = executor.inner_executor
+        inner_strategy = executor.inner_strategy
+        assert isinstance(inner_strategy, DecomposedStrategy)
+
         execute_order = inner_strategy.execute_order
         execute_result = inner_strategy.execute_result
         exec_vol = np.array([e[0].deal_amount for e in execute_result])
@@ -272,10 +292,10 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
         A string to describe the time granularity of each step. Current support "1min", "30min", and "1day"
     qlib_config (dict):
         Configuration used to initialize Qlib.
-    inner_executor_fn (Callable[[str, CommonInfrastructure], BaseExecutor]):
-        Function used to get the inner level executor.
-    exchange_config (ExchangeConfig):
-        Configuration used to create the Exchange instance.
+    inner_executor_config (dict):
+        Inner executor configuration
+    exchange_config (dict):
+        Exchange configuration
     """
 
     def __init__(
@@ -283,8 +303,8 @@ def __init__(
         order: Order,
         time_per_step: str,  # "1min", "30min", "1day"
         qlib_config: dict,
-        inner_executor_fn: Callable[[str, CommonInfrastructure], BaseExecutor],
-        exchange_config: ExchangeConfig,
+        inner_executor_config: dict,
+        exchange_config: dict,
     ) -> None:
         assert time_per_step in ("1min", "30min", "1day")
 
@@ -292,12 +312,7 @@ def __init__(
 
         assert order.start_time.date() == order.end_time.date(), "Start date and end date must be the same."
 
-        self._order = order
-        self._order_date = pd.Timestamp(order.start_time.date())
-        self._trade_range = TradeRangeByTime(order.start_time.time(), order.end_time.time())
-        self._qlib_config = qlib_config
-        self._inner_executor_fn = inner_executor_fn
-        self._exchange_config = exchange_config
+        init_qlib(qlib_config)
 
         self._time_per_step = time_per_step
         self._ticks_per_step = int(pd.Timedelta(time_per_step).total_seconds() // 60)
@@ -307,56 +322,58 @@ def __init__(
 
         self._done = False
 
-        self._inner_strategy = DecomposedStrategy()
-
-        self.reset(self._order)
-
-    def reset(self, order: Order) -> None:
-        instrument = order.stock_id
-
-        # TODO: Check this logic. Make sure we need to do this every time we reset the simulator.
-        init_qlib(self._qlib_config, instrument)
-
-        common_infra = get_common_infra(
-            self._exchange_config,
-            trade_date=pd.Timestamp(self._order_date),
-            codes=[instrument],
-        )
-
-        # TODO: We can leverage interfaces like (https://tinyurl.com/y8f8fhv4) to create trading environment.
-        # TODO: By aligning the interface to create environments with Qlib, it will be easier to share the config and
-        # TODO: code between backtesting and training.
-        self._inner_executor = self._inner_executor_fn(self._time_per_step, common_infra)
-        self._executor = NestedExecutor(
-            time_per_step=COARSEST_GRANULARITY,
-            inner_executor=self._inner_executor,
-            inner_strategy=self._inner_strategy,
-            track_data=True,
-            common_infra=common_infra,
+        self.reset(order, inner_executor_config, exchange_config)
+
+    def reset(self, order: Order, inner_executor_config: dict, exchange_config: dict) -> None:
+        order_date = pd.Timestamp(order.start_time.date())
+
+        top_strategy_config: dict = copy.deepcopy(top_strategy_config_template)
+        top_strategy_config.update({
+            "kwargs": {
+                "order": order,
+                "trade_range": TradeRangeByTime(order.start_time.time(), order.end_time.time()),
+                "instrument": order.stock_id,
+            }
+        })
+
+        executor_config: dict = copy.deepcopy(executor_config_template)
+        executor_config["kwargs"].update({
+            "inner_executor": inner_executor_config,
+            "start_time": order_date,
+            "end_time": order_date,
+        })
+
+        exchange_kwargs: dict = copy.deepcopy(exchange_kwargs_template)
+        exchange_kwargs.update({"codes": [order.stock_id], **exchange_config})
+
+        top_strategy, self._executor = get_strategy_executor(
+            start_time=order_date,
+            end_time=order_date + pd.DateOffset(1),
+            strategy=top_strategy_config,
+            executor=executor_config,
+            benchmark=order.stock_id,
+            account=1e12,
+            exchange_kwargs=exchange_kwargs,
+            pos_type="InfPosition",
         )
+        top_strategy.reset(level_infra=self._executor.get_level_infra())
 
-        exchange = self._inner_executor.trade_exchange
+        exchange = self._executor.trade_exchange
         self._ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)])
         self._ticks_for_order = get_ticks_slice(
             self._ticks_index,
-            self._order.start_time,
-            self._order.end_time,
+            order.start_time,
+            order.end_time,
             include_end=True,
         )
-
         self._backtest_data = QlibIntradayBacktestData(
-            order=self._order,
+            order=order,
             exchange=exchange,
             start_time=self._ticks_for_order[0],
             end_time=self._ticks_for_order[-1],
         )
-
         self.twap_price = self._backtest_data.get_deal_price().mean()
 
-        top_strategy = SingleOrderStrategy(common_infra, order, self._trade_range, instrument)
-        self._executor.reset(start_time=pd.Timestamp(self._order_date), end_time=pd.Timestamp(self._order_date))
-        top_strategy.reset(level_infra=self._executor.get_level_infra())
-
         self._collect_data_loop = self._executor.collect_data(top_strategy.generate_trade_decision(), level=0)
         assert isinstance(self._collect_data_loop, Generator)
 
@@ -364,12 +381,14 @@ def reset(self, order: Order) -> None:
         self._done = False
 
         self._maintainer = StateMaintainer(
-            order=self._order,
+            order=order,
             time_per_step=self._time_per_step,
             tick_index=self._ticks_index,
             twap_price=self.twap_price,
         )
 
+        self._order = order
+
     def _iter_strategy(self, action: float = None) -> DecomposedStrategy:
         """Iterate the _collect_data_loop until we get the next yield DecomposedStrategy."""
         assert self._collect_data_loop is not None
@@ -400,8 +419,7 @@ def step(self, action: float) -> None:
         _, all_indicators = get_portfolio_and_indicator(self._executor)
 
         self._maintainer.update(
-            inner_executor=self._inner_executor,
-            inner_strategy=self._inner_strategy,
+            executor=self._executor,
             done=self._done,
             all_indicators=all_indicators,
         )
@@ -409,7 +427,7 @@ def step(self, action: float) -> None:
     def get_state(self) -> SAOEState:
         return SAOEState(
             order=self._order,
-            cur_time=self._inner_executor.trade_calendar.get_step_time()[0],
+            cur_time=self._executor.inner_executor.trade_calendar.get_step_time()[0],
             position=self._maintainer.position,
             history_exec=self._maintainer.history_exec,
             history_steps=self._maintainer.history_steps,
diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py
index e2d0de9812..779f860c33 100644
--- a/qlib/rl/order_execution/utils.py
+++ b/qlib/rl/order_execution/utils.py
@@ -3,54 +3,17 @@
 
 from __future__ import annotations
 
-from typing import Any, List, Tuple, cast
+from typing import Any, cast, Tuple
 
 import numpy as np
 import pandas as pd
 
-from qlib.backtest import CommonInfrastructure, get_exchange
-from qlib.backtest.account import Account
 from qlib.backtest.decision import OrderDir
 from qlib.backtest.executor import BaseExecutor
-from qlib.rl.from_neutrader.config import ExchangeConfig
-from qlib.rl.order_execution.simulator_simple import ONE_SEC, _float_or_ndarray
+from qlib.rl.order_execution.simulator_simple import _float_or_ndarray, ONE_SEC
 from qlib.utils.time import Freq
 
 
-def get_common_infra(
-    config: ExchangeConfig,
-    trade_date: pd.Timestamp,
-    codes: List[str],
-    cash_limit: float = None,
-) -> CommonInfrastructure:
-    # need to specify a range here for acceleration
-    if cash_limit is None:
-        trade_account = Account(init_cash=int(1e12), benchmark_config={}, pos_type="InfPosition")
-    else:
-        trade_account = Account(
-            init_cash=cash_limit,
-            benchmark_config={},
-            pos_type="Position",
-            position_dict={code: {"amount": 1e12, "price": 1.0} for code in codes},
-        )
-
-    exchange = get_exchange(
-        codes=codes,
-        freq="1min",
-        limit_threshold=config.limit_threshold,
-        deal_price=config.deal_price,
-        open_cost=config.open_cost,
-        close_cost=config.close_cost,
-        min_cost=config.min_cost if config.trade_unit is not None else 0,
-        start_time=trade_date,
-        end_time=trade_date + pd.DateOffset(1),
-        trade_unit=config.trade_unit,
-        volume_threshold=config.volume_threshold,
-    )
-
-    return CommonInfrastructure(trade_account=trade_account, trade_exchange=exchange)
-
-
 def get_ticks_slice(
     ticks_index: pd.DatetimeIndex,
     start: pd.Timestamp,
diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py
index ca7820645f..ac94e589e5 100644
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -7,11 +7,10 @@
 import pytest
 
 from qlib.backtest.decision import Order, OrderDir
-from qlib.backtest.executor import NestedExecutor, SimulatorExecutor
-from qlib.backtest.utils import CommonInfrastructure
-from qlib.contrib.strategy import TWAPStrategy
+from qlib.backtest.executor import SimulatorExecutor
 from qlib.rl.order_execution import CategoricalActionInterpreter
-from qlib.rl.order_execution.simulator_qlib import ExchangeConfig, SingleAssetOrderExecutionQlib
+from qlib.rl.order_execution.objects import FINEST_GRANULARITY
+from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecutionQlib
 
 TOTAL_POSITION = 2100.0
 
@@ -32,22 +31,30 @@ def get_order() -> Order:
     )
 
 
-def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib:
-    def _inner_executor_fn(time_per_step: str, common_infra: CommonInfrastructure) -> NestedExecutor:
-        return NestedExecutor(
-            time_per_step=time_per_step,
-            inner_strategy=TWAPStrategy(),
-            inner_executor=SimulatorExecutor(
-                time_per_step="1min",
-                verbose=False,
-                trade_type=SimulatorExecutor.TT_SERIAL,
-                generate_report=False,
-                common_infra=common_infra,
-                track_data=True,
-            ),
-            common_infra=common_infra,
-            track_data=True,
-        )
+def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecutionQlib:
+    _inner_executor_config = {
+        "class": "NestedExecutor",
+        "module_path": "qlib.backtest.executor",
+        "kwargs": {
+            "time_per_step": time_per_step,
+            "inner_strategy": {
+                "class": "TWAPStrategy",
+                "module_path": "qlib.contrib.strategy.rule_strategy",
+            },
+            "inner_executor": {
+                "class": "SimulatorExecutor",
+                "module_path": "qlib.backtest.executor",
+                "kwargs": {
+                    "time_per_step": FINEST_GRANULARITY,
+                    "verbose": False,
+                    "trade_type": SimulatorExecutor.TT_SERIAL,
+                    "generate_report": False,
+                    "track_data": True,
+                }
+            },
+            "track_data": True,
+        },
+    }
 
     DATA_ROOT_DIR = Path(__file__).parent.parent / ".data" / "rl" / "qlib_simulator"
 
@@ -67,27 +74,25 @@ def _inner_executor_fn(time_per_step: str, common_infra: CommonInfrastructure) -
     }
     # fmt: on
 
-    exchange_config = ExchangeConfig(
-        limit_threshold=("$ask == 0", "$bid == 0"),
-        deal_price=("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"),
-        volume_threshold={
+    exchange_config = {
+        "limit_threshold": ("$ask == 0", "$bid == 0"),
+        "deal_price": ("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"),
+        "volume_threshold": {
             "all": ("cum", "0.2 * DayCumsum($volume, '9:30', '14:29')"),
             "buy": ("current", "$askV1"),
             "sell": ("current", "$bidV1"),
         },
-        open_cost=0.0005,
-        close_cost=0.0015,
-        min_cost=5.0,
-        trade_unit=None,
-        cash_limit=None,
-        generate_report=False,
-    )
+        "open_cost": 0.0005,
+        "close_cost": 0.0015,
+        "min_cost": 5.0,
+        "trade_unit": None,
+    }
 
     return SingleAssetOrderExecutionQlib(
         order=order,
-        time_per_step="30min",
+        time_per_step=time_per_step,
         qlib_config=qlib_config,
-        inner_executor_fn=_inner_executor_fn,
+        inner_executor_config=_inner_executor_config,
         exchange_config=exchange_config,
     )
 
@@ -95,7 +100,7 @@ def _inner_executor_fn(time_per_step: str, common_infra: CommonInfrastructure) -
 @python_version_request
 def test_simulator_first_step():
     order = get_order()
-    simulator = get_simulator(order)
+    simulator = get_simulator(order, time_per_step="30min")
     state = simulator.get_state()
     assert state.cur_time == pd.Timestamp("2019-03-04 09:30:00")
     assert state.position == TOTAL_POSITION
@@ -130,7 +135,7 @@ def test_simulator_first_step():
 @python_version_request
 def test_simulator_stop_twap() -> None:
     order = get_order()
-    simulator = get_simulator(order)
+    simulator = get_simulator(order, time_per_step="30min")
     NUM_STEPS = 7
     for i in range(NUM_STEPS):
         simulator.step(TOTAL_POSITION / NUM_STEPS)
@@ -157,7 +162,7 @@ def test_simulator_stop_twap() -> None:
 def test_interpreter() -> None:
     NUM_EXECUTION = 3
     order = get_order()
-    simulator = get_simulator(order)
+    simulator = get_simulator(order, time_per_step="30min")
     interpreter_action = CategoricalActionInterpreter(values=NUM_EXECUTION)
 
     NUM_STEPS = 7

From 0490ff41ff7d9bf1335e8f9156347bd02a87ac5e Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Fri, 29 Jul 2022 15:50:51 +0800
Subject: [PATCH 02/23] Rename from_neutrader to integration

---
 qlib/rl/{from_neutrader => integration}/__init__.py | 0
 qlib/rl/{from_neutrader => integration}/feature.py  | 0
 qlib/rl/order_execution/simulator_qlib.py           | 2 +-
 3 files changed, 1 insertion(+), 1 deletion(-)
 rename qlib/rl/{from_neutrader => integration}/__init__.py (100%)
 rename qlib/rl/{from_neutrader => integration}/feature.py (100%)

diff --git a/qlib/rl/from_neutrader/__init__.py b/qlib/rl/integration/__init__.py
similarity index 100%
rename from qlib/rl/from_neutrader/__init__.py
rename to qlib/rl/integration/__init__.py
diff --git a/qlib/rl/from_neutrader/feature.py b/qlib/rl/integration/feature.py
similarity index 100%
rename from qlib/rl/from_neutrader/feature.py
rename to qlib/rl/integration/feature.py
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index ad415b169a..7e126d9a25 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -15,7 +15,7 @@
 from qlib.backtest.utils import CommonInfrastructure
 from qlib.constant import EPS
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
-from qlib.rl.from_neutrader.feature import init_qlib
+from qlib.rl.integration.feature import init_qlib
 from qlib.rl.order_execution.objects import COARSEST_GRANULARITY, FINEST_GRANULARITY
 from qlib.rl.order_execution.simulator_simple import SAOEMetrics, SAOEState
 from qlib.rl.order_execution.utils import (

From ef73eac5ee6b777035eb2ca67cb77fa18e84befc Mon Sep 17 00:00:00 2001
From: Default <huo53926@126.com>
Date: Wed, 3 Aug 2022 13:50:43 +0800
Subject: [PATCH 03/23] SAOE strategy

---
 qlib/backtest/__init__.py                   |   2 +-
 qlib/backtest/executor.py                   |   9 +
 qlib/rl/order_execution/simulator_qlib.py   | 392 +++-----------------
 qlib/rl/order_execution/state_maintainer.py | 219 +++++++++++
 qlib/rl/order_execution/utils.py            |  23 +-
 qlib/rl/strategy/__init__.py                |   2 +
 qlib/rl/strategy/decomposed.py              |  44 +++
 qlib/rl/strategy/saoe.py                    |  57 +++
 qlib/rl/strategy/single_order.py            |  35 ++
 qlib/strategy/base.py                       |   7 +
 tests/rl/test_qlib_simulator.py             | 100 +++--
 11 files changed, 492 insertions(+), 398 deletions(-)
 create mode 100644 qlib/rl/order_execution/state_maintainer.py
 create mode 100644 qlib/rl/strategy/__init__.py
 create mode 100644 qlib/rl/strategy/decomposed.py
 create mode 100644 qlib/rl/strategy/saoe.py
 create mode 100644 qlib/rl/strategy/single_order.py

diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
index d3f4d72402..e8fe73c5a2 100644
--- a/qlib/backtest/__init__.py
+++ b/qlib/backtest/__init__.py
@@ -345,4 +345,4 @@ def format_decisions(
     return res
 
 
-__all__ = ["Order", "backtest"]
+__all__ = ["Order", "backtest", "get_strategy_executor"]
diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index 13af7aea71..6948faca30 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -124,6 +124,9 @@ def __init__(
         self.dealt_order_amount: Dict[str, float] = defaultdict(float)
         self.deal_day = None
 
+        # whether the current executor is collecting data
+        self.is_collecting = False
+
     def reset_common_infra(self, common_infra: CommonInfrastructure, copy_trade_account: bool = False) -> None:
         """
         reset infrastructure for trading
@@ -256,6 +259,8 @@ def collect_data(
         object
             trade decision
         """
+        self.is_collecting = True
+
         if self.track_data:
             yield trade_decision
 
@@ -296,6 +301,8 @@ def collect_data(
 
         if return_value is not None:
             return_value.update({"execute_result": res})
+
+        self.is_collecting = False
         return res
 
     def get_all_executors(self) -> List[BaseExecutor]:
@@ -473,6 +480,8 @@ def _collect_data(
                 # do nothing and just step forward
                 sub_cal.step()
 
+        self.inner_strategy.post_upper_level_exe_step()
+
         return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list}
 
     def post_inner_exe_step(self, inner_exe_res: List[object]) -> None:
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 7e126d9a25..22ac718e93 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -3,282 +3,20 @@
 
 from __future__ import annotations
 
-import copy
-from typing import Any, cast, Generator, List, Optional, Tuple
+from typing import Generator, Optional
 
-import numpy as np
 import pandas as pd
-
 from qlib.backtest import get_strategy_executor
-from qlib.backtest.decision import BaseTradeDecision, Order, OrderHelper, TradeDecisionWO, TradeRange, TradeRangeByTime
+from qlib.backtest.decision import Order
 from qlib.backtest.executor import NestedExecutor
-from qlib.backtest.utils import CommonInfrastructure
-from qlib.constant import EPS
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
 from qlib.rl.integration.feature import init_qlib
-from qlib.rl.order_execution.objects import COARSEST_GRANULARITY, FINEST_GRANULARITY
-from qlib.rl.order_execution.simulator_simple import SAOEMetrics, SAOEState
+from qlib.rl.order_execution.simulator_simple import SAOEState
 from qlib.rl.order_execution.utils import (
-    dataframe_append,
-    get_portfolio_and_indicator,
     get_ticks_slice,
-    price_advantage,
 )
 from qlib.rl.simulator import Simulator
-from qlib.strategy.base import BaseStrategy
-
-
-class DecomposedStrategy(BaseStrategy):
-    def __init__(self, common_infra: CommonInfrastructure = None) -> None:
-        super().__init__(common_infra=common_infra)
-
-        self.execute_order: Optional[Order] = None
-        self.execute_result: List[Tuple[Order, float, float, float]] = []
-
-    def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]:
-        # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside
-        # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`,
-        # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner
-        # level strategy through this way.
-        exec_vol = yield self
-
-        oh = self.trade_exchange.get_order_helper()
-        order = oh.create(self._order.stock_id, exec_vol, self._order.direction)
-
-        self.execute_order = order
-
-        return TradeDecisionWO([order], self)
-
-    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
-        return outer_trade_decision
-
-    def post_exe_step(self, execute_result: list) -> None:
-        self.execute_result = execute_result
-
-    def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None:
-        super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
-        if outer_trade_decision is not None:
-            order_list = outer_trade_decision.order_list
-            assert len(order_list) == 1
-            self._order = order_list[0]
-
-
-class SingleOrderStrategy(BaseStrategy):
-    # this logic is copied from FileOrderStrategy
-    def __init__(
-        self,
-        order: Order,
-        trade_range: TradeRange,
-        instrument: str,
-    ) -> None:
-        super().__init__()
-
-        self._order = order
-        self._trade_range = trade_range
-        self._instrument = instrument
-
-    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
-        return outer_trade_decision
-
-    def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO:
-        oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper()
-        order_list = [
-            oh.create(
-                code=self._instrument,
-                amount=self._order.amount,
-                direction=self._order.direction,
-            ),
-        ]
-        return TradeDecisionWO(order_list, self, self._trade_range)
-
-
-executor_config_template = {
-    "class": "NestedExecutor",
-    "module_path": "qlib.backtest.executor",
-    "kwargs": {
-        "time_per_step": COARSEST_GRANULARITY,
-        "inner_strategy": {
-            "class": "DecomposedStrategy",
-            "module_path": "qlib.rl.order_execution.simulator_qlib",
-        },
-        "track_data": True,
-    },
-}
-top_strategy_config_template = {
-    "class": "SingleOrderStrategy",
-    "module_path": "qlib.rl.order_execution.simulator_qlib",
-}
-exchange_kwargs_template = {
-    "freq": FINEST_GRANULARITY,
-}
-
-
-class StateMaintainer:
-    """
-    Maintain states of the environment.
-
-    Example usage::
-
-        maintainer = StateMaintainer(...)  # in reset
-        maintainer.update(...)  # in step
-        # get states in get_state from maintainer
-    """
-
-    def __init__(self, order: Order, time_per_step: str, tick_index: pd.DatetimeIndex, twap_price: float) -> None:
-        super().__init__()
-
-        self.position = order.amount
-        self._order = order
-        self._time_per_step = time_per_step
-        self._tick_index = tick_index
-        self._twap_price = twap_price
-
-        metric_keys = list(SAOEMetrics.__annotations__.keys())  # pylint: disable=no-member
-        self.history_exec = pd.DataFrame(columns=metric_keys).set_index("datetime")
-        self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime")
-        self.metrics: Optional[SAOEMetrics] = None
-
-    def update(
-        self,
-        executor: NestedExecutor,
-        done: bool,
-        all_indicators: dict,
-    ) -> None:
-        inner_executor = executor.inner_executor
-        inner_strategy = executor.inner_strategy
-        assert isinstance(inner_strategy, DecomposedStrategy)
-
-        execute_order = inner_strategy.execute_order
-        execute_result = inner_strategy.execute_result
-        exec_vol = np.array([e[0].deal_amount for e in execute_result])
-        num_step = len(execute_result)
-
-        assert execute_order is not None
-
-        if num_step == 0:
-            market_volume = np.array([])
-            market_price = np.array([])
-            datetime_list = pd.DatetimeIndex([])
-        else:
-            market_volume = np.array(
-                inner_executor.trade_exchange.get_volume(
-                    execute_order.stock_id,
-                    execute_result[0][0].start_time,
-                    execute_result[-1][0].start_time,
-                    method=None,
-                ),
-            )
-
-            trade_value = all_indicators[FINEST_GRANULARITY].iloc[-num_step:]["value"].values
-            deal_amount = all_indicators[FINEST_GRANULARITY].iloc[-num_step:]["deal_amount"].values
-            market_price = trade_value / deal_amount
-
-            datetime_list = all_indicators[FINEST_GRANULARITY].index[-num_step:]
-
-        assert market_price.shape == market_volume.shape == exec_vol.shape
-
-        self.history_exec = dataframe_append(
-            self.history_exec,
-            self._collect_multi_order_metric(
-                order=self._order,
-                datetime=datetime_list,
-                market_vol=market_volume,
-                market_price=market_price,
-                exec_vol=exec_vol,
-                pa=all_indicators[self._time_per_step].iloc[-1]["pa"],
-            ),
-        )
-
-        self.history_steps = dataframe_append(
-            self.history_steps,
-            [
-                self._collect_single_order_metric(
-                    execute_order,
-                    execute_order.start_time,
-                    market_volume,
-                    market_price,
-                    exec_vol.sum(),
-                    exec_vol,
-                ),
-            ],
-        )
-
-        if done:
-            self.metrics = self._collect_single_order_metric(
-                self._order,
-                self._tick_index[0],  # start time
-                self.history_exec["market_volume"],
-                self.history_exec["market_price"],
-                self.history_steps["amount"].sum(),
-                self.history_exec["deal_amount"],
-            )
-
-        # TODO: check whether we need this. Can we get this information from Account?
-        # Do this at the end
-        self.position -= exec_vol.sum()
-
-    def _collect_multi_order_metric(
-        self,
-        order: Order,
-        datetime: pd.Timestamp,
-        market_vol: np.ndarray,
-        market_price: np.ndarray,
-        exec_vol: np.ndarray,
-        pa: float,
-    ) -> SAOEMetrics:
-        return SAOEMetrics(
-            # It should have the same keys with SAOEMetrics,
-            # but the values do not necessarily have the annotated type.
-            # Some values could be vectorized (e.g., exec_vol).
-            stock_id=order.stock_id,
-            datetime=datetime,
-            direction=order.direction,
-            market_volume=market_vol,
-            market_price=market_price,
-            amount=exec_vol,
-            inner_amount=exec_vol,
-            deal_amount=exec_vol,
-            trade_price=market_price,
-            trade_value=market_price * exec_vol,
-            position=self.position - np.cumsum(exec_vol),
-            ffr=exec_vol / order.amount,
-            pa=pa,
-        )
-
-    def _collect_single_order_metric(
-        self,
-        order: Order,
-        datetime: pd.Timestamp,
-        market_vol: np.ndarray,
-        market_price: np.ndarray,
-        amount: float,  # intended to trade such amount
-        exec_vol: np.ndarray,
-    ) -> SAOEMetrics:
-        assert len(market_vol) == len(market_price) == len(exec_vol)
-
-        if np.abs(np.sum(exec_vol)) < EPS:
-            exec_avg_price = 0.0
-        else:
-            exec_avg_price = cast(float, np.average(market_price, weights=exec_vol))  # could be nan
-            if hasattr(exec_avg_price, "item"):  # could be numpy scalar
-                exec_avg_price = exec_avg_price.item()  # type: ignore
-
-        exec_sum = exec_vol.sum()
-        return SAOEMetrics(
-            stock_id=order.stock_id,
-            datetime=datetime,
-            direction=order.direction,
-            market_volume=market_vol.sum(),
-            market_price=market_price.mean() if len(market_price) > 0 else np.nan,
-            amount=amount,
-            inner_amount=exec_sum,
-            deal_amount=exec_sum,  # in this simulator, there's no other restrictions
-            trade_price=exec_avg_price,
-            trade_value=float(np.sum(market_price * exec_vol)),
-            position=self.position - exec_sum,
-            ffr=float(exec_sum / order.amount),
-            pa=price_advantage(exec_avg_price, self._twap_price, order.direction),
-        )
+from qlib.rl.strategy.saoe import SAOEStrategy
 
 
 class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
@@ -292,8 +30,10 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
         A string to describe the time granularity of each step. Current support "1min", "30min", and "1day"
     qlib_config (dict):
         Configuration used to initialize Qlib.
-    inner_executor_config (dict):
-        Inner executor configuration
+    strategy_config (dict):
+        Strategy configuration
+    executor_config (dict):
+        Executor configuration
     exchange_config (dict):
         Exchange configuration
     """
@@ -303,7 +43,8 @@ def __init__(
         order: Order,
         time_per_step: str,  # "1min", "30min", "1day"
         qlib_config: dict,
-        inner_executor_config: dict,
+        strategy_config: dict,
+        executor_config: dict,
         exchange_config: dict,
     ) -> None:
         assert time_per_step in ("1min", "30min", "1day")
@@ -314,89 +55,72 @@ def __init__(
 
         init_qlib(qlib_config)
 
-        self._time_per_step = time_per_step
-        self._ticks_per_step = int(pd.Timedelta(time_per_step).total_seconds() // 60)
-
         self._executor: Optional[NestedExecutor] = None
         self._collect_data_loop: Optional[Generator] = None
+        self.reset(order, time_per_step, strategy_config, executor_config, exchange_config)
 
-        self._done = False
-
-        self.reset(order, inner_executor_config, exchange_config)
-
-    def reset(self, order: Order, inner_executor_config: dict, exchange_config: dict) -> None:
-        order_date = pd.Timestamp(order.start_time.date())
-
-        top_strategy_config: dict = copy.deepcopy(top_strategy_config_template)
-        top_strategy_config.update({
-            "kwargs": {
-                "order": order,
-                "trade_range": TradeRangeByTime(order.start_time.time(), order.end_time.time()),
-                "instrument": order.stock_id,
-            }
-        })
-
-        executor_config: dict = copy.deepcopy(executor_config_template)
-        executor_config["kwargs"].update({
-            "inner_executor": inner_executor_config,
-            "start_time": order_date,
-            "end_time": order_date,
-        })
-
-        exchange_kwargs: dict = copy.deepcopy(exchange_kwargs_template)
-        exchange_kwargs.update({"codes": [order.stock_id], **exchange_config})
-
+    def reset(
+        self,
+        order: Order,
+        time_per_step: str,
+        strategy_config: dict,
+        executor_config: dict,
+        exchange_config: dict,
+    ) -> None:
         top_strategy, self._executor = get_strategy_executor(
-            start_time=order_date,
-            end_time=order_date + pd.DateOffset(1),
-            strategy=top_strategy_config,
+            start_time=pd.Timestamp(order.start_time.date()),
+            end_time=pd.Timestamp(order.start_time.date()) + pd.DateOffset(1),
+            strategy=strategy_config,
             executor=executor_config,
             benchmark=order.stock_id,
             account=1e12,
-            exchange_kwargs=exchange_kwargs,
+            exchange_kwargs=exchange_config,
             pos_type="InfPosition",
         )
+        assert isinstance(self._executor, NestedExecutor)
         top_strategy.reset(level_infra=self._executor.get_level_infra())
 
         exchange = self._executor.trade_exchange
-        self._ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)])
-        self._ticks_for_order = get_ticks_slice(
-            self._ticks_index,
+        ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)])
+        ticks_for_order = get_ticks_slice(
+            ticks_index,
             order.start_time,
             order.end_time,
             include_end=True,
         )
-        self._backtest_data = QlibIntradayBacktestData(
+        backtest_data = QlibIntradayBacktestData(
             order=order,
             exchange=exchange,
-            start_time=self._ticks_for_order[0],
-            end_time=self._ticks_for_order[-1],
+            start_time=ticks_for_order[0],
+            end_time=ticks_for_order[-1],
         )
-        self.twap_price = self._backtest_data.get_deal_price().mean()
+
+        self.twap_price = backtest_data.get_deal_price().mean()
 
         self._collect_data_loop = self._executor.collect_data(top_strategy.generate_trade_decision(), level=0)
         assert isinstance(self._collect_data_loop, Generator)
 
-        self._iter_strategy(action=None)
-        self._done = False
+        self._last_yielded_saoe_strategy = self._iter_strategy(action=None)
 
-        self._maintainer = StateMaintainer(
+        assert isinstance(self._executor.inner_strategy, SAOEStrategy)
+        self._executor.inner_strategy.create_saoe_maintainer(
             order=order,
-            time_per_step=self._time_per_step,
-            tick_index=self._ticks_index,
+            executor=self._executor.inner_executor,
+            backtest_data=backtest_data,
+            time_per_step=time_per_step,
+            ticks_index=ticks_index,
             twap_price=self.twap_price,
+            ticks_for_order=ticks_for_order,
         )
 
-        self._order = order
-
-    def _iter_strategy(self, action: float = None) -> DecomposedStrategy:
-        """Iterate the _collect_data_loop until we get the next yield DecomposedStrategy."""
+    def _iter_strategy(self, action: float = None) -> SAOEStrategy:
+        """Iterate the _collect_data_loop until we get the next yielded SAOEStrategy."""
         assert self._collect_data_loop is not None
 
         strategy = next(self._collect_data_loop) if action is None else self._collect_data_loop.send(action)
-        while not isinstance(strategy, DecomposedStrategy):
+        while not isinstance(strategy, SAOEStrategy):
             strategy = next(self._collect_data_loop) if action is None else self._collect_data_loop.send(action)
-        assert isinstance(strategy, DecomposedStrategy)
+        assert isinstance(strategy, SAOEStrategy)
         return strategy
 
     def step(self, action: float) -> None:
@@ -408,35 +132,17 @@ def step(self, action: float) -> None:
             The amount you wish to deal. The simulator doesn't guarantee all the amount to be successfully dealt.
         """
 
-        assert not self._done, "Simulator has already done!"
+        assert not self.done(), "Simulator has already done!"
 
         try:
-            self._iter_strategy(action=action)
+            self._last_yielded_saoe_strategy = self._iter_strategy(action=action)
         except StopIteration:
-            self._done = True
+            pass
 
         assert self._executor is not None
-        _, all_indicators = get_portfolio_and_indicator(self._executor)
-
-        self._maintainer.update(
-            executor=self._executor,
-            done=self._done,
-            all_indicators=all_indicators,
-        )
 
     def get_state(self) -> SAOEState:
-        return SAOEState(
-            order=self._order,
-            cur_time=self._executor.inner_executor.trade_calendar.get_step_time()[0],
-            position=self._maintainer.position,
-            history_exec=self._maintainer.history_exec,
-            history_steps=self._maintainer.history_steps,
-            metrics=self._maintainer.metrics,
-            backtest_data=self._backtest_data,
-            ticks_per_step=self._ticks_per_step,
-            ticks_index=self._ticks_index,
-            ticks_for_order=self._ticks_for_order,
-        )
+        return self._last_yielded_saoe_strategy.maintainer.saoe_state
 
     def done(self) -> bool:
-        return self._done
+        return not self._executor.is_collecting
diff --git a/qlib/rl/order_execution/state_maintainer.py b/qlib/rl/order_execution/state_maintainer.py
new file mode 100644
index 0000000000..af9ed5e9c0
--- /dev/null
+++ b/qlib/rl/order_execution/state_maintainer.py
@@ -0,0 +1,219 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from typing import cast, Optional
+
+import numpy as np
+import pandas as pd
+from qlib.backtest import Order
+from qlib.backtest.executor import BaseExecutor
+from qlib.backtest.utils import TradeCalendarManager
+from qlib.constant import EPS
+from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
+from qlib.rl.order_execution import SAOEMetrics, SAOEState
+from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, get_ticks_slice, price_advantage
+
+
+class SAOEStateMaintainer:
+    """
+    Maintain states of the environment.
+
+    Example usage::
+
+        maintainer = SAOEStateMaintainer(...)  # in reset
+        maintainer.update(...)  # in step
+        # get states in get_state from maintainer
+    """
+
+    def __init__(
+        self,
+        order: Order,
+        executor: BaseExecutor,
+        backtest_data: QlibIntradayBacktestData,
+        time_per_step: str,
+        ticks_index: pd.DatetimeIndex,
+        twap_price: float,
+        ticks_for_order: pd.DatetimeIndex,
+    ) -> None:
+        super().__init__()
+
+        self.position = order.amount
+        self.order = order
+        self.executor = executor
+        self.backtest_data = backtest_data
+        self.time_per_step = time_per_step
+        self.ticks_index = ticks_index
+        self.ticks_for_order = ticks_for_order
+        self.twap_price = twap_price
+
+        metric_keys = list(SAOEMetrics.__annotations__.keys())  # pylint: disable=no-member
+        self.history_exec = pd.DataFrame(columns=metric_keys).set_index("datetime")
+        self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime")
+        self.metrics: Optional[SAOEMetrics] = None
+
+        self.cur_time = ticks_for_order[0]
+        self.ticks_per_step = int(pd.Timedelta(self.time_per_step).total_seconds() // 60)
+
+    def _next_time(self) -> pd.Timestamp:
+        current_loc = self.ticks_index.get_loc(self.cur_time)
+        next_loc = current_loc + self.ticks_per_step
+        next_loc = next_loc - next_loc % self.ticks_per_step
+        if next_loc < len(self.ticks_index) and self.ticks_index[next_loc] < self.order.end_time:
+            return self.ticks_index[next_loc]
+        else:
+            return self.order.end_time
+
+    def update(self, execute_result: list) -> None:
+        exec_vol = np.array([e[0].deal_amount for e in execute_result])
+        num_step = len(execute_result)
+
+        if num_step == 0:
+            market_volume = np.array([])
+            market_price = np.array([])
+            datetime_list = pd.DatetimeIndex([])
+        else:
+            market_volume = np.array(
+                self.executor.trade_exchange.get_volume(
+                    self.order.stock_id,
+                    execute_result[0][0].start_time,
+                    execute_result[-1][0].start_time,
+                    method=None,
+                ),
+            )
+
+            # Get data from the SimulatorExecutor's (lowest-level executor) indicator
+            simulator_executor = get_simulator_executor(self.executor)
+            simulator_trade_account = simulator_executor.trade_account
+            simulator_df = simulator_trade_account.get_trade_indicator().generate_trade_indicators_dataframe()
+
+            trade_value = simulator_df.iloc[-num_step:]["value"].values
+            deal_amount = simulator_df.iloc[-num_step:]["deal_amount"].values
+            market_price = trade_value / deal_amount
+            datetime_list = simulator_df.index[-num_step:]
+
+        assert market_price.shape == market_volume.shape == exec_vol.shape
+
+        # Get data from the current level executor's indicator
+        current_trade_account = self.executor.trade_account
+        current_df = current_trade_account.get_trade_indicator().generate_trade_indicators_dataframe()
+        self.history_exec = dataframe_append(
+            self.history_exec,
+            self._collect_multi_order_metric(
+                order=self.order,
+                datetime=datetime_list,
+                market_vol=market_volume,
+                market_price=market_price,
+                exec_vol=exec_vol,
+                pa=current_df.iloc[-1]["pa"],
+            ),
+        )
+
+        self.history_steps = dataframe_append(
+            self.history_steps,
+            [
+                self._collect_single_order_metric(
+                    self.order,
+                    self.cur_time,
+                    market_volume,
+                    market_price,
+                    exec_vol.sum(),
+                    exec_vol,
+                ),
+            ],
+        )
+
+        # TODO: check whether we need this. Can we get this information from Account?
+        # Do this at the end
+        self.position -= exec_vol.sum()
+
+        self.cur_time = self._next_time()
+
+    def generate_metrics_after_done(self) -> None:
+        """Generate metrics once the upper level execution is done"""
+
+        self.metrics = self._collect_single_order_metric(
+            self.order,
+            self.ticks_index[0],  # start time
+            self.history_exec["market_volume"],
+            self.history_exec["market_price"],
+            self.history_steps["amount"].sum(),
+            self.history_exec["deal_amount"],
+        )
+
+    def _collect_multi_order_metric(
+        self,
+        order: Order,
+        datetime: pd.Timestamp,
+        market_vol: np.ndarray,
+        market_price: np.ndarray,
+        exec_vol: np.ndarray,
+        pa: float,
+    ) -> SAOEMetrics:
+        return SAOEMetrics(
+            # It should have the same keys with SAOEMetrics,
+            # but the values do not necessarily have the annotated type.
+            # Some values could be vectorized (e.g., exec_vol).
+            stock_id=order.stock_id,
+            datetime=datetime,
+            direction=order.direction,
+            market_volume=market_vol,
+            market_price=market_price,
+            amount=exec_vol,
+            inner_amount=exec_vol,
+            deal_amount=exec_vol,
+            trade_price=market_price,
+            trade_value=market_price * exec_vol,
+            position=self.position - np.cumsum(exec_vol),
+            ffr=exec_vol / order.amount,
+            pa=pa,
+        )
+
+    def _collect_single_order_metric(
+        self,
+        order: Order,
+        datetime: pd.Timestamp,
+        market_vol: np.ndarray,
+        market_price: np.ndarray,
+        amount: float,  # intended to trade such amount
+        exec_vol: np.ndarray,
+    ) -> SAOEMetrics:
+        assert len(market_vol) == len(market_price) == len(exec_vol)
+
+        if np.abs(np.sum(exec_vol)) < EPS:
+            exec_avg_price = 0.0
+        else:
+            exec_avg_price = cast(float, np.average(market_price, weights=exec_vol))  # could be nan
+            if hasattr(exec_avg_price, "item"):  # could be numpy scalar
+                exec_avg_price = exec_avg_price.item()  # type: ignore
+
+        exec_sum = exec_vol.sum()
+        return SAOEMetrics(
+            stock_id=order.stock_id,
+            datetime=datetime,
+            direction=order.direction,
+            market_volume=market_vol.sum(),
+            market_price=market_price.mean() if len(market_price) > 0 else np.nan,
+            amount=amount,
+            inner_amount=exec_sum,
+            deal_amount=exec_sum,  # in this simulator, there's no other restrictions
+            trade_price=exec_avg_price,
+            trade_value=float(np.sum(market_price * exec_vol)),
+            position=self.position - exec_sum,
+            ffr=float(exec_sum / order.amount),
+            pa=price_advantage(exec_avg_price, self.twap_price, order.direction),
+        )
+
+    @property
+    def saoe_state(self) -> SAOEState:
+        return SAOEState(
+            order=self.order,
+            cur_time=self.executor.trade_calendar.get_step_time()[0],
+            position=self.position,
+            history_exec=self.history_exec,
+            history_steps=self.history_steps,
+            metrics=self.metrics,
+            backtest_data=self.backtest_data,
+            ticks_per_step=int(pd.Timedelta(self.time_per_step).total_seconds() // 60),
+            ticks_index=self.ticks_index,
+            ticks_for_order=self.ticks_for_order,
+        )
diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py
index 779f860c33..123cc1a727 100644
--- a/qlib/rl/order_execution/utils.py
+++ b/qlib/rl/order_execution/utils.py
@@ -9,9 +9,8 @@
 import pandas as pd
 
 from qlib.backtest.decision import OrderDir
-from qlib.backtest.executor import BaseExecutor
+from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor
 from qlib.rl.order_execution.simulator_simple import _float_or_ndarray, ONE_SEC
-from qlib.utils.time import Freq
 
 
 def get_ticks_slice(
@@ -57,18 +56,8 @@ def price_advantage(
         return cast(_float_or_ndarray, res_wo_nan)
 
 
-def get_portfolio_and_indicator(executor: BaseExecutor) -> Tuple[dict, dict]:
-    all_executors = executor.get_all_executors()
-    all_portfolio_metrics = {
-        "{}{}".format(*Freq.parse(_executor.time_per_step)): _executor.trade_account.get_portfolio_metrics()
-        for _executor in all_executors
-        if _executor.trade_account.is_port_metr_enabled()
-    }
-
-    all_indicators = {}
-    for _executor in all_executors:
-        key = "{}{}".format(*Freq.parse(_executor.time_per_step))
-        all_indicators[key] = _executor.trade_account.get_trade_indicator().generate_trade_indicators_dataframe()
-        all_indicators[key + "_obj"] = _executor.trade_account.get_trade_indicator()
-
-    return all_portfolio_metrics, all_indicators
+def get_simulator_executor(executor: BaseExecutor) -> SimulatorExecutor:
+    while isinstance(executor, NestedExecutor):
+        executor = executor.inner_executor
+    assert isinstance(executor, SimulatorExecutor)
+    return executor
diff --git a/qlib/rl/strategy/__init__.py b/qlib/rl/strategy/__init__.py
new file mode 100644
index 0000000000..59e481eb93
--- /dev/null
+++ b/qlib/rl/strategy/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
diff --git a/qlib/rl/strategy/decomposed.py b/qlib/rl/strategy/decomposed.py
new file mode 100644
index 0000000000..d6fdf1b534
--- /dev/null
+++ b/qlib/rl/strategy/decomposed.py
@@ -0,0 +1,44 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from typing import Any, Generator, Optional
+
+from qlib.backtest import CommonInfrastructure
+from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO
+from qlib.backtest.utils import LevelInfrastructure
+from qlib.rl.order_execution.state_maintainer import SAOEStateMaintainer
+from qlib.rl.strategy.saoe import SAOEStrategy
+
+
+class DecomposedStrategy(SAOEStrategy):
+    def __init__(
+        self,
+        outer_trade_decision: BaseTradeDecision = None,
+        level_infra: LevelInfrastructure = None,
+        common_infra: CommonInfrastructure = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs)
+        self.maintainer: Optional[SAOEStateMaintainer] = None
+
+    def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]:
+        # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside
+        # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`,
+        # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner
+        # level strategy in this way.
+        exec_vol = yield self
+
+        oh = self.trade_exchange.get_order_helper()
+        order = oh.create(self._order.stock_id, exec_vol, self._order.direction)
+
+        return TradeDecisionWO([order], self)
+
+    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
+        return outer_trade_decision
+
+    def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None:
+        super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
+        if outer_trade_decision is not None:
+            order_list = outer_trade_decision.order_list
+            assert len(order_list) == 1
+            self._order = order_list[0]
diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py
new file mode 100644
index 0000000000..1528698e7b
--- /dev/null
+++ b/qlib/rl/strategy/saoe.py
@@ -0,0 +1,57 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from abc import ABCMeta
+from typing import Optional
+
+import pandas as pd
+from qlib.backtest.decision import BaseTradeDecision, Order
+from qlib.backtest.executor import BaseExecutor
+from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure
+from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
+from qlib.rl.order_execution.state_maintainer import SAOEStateMaintainer
+from qlib.strategy.base import RLStrategy
+
+
+class SAOEStrategy(RLStrategy, metaclass=ABCMeta):
+    """RL-based strategies that use SAOEState as state."""
+
+    def __init__(
+        self,
+        policy,
+        outer_trade_decision: BaseTradeDecision = None,
+        level_infra: LevelInfrastructure = None,
+        common_infra: CommonInfrastructure = None,
+        **kwargs,
+    ) -> None:
+        super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs)
+
+        self.maintainer: Optional[SAOEStateMaintainer] = None
+
+    def create_saoe_maintainer(
+        self,
+        order: Order,
+        executor: BaseExecutor,
+        backtest_data: QlibIntradayBacktestData,
+        time_per_step: str,
+        ticks_index: pd.DatetimeIndex,
+        twap_price: float,
+        ticks_for_order: pd.DatetimeIndex,
+    ) -> None:
+        self.maintainer = SAOEStateMaintainer(
+            order=order,
+            executor=executor,
+            backtest_data=backtest_data,
+            time_per_step=time_per_step,
+            ticks_index=ticks_index,
+            twap_price=twap_price,
+            ticks_for_order=ticks_for_order,
+        )
+
+    def post_upper_level_exe_step(self) -> None:
+        self.maintainer.generate_metrics_after_done()
+
+    def post_exe_step(self, execute_result: list) -> None:
+        self.maintainer.update(
+            execute_result=execute_result,
+        )
diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py
new file mode 100644
index 0000000000..0055781973
--- /dev/null
+++ b/qlib/rl/strategy/single_order.py
@@ -0,0 +1,35 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from qlib.backtest import Order
+from qlib.backtest.decision import BaseTradeDecision, OrderHelper, TradeDecisionWO, TradeRange
+from qlib.strategy.base import BaseStrategy
+
+
+class SingleOrderStrategy(BaseStrategy):
+    # this logic is copied from FileOrderStrategy
+    def __init__(
+        self,
+        order: Order,
+        trade_range: TradeRange,
+        instrument: str,
+    ) -> None:
+        super().__init__()
+
+        self._order = order
+        self._trade_range = trade_range
+        self._instrument = instrument
+
+    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
+        return outer_trade_decision
+
+    def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO:
+        oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper()
+        order_list = [
+            oh.create(
+                code=self._instrument,
+                amount=self._order.amount,
+                direction=self._order.direction,
+            ),
+        ]
+        return TradeDecisionWO(order_list, self, self._trade_range)
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 27df347fc5..888adfa2ba 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -8,6 +8,7 @@
 if TYPE_CHECKING:
     from qlib.backtest.exchange import Exchange
     from qlib.backtest.position import BasePosition
+    from qlib.backtest.executor import BaseExecutor
 
 from typing import Tuple
 
@@ -207,6 +208,12 @@ def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]:
         range_limit = self.outer_trade_decision.get_data_cal_range_limit(rtype=rtype)
         return max(cal_range[0], range_limit[0]), min(cal_range[1], range_limit[1])
 
+    def post_upper_level_exe_step(self) -> None:
+        """
+        A hook for doing something after the upper-level executor has finished its execution (for example,
+        finalizing the metrics collection).
+        """
+
     def post_exe_step(self, execute_result: list) -> None:
         """
         A hook for doing sth after the corresponding executor finished its execution.
diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py
index ac94e589e5..28a549eec6 100644
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -1,12 +1,14 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
+
 import sys
 from pathlib import Path
+from typing import Tuple
 
 import pandas as pd
 import pytest
 
-from qlib.backtest.decision import Order, OrderDir
+from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime
 from qlib.backtest.executor import SimulatorExecutor
 from qlib.rl.order_execution import CategoricalActionInterpreter
 from qlib.rl.order_execution.objects import FINEST_GRANULARITY
@@ -31,31 +33,72 @@ def get_order() -> Order:
     )
 
 
-def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecutionQlib:
-    _inner_executor_config = {
+def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]:
+    strategy_config = {
+        "class": "SingleOrderStrategy",
+        "module_path": "qlib.rl.strategy.single_order",
+        "kwargs": {
+            "order": order,
+            "trade_range": TradeRangeByTime(order.start_time.time(), order.end_time.time()),
+            "instrument": order.stock_id,
+        },
+    }
+
+    executor_config = {
         "class": "NestedExecutor",
         "module_path": "qlib.backtest.executor",
         "kwargs": {
-            "time_per_step": time_per_step,
-            "inner_strategy": {
-                "class": "TWAPStrategy",
-                "module_path": "qlib.contrib.strategy.rule_strategy",
-            },
+            "time_per_step": "1day",
+            "inner_strategy": {"class": "DecomposedStrategy", "module_path": "qlib.rl.strategy.decomposed"},
+            "track_data": True,
             "inner_executor": {
-                "class": "SimulatorExecutor",
+                "class": "NestedExecutor",
                 "module_path": "qlib.backtest.executor",
                 "kwargs": {
-                    "time_per_step": FINEST_GRANULARITY,
-                    "verbose": False,
-                    "trade_type": SimulatorExecutor.TT_SERIAL,
-                    "generate_report": False,
+                    "time_per_step": time_per_step,
+                    "inner_strategy": {
+                        "class": "TWAPStrategy",
+                        "module_path": "qlib.contrib.strategy.rule_strategy",
+                    },
+                    "inner_executor": {
+                        "class": "SimulatorExecutor",
+                        "module_path": "qlib.backtest.executor",
+                        "kwargs": {
+                            "time_per_step": FINEST_GRANULARITY,
+                            "verbose": False,
+                            "trade_type": SimulatorExecutor.TT_SERIAL,
+                            "generate_report": False,
+                            "track_data": True,
+                        }
+                    },
                     "track_data": True,
-                }
+                },
             },
-            "track_data": True,
+            "start_time": pd.Timestamp(order.start_time.date()),
+            "end_time": pd.Timestamp(order.start_time.date()),
         },
     }
 
+    exchange_config = {
+        "freq": FINEST_GRANULARITY,
+        "codes": [order.stock_id],
+        "limit_threshold": ("$ask == 0", "$bid == 0"),
+        "deal_price": ("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"),
+        "volume_threshold": {
+            "all": ("cum", "0.2 * DayCumsum($volume, '9:30', '14:29')"),
+            "buy": ("current", "$askV1"),
+            "sell": ("current", "$bidV1"),
+        },
+        "open_cost": 0.0005,
+        "close_cost": 0.0015,
+        "min_cost": 5.0,
+        "trade_unit": None,
+    }
+
+    return strategy_config, executor_config, exchange_config
+
+
+def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecutionQlib:
     DATA_ROOT_DIR = Path(__file__).parent.parent / ".data" / "rl" / "qlib_simulator"
 
     # fmt: off
@@ -74,25 +117,14 @@ def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecution
     }
     # fmt: on
 
-    exchange_config = {
-        "limit_threshold": ("$ask == 0", "$bid == 0"),
-        "deal_price": ("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"),
-        "volume_threshold": {
-            "all": ("cum", "0.2 * DayCumsum($volume, '9:30', '14:29')"),
-            "buy": ("current", "$askV1"),
-            "sell": ("current", "$bidV1"),
-        },
-        "open_cost": 0.0005,
-        "close_cost": 0.0015,
-        "min_cost": 5.0,
-        "trade_unit": None,
-    }
+    strategy_config, executor_config, exchange_config = get_configs(order, time_per_step)
 
     return SingleAssetOrderExecutionQlib(
         order=order,
         time_per_step=time_per_step,
         qlib_config=qlib_config,
-        inner_executor_config=_inner_executor_config,
+        strategy_config=strategy_config,
+        executor_config=executor_config,
         exchange_config=exchange_config,
     )
 
@@ -120,12 +152,12 @@ def test_simulator_first_step():
     assert is_close(state.history_exec["trade_price"].iloc[0], 149.566483)
     assert is_close(state.history_exec["trade_value"].iloc[0], 1495.664825)
     assert is_close(state.history_exec["position"].iloc[0], TOTAL_POSITION - AMOUNT / 30)
-    # assert state.history_exec["ffr"].iloc[0] == 1 / 60  # FIXME
+    assert is_close(state.history_exec["ffr"].iloc[0], AMOUNT / TOTAL_POSITION / 30)
 
     assert is_close(state.history_steps["market_volume"].iloc[0], 1254848.5756835938)
     assert state.history_steps["amount"].iloc[0] == AMOUNT
     assert state.history_steps["deal_amount"].iloc[0] == AMOUNT
-    assert state.history_steps["ffr"].iloc[0] == 1.0
+    assert state.history_steps["ffr"].iloc[0] == AMOUNT / TOTAL_POSITION
     assert is_close(
         state.history_steps["pa"].iloc[0] * (1.0 if order.direction == OrderDir.SELL else -1.0),
         (state.history_steps["trade_price"].iloc[0] / simulator.twap_price - 1) * 10000,
@@ -174,9 +206,3 @@ def test_interpreter() -> None:
         position_history.append(state.position)
 
         assert position_history[-1] == max(TOTAL_POSITION - TOTAL_POSITION / NUM_EXECUTION * (i + 1), 0.0)
-
-
-if __name__ == "__main__":
-    test_simulator_first_step()
-    test_simulator_stop_twap()
-    test_interpreter()

From 1e8f0e90c1ad0f5a781f2527c25aa63251eef5a3 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Wed, 3 Aug 2022 13:54:05 +0800
Subject: [PATCH 04/23] Optimize file structure

---
 qlib/rl/order_execution/interpreter.py        |   2 +-
 qlib/rl/order_execution/objects.py            |  10 ++
 qlib/rl/order_execution/reward.py             |   2 +-
 qlib/rl/order_execution/simulator_qlib.py     |   2 +-
 qlib/rl/order_execution/simulator_simple.py   | 104 ++----------------
 .../{state_maintainer.py => state.py}         |  90 ++++++++++++++-
 qlib/rl/order_execution/utils.py              |   8 +-
 qlib/rl/strategy/decomposed.py                |   2 +-
 qlib/rl/strategy/saoe.py                      |   2 +-
 9 files changed, 115 insertions(+), 107 deletions(-)
 rename qlib/rl/order_execution/{state_maintainer.py => state.py} (70%)

diff --git a/qlib/rl/order_execution/interpreter.py b/qlib/rl/order_execution/interpreter.py
index 602a15e54e..172ad1cff5 100644
--- a/qlib/rl/order_execution/interpreter.py
+++ b/qlib/rl/order_execution/interpreter.py
@@ -16,7 +16,7 @@
 from qlib.rl.interpreter import ActionInterpreter, StateInterpreter
 from qlib.typehint import TypedDict
 
-from .simulator_simple import SAOEState
+from qlib.rl.order_execution.state import SAOEState
 
 __all__ = [
     "FullHistoryStateInterpreter",
diff --git a/qlib/rl/order_execution/objects.py b/qlib/rl/order_execution/objects.py
index ee6358bcd3..a4ffb1a4f7 100644
--- a/qlib/rl/order_execution/objects.py
+++ b/qlib/rl/order_execution/objects.py
@@ -1,2 +1,12 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from typing import TypeVar
+
+import numpy as np
+import pandas as pd
+
 FINEST_GRANULARITY = "1min"
 COARSEST_GRANULARITY = "1day"
+ONE_SEC = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
+float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
\ No newline at end of file
diff --git a/qlib/rl/order_execution/reward.py b/qlib/rl/order_execution/reward.py
index f15a152c66..b4f021f6ad 100644
--- a/qlib/rl/order_execution/reward.py
+++ b/qlib/rl/order_execution/reward.py
@@ -9,7 +9,7 @@
 
 from qlib.rl.reward import Reward
 
-from .simulator_simple import SAOEMetrics, SAOEState
+from qlib.rl.order_execution.state import SAOEMetrics, SAOEState
 
 __all__ = ["PAPenaltyReward"]
 
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 22ac718e93..a59b61ad81 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -11,7 +11,7 @@
 from qlib.backtest.executor import NestedExecutor
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
 from qlib.rl.integration.feature import init_qlib
-from qlib.rl.order_execution.simulator_simple import SAOEState
+from qlib.rl.order_execution.state import SAOEState
 from qlib.rl.order_execution.utils import (
     get_ticks_slice,
 )
diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py
index 6d49457841..11d759cb9b 100644
--- a/qlib/rl/order_execution/simulator_simple.py
+++ b/qlib/rl/order_execution/simulator_simple.py
@@ -4,107 +4,22 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Any, NamedTuple, Optional, TypeVar, cast
+from typing import Any, Optional, cast
 
 import numpy as np
 import pandas as pd
 
 from qlib.backtest.decision import Order, OrderDir
 from qlib.constant import EPS
-from qlib.rl.data.pickle_styled import DealPriceType, IntradayBacktestData, load_simple_intraday_backtest_data
+from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data
+from qlib.rl.order_execution.objects import float_or_ndarray, ONE_SEC
+from qlib.rl.order_execution.state import SAOEMetrics, SAOEState
 from qlib.rl.simulator import Simulator
 from qlib.rl.utils import LogLevel
-from qlib.typehint import TypedDict
 
 # TODO: Integrating Qlib's native data with simulator_simple
 
-__all__ = ["SAOEMetrics", "SAOEState", "SingleAssetOrderExecution"]
-
-ONE_SEC = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
-
-
-class SAOEMetrics(TypedDict):
-    """Metrics for SAOE accumulated for a "period".
-    It could be accumulated for a day, or a period of time (e.g., 30min), or calculated separately for every minute.
-
-    Warnings
-    --------
-    The type hints are for single elements. In lots of times, they can be vectorized.
-    For example, ``market_volume`` could be a list of float (or ndarray) rather tahn a single float.
-    """
-
-    stock_id: str
-    """Stock ID of this record."""
-    datetime: pd.Timestamp | pd.DatetimeIndex  # TODO: check this
-    """Datetime of this record (this is index in the dataframe)."""
-    direction: int
-    """Direction of the order. 0 for sell, 1 for buy."""
-
-    # Market information.
-    market_volume: np.ndarray | float
-    """(total) market volume traded in the period."""
-    market_price: np.ndarray | float
-    """Deal price. If it's a period of time, this is the average market deal price."""
-
-    # Strategy records.
-
-    amount: np.ndarray | float
-    """Total amount (volume) strategy intends to trade."""
-    inner_amount: np.ndarray | float
-    """Total amount that the lower-level strategy intends to trade
-    (might be larger than amount, e.g., to ensure ffr)."""
-
-    deal_amount: np.ndarray | float
-    """Amount that successfully takes effect (must be less than inner_amount)."""
-    trade_price: np.ndarray | float
-    """The average deal price for this strategy."""
-    trade_value: np.ndarray | float
-    """Total worth of trading. In the simple simulation, trade_value = deal_amount * price."""
-    position: np.ndarray | float
-    """Position left after this "period"."""
-
-    # Accumulated metrics
-
-    ffr: np.ndarray | float
-    """Completed how much percent of the daily order."""
-
-    pa: np.ndarray | float
-    """Price advantage compared to baseline (i.e., trade with baseline market price).
-    The baseline is trade price when using TWAP strategy to execute this order.
-    Please note that there could be data leak here).
-    Unit is BP (basis point, 1/10000)."""
-
-
-class SAOEState(NamedTuple):
-    """Data structure holding a state for SAOE simulator."""
-
-    order: Order
-    """The order we are dealing with."""
-    cur_time: pd.Timestamp
-    """Current time, e.g., 9:30."""
-    position: float
-    """Current remaining volume to execute."""
-    history_exec: pd.DataFrame
-    """See :attr:`SingleAssetOrderExecution.history_exec`."""
-    history_steps: pd.DataFrame
-    """See :attr:`SingleAssetOrderExecution.history_steps`."""
-
-    metrics: Optional[SAOEMetrics]
-    """Daily metric, only available when the trading is in "done" state."""
-
-    backtest_data: IntradayBacktestData
-    """Backtest data is included in the state.
-    Actually, only the time index of this data is needed, at this moment.
-    I include the full data so that algorithms (e.g., VWAP) that relies on the raw data can be implemented.
-    Interpreter can use this as they wish, but they should be careful not to leak future data.
-    """
-
-    ticks_per_step: int
-    """How many ticks for each step."""
-    ticks_index: pd.DatetimeIndex
-    """Trading ticks in all day, NOT sliced by order (defined in data). e.g., [9:30, 9:31, ..., 14:59]."""
-    ticks_for_order: pd.DatetimeIndex
-    """Trading ticks sliced by order, e.g., [9:45, 9:46, ..., 14:44]."""
+__all__ = ["SingleAssetOrderExecution"]
 
 
 class SingleAssetOrderExecution(Simulator[Order, SAOEState, float]):
@@ -391,14 +306,11 @@ def _dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame:
         return pd.concat([df, other_df], axis=0)
 
 
-_float_or_ndarray = TypeVar("_float_or_ndarray", float, np.ndarray)
-
-
 def price_advantage(
-    exec_price: _float_or_ndarray,
+    exec_price: float_or_ndarray,
     baseline_price: float,
     direction: OrderDir | int,
-) -> _float_or_ndarray:
+) -> float_or_ndarray:
     if baseline_price == 0:  # something is wrong with data. Should be nan here
         if isinstance(exec_price, float):
             return 0.0
@@ -414,4 +326,4 @@ def price_advantage(
     if res_wo_nan.size == 1:
         return res_wo_nan.item()
     else:
-        return cast(_float_or_ndarray, res_wo_nan)
+        return cast(float_or_ndarray, res_wo_nan)
diff --git a/qlib/rl/order_execution/state_maintainer.py b/qlib/rl/order_execution/state.py
similarity index 70%
rename from qlib/rl/order_execution/state_maintainer.py
rename to qlib/rl/order_execution/state.py
index af9ed5e9c0..fd12e1d7f3 100644
--- a/qlib/rl/order_execution/state_maintainer.py
+++ b/qlib/rl/order_execution/state.py
@@ -1,7 +1,10 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-from typing import cast, Optional
+from typing import cast, NamedTuple, Optional
+
+from qlib.rl.data.pickle_styled import IntradayBacktestData
+from typing_extensions import TypedDict
 
 import numpy as np
 import pandas as pd
@@ -10,7 +13,6 @@
 from qlib.backtest.utils import TradeCalendarManager
 from qlib.constant import EPS
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
-from qlib.rl.order_execution import SAOEMetrics, SAOEState
 from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, get_ticks_slice, price_advantage
 
 
@@ -217,3 +219,87 @@ def saoe_state(self) -> SAOEState:
             ticks_index=self.ticks_index,
             ticks_for_order=self.ticks_for_order,
         )
+
+
+class SAOEMetrics(TypedDict):
+    """Metrics for SAOE accumulated for a "period".
+    It could be accumulated for a day, or a period of time (e.g., 30min), or calculated separately for every minute.
+
+    Warnings
+    --------
+    The type hints are for single elements. In lots of times, they can be vectorized.
+    For example, ``market_volume`` could be a list of float (or ndarray) rather than a single float.
+    """
+
+    stock_id: str
+    """Stock ID of this record."""
+    datetime: pd.Timestamp | pd.DatetimeIndex  # TODO: check this
+    """Datetime of this record (this is index in the dataframe)."""
+    direction: int
+    """Direction of the order. 0 for sell, 1 for buy."""
+
+    # Market information.
+    market_volume: np.ndarray | float
+    """(total) market volume traded in the period."""
+    market_price: np.ndarray | float
+    """Deal price. If it's a period of time, this is the average market deal price."""
+
+    # Strategy records.
+
+    amount: np.ndarray | float
+    """Total amount (volume) strategy intends to trade."""
+    inner_amount: np.ndarray | float
+    """Total amount that the lower-level strategy intends to trade
+    (might be larger than amount, e.g., to ensure ffr)."""
+
+    deal_amount: np.ndarray | float
+    """Amount that successfully takes effect (must be less than inner_amount)."""
+    trade_price: np.ndarray | float
+    """The average deal price for this strategy."""
+    trade_value: np.ndarray | float
+    """Total worth of trading. In the simple simulation, trade_value = deal_amount * price."""
+    position: np.ndarray | float
+    """Position left after this "period"."""
+
+    # Accumulated metrics
+
+    ffr: np.ndarray | float
+    """Completed how much percent of the daily order."""
+
+    pa: np.ndarray | float
+    """Price advantage compared to baseline (i.e., trade with baseline market price).
+    The baseline is trade price when using TWAP strategy to execute this order.
+    Please note that there could be data leak here.
+    Unit is BP (basis point, 1/10000)."""
+
+
+class SAOEState(NamedTuple):
+    """Data structure holding a state for SAOE simulator."""
+
+    order: Order
+    """The order we are dealing with."""
+    cur_time: pd.Timestamp
+    """Current time, e.g., 9:30."""
+    position: float
+    """Current remaining volume to execute."""
+    history_exec: pd.DataFrame
+    """See :attr:`SingleAssetOrderExecution.history_exec`."""
+    history_steps: pd.DataFrame
+    """See :attr:`SingleAssetOrderExecution.history_steps`."""
+
+    metrics: Optional[SAOEMetrics]
+    """Daily metric, only available when the trading is in "done" state."""
+
+    backtest_data: IntradayBacktestData
+    """Backtest data is included in the state.
+    Actually, only the time index of this data is needed, at this moment.
+    I include the full data so that algorithms (e.g., VWAP) that relies on the raw data can be implemented.
+    Interpreter can use this as they wish, but they should be careful not to leak future data.
+    """
+
+    ticks_per_step: int
+    """How many ticks for each step."""
+    ticks_index: pd.DatetimeIndex
+    """Trading ticks in all day, NOT sliced by order (defined in data). e.g., [9:30, 9:31, ..., 14:59]."""
+    ticks_for_order: pd.DatetimeIndex
+    """Trading ticks sliced by order, e.g., [9:45, 9:46, ..., 14:44]."""
\ No newline at end of file
diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py
index 123cc1a727..fe38b94eec 100644
--- a/qlib/rl/order_execution/utils.py
+++ b/qlib/rl/order_execution/utils.py
@@ -10,7 +10,7 @@
 
 from qlib.backtest.decision import OrderDir
 from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor
-from qlib.rl.order_execution.simulator_simple import _float_or_ndarray, ONE_SEC
+from qlib.rl.order_execution.objects import float_or_ndarray, ONE_SEC
 
 
 def get_ticks_slice(
@@ -34,10 +34,10 @@ def dataframe_append(df: pd.DataFrame, other: Any) -> pd.DataFrame:
 
 
 def price_advantage(
-    exec_price: _float_or_ndarray,
+    exec_price: float_or_ndarray,
     baseline_price: float,
     direction: OrderDir | int,
-) -> _float_or_ndarray:
+) -> float_or_ndarray:
     if baseline_price == 0:  # something is wrong with data. Should be nan here
         if isinstance(exec_price, float):
             return 0.0
@@ -53,7 +53,7 @@ def price_advantage(
     if res_wo_nan.size == 1:
         return res_wo_nan.item()
     else:
-        return cast(_float_or_ndarray, res_wo_nan)
+        return cast(float_or_ndarray, res_wo_nan)
 
 
 def get_simulator_executor(executor: BaseExecutor) -> SimulatorExecutor:
diff --git a/qlib/rl/strategy/decomposed.py b/qlib/rl/strategy/decomposed.py
index d6fdf1b534..1da1540f4a 100644
--- a/qlib/rl/strategy/decomposed.py
+++ b/qlib/rl/strategy/decomposed.py
@@ -6,7 +6,7 @@
 from qlib.backtest import CommonInfrastructure
 from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO
 from qlib.backtest.utils import LevelInfrastructure
-from qlib.rl.order_execution.state_maintainer import SAOEStateMaintainer
+from qlib.rl.order_execution.state import SAOEStateMaintainer
 from qlib.rl.strategy.saoe import SAOEStrategy
 
 
diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py
index 1528698e7b..69864fcf46 100644
--- a/qlib/rl/strategy/saoe.py
+++ b/qlib/rl/strategy/saoe.py
@@ -9,7 +9,7 @@
 from qlib.backtest.executor import BaseExecutor
 from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
-from qlib.rl.order_execution.state_maintainer import SAOEStateMaintainer
+from qlib.rl.order_execution.state import SAOEStateMaintainer
 from qlib.strategy.base import RLStrategy
 
 

From 8a868a66dbf44b090ee77a6895e94e024f5ecdc8 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Wed, 3 Aug 2022 13:57:37 +0800
Subject: [PATCH 05/23] Optimize code

---
 qlib/data/dataset/__init__.py          | 2 +-
 qlib/rl/order_execution/interpreter.py | 1 +
 qlib/rl/order_execution/objects.py     | 2 +-
 qlib/rl/order_execution/policy.py      | 1 +
 qlib/rl/order_execution/state.py       | 2 ++
 5 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/qlib/data/dataset/__init__.py b/qlib/data/dataset/__init__.py
index c74092de34..5e98bfc97a 100644
--- a/qlib/data/dataset/__init__.py
+++ b/qlib/data/dataset/__init__.py
@@ -615,4 +615,4 @@ def _prepare_seg(self, slc: slice, **kwargs) -> TSDataSampler:
         return tsds
 
 
-__all__ = ["Optional"]
+__all__ = ["Optional", "Dataset", "DatasetH"]
diff --git a/qlib/rl/order_execution/interpreter.py b/qlib/rl/order_execution/interpreter.py
index 172ad1cff5..09f5f343da 100644
--- a/qlib/rl/order_execution/interpreter.py
+++ b/qlib/rl/order_execution/interpreter.py
@@ -23,6 +23,7 @@
     "CurrentStepStateInterpreter",
     "CategoricalActionInterpreter",
     "TwapRelativeActionInterpreter",
+    "FullHistoryObs",
 ]
 
 
diff --git a/qlib/rl/order_execution/objects.py b/qlib/rl/order_execution/objects.py
index a4ffb1a4f7..2f6c81b825 100644
--- a/qlib/rl/order_execution/objects.py
+++ b/qlib/rl/order_execution/objects.py
@@ -9,4 +9,4 @@
 FINEST_GRANULARITY = "1min"
 COARSEST_GRANULARITY = "1day"
 ONE_SEC = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
-float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
\ No newline at end of file
+float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
diff --git a/qlib/rl/order_execution/policy.py b/qlib/rl/order_execution/policy.py
index 18c2e4f175..cfd3181ca2 100644
--- a/qlib/rl/order_execution/policy.py
+++ b/qlib/rl/order_execution/policy.py
@@ -1,5 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
+
 from __future__ import annotations
 
 from pathlib import Path
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index fd12e1d7f3..9379868a8c 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -1,6 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
+from __future__ import annotations
+
 from typing import cast, NamedTuple, Optional
 
 from qlib.rl.data.pickle_styled import IntradayBacktestData

From 03472eff4429f9af4e02371d5ddb978d0091909d Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Wed, 3 Aug 2022 14:01:23 +0800
Subject: [PATCH 06/23] Format code

---
 qlib/rl/aux_info.py                         |  2 +-
 qlib/rl/data/exchange_wrapper.py            |  1 +
 qlib/rl/integration/feature.py              |  2 +-
 qlib/rl/order_execution/interpreter.py      |  3 +--
 qlib/rl/order_execution/reward.py           |  3 +--
 qlib/rl/order_execution/simulator_qlib.py   |  5 ++---
 qlib/rl/order_execution/simulator_simple.py |  2 +-
 qlib/rl/order_execution/state.py            | 13 ++++++-------
 qlib/rl/order_execution/utils.py            |  4 ++--
 qlib/rl/strategy/saoe.py                    |  1 +
 qlib/rl/utils/finite_env.py                 |  3 ++-
 11 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/qlib/rl/aux_info.py b/qlib/rl/aux_info.py
index 9ab0834511..1fd581544e 100644
--- a/qlib/rl/aux_info.py
+++ b/qlib/rl/aux_info.py
@@ -3,7 +3,7 @@
 
 from __future__ import annotations
 
-from typing import Optional, TYPE_CHECKING, Generic, TypeVar
+from typing import TYPE_CHECKING, Generic, Optional, TypeVar
 
 from qlib.typehint import final
 
diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py
index bc36fa11b8..fb8daabf33 100644
--- a/qlib/rl/data/exchange_wrapper.py
+++ b/qlib/rl/data/exchange_wrapper.py
@@ -6,6 +6,7 @@
 import pandas as pd
 
 from qlib.backtest import Exchange, Order
+
 from .pickle_styled import IntradayBacktestData
 
 
diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py
index 2b4279e1a6..347b2775ba 100644
--- a/qlib/rl/integration/feature.py
+++ b/qlib/rl/integration/feature.py
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.
 
 import collections
-from typing import List, Optional
+from typing import List
 
 import pandas as pd
 
diff --git a/qlib/rl/order_execution/interpreter.py b/qlib/rl/order_execution/interpreter.py
index 09f5f343da..089fc553cf 100644
--- a/qlib/rl/order_execution/interpreter.py
+++ b/qlib/rl/order_execution/interpreter.py
@@ -14,9 +14,8 @@
 from qlib.constant import EPS
 from qlib.rl.data import pickle_styled
 from qlib.rl.interpreter import ActionInterpreter, StateInterpreter
-from qlib.typehint import TypedDict
-
 from qlib.rl.order_execution.state import SAOEState
+from qlib.typehint import TypedDict
 
 __all__ = [
     "FullHistoryStateInterpreter",
diff --git a/qlib/rl/order_execution/reward.py b/qlib/rl/order_execution/reward.py
index b4f021f6ad..99a88f8e44 100644
--- a/qlib/rl/order_execution/reward.py
+++ b/qlib/rl/order_execution/reward.py
@@ -7,9 +7,8 @@
 
 import numpy as np
 
-from qlib.rl.reward import Reward
-
 from qlib.rl.order_execution.state import SAOEMetrics, SAOEState
+from qlib.rl.reward import Reward
 
 __all__ = ["PAPenaltyReward"]
 
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index a59b61ad81..ff45a84781 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -6,15 +6,14 @@
 from typing import Generator, Optional
 
 import pandas as pd
+
 from qlib.backtest import get_strategy_executor
 from qlib.backtest.decision import Order
 from qlib.backtest.executor import NestedExecutor
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
 from qlib.rl.integration.feature import init_qlib
 from qlib.rl.order_execution.state import SAOEState
-from qlib.rl.order_execution.utils import (
-    get_ticks_slice,
-)
+from qlib.rl.order_execution.utils import get_ticks_slice
 from qlib.rl.simulator import Simulator
 from qlib.rl.strategy.saoe import SAOEStrategy
 
diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py
index 11d759cb9b..2c4ae822f4 100644
--- a/qlib/rl/order_execution/simulator_simple.py
+++ b/qlib/rl/order_execution/simulator_simple.py
@@ -12,7 +12,7 @@
 from qlib.backtest.decision import Order, OrderDir
 from qlib.constant import EPS
 from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data
-from qlib.rl.order_execution.objects import float_or_ndarray, ONE_SEC
+from qlib.rl.order_execution.objects import ONE_SEC, float_or_ndarray
 from qlib.rl.order_execution.state import SAOEMetrics, SAOEState
 from qlib.rl.simulator import Simulator
 from qlib.rl.utils import LogLevel
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index 9379868a8c..6b52cb39fb 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -3,19 +3,18 @@
 
 from __future__ import annotations
 
-from typing import cast, NamedTuple, Optional
-
-from qlib.rl.data.pickle_styled import IntradayBacktestData
-from typing_extensions import TypedDict
+from typing import NamedTuple, Optional, cast
 
 import numpy as np
 import pandas as pd
+from typing_extensions import TypedDict
+
 from qlib.backtest import Order
 from qlib.backtest.executor import BaseExecutor
-from qlib.backtest.utils import TradeCalendarManager
 from qlib.constant import EPS
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
-from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, get_ticks_slice, price_advantage
+from qlib.rl.data.pickle_styled import IntradayBacktestData
+from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, price_advantage
 
 
 class SAOEStateMaintainer:
@@ -304,4 +303,4 @@ class SAOEState(NamedTuple):
     ticks_index: pd.DatetimeIndex
     """Trading ticks in all day, NOT sliced by order (defined in data). e.g., [9:30, 9:31, ..., 14:59]."""
     ticks_for_order: pd.DatetimeIndex
-    """Trading ticks sliced by order, e.g., [9:45, 9:46, ..., 14:44]."""
\ No newline at end of file
+    """Trading ticks sliced by order, e.g., [9:45, 9:46, ..., 14:44]."""
diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py
index fe38b94eec..44012d9db1 100644
--- a/qlib/rl/order_execution/utils.py
+++ b/qlib/rl/order_execution/utils.py
@@ -3,14 +3,14 @@
 
 from __future__ import annotations
 
-from typing import Any, cast, Tuple
+from typing import Any, cast
 
 import numpy as np
 import pandas as pd
 
 from qlib.backtest.decision import OrderDir
 from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor
-from qlib.rl.order_execution.objects import float_or_ndarray, ONE_SEC
+from qlib.rl.order_execution.objects import ONE_SEC, float_or_ndarray
 
 
 def get_ticks_slice(
diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py
index 69864fcf46..e221fab092 100644
--- a/qlib/rl/strategy/saoe.py
+++ b/qlib/rl/strategy/saoe.py
@@ -5,6 +5,7 @@
 from typing import Optional
 
 import pandas as pd
+
 from qlib.backtest.decision import BaseTradeDecision, Order
 from qlib.backtest.executor import BaseExecutor
 from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure
diff --git a/qlib/rl/utils/finite_env.py b/qlib/rl/utils/finite_env.py
index 309b34e6dd..87f0900e16 100644
--- a/qlib/rl/utils/finite_env.py
+++ b/qlib/rl/utils/finite_env.py
@@ -11,13 +11,14 @@
 import copy
 import warnings
 from contextlib import contextmanager
-from typing import Any, Callable, cast, Dict, Generator, List, Optional, Set, Tuple, Type, Union
+from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Type, Union, cast
 
 import gym
 import numpy as np
 from tianshou.env import BaseVectorEnv, DummyVectorEnv, ShmemVectorEnv, SubprocVectorEnv
 
 from qlib.typehint import Literal
+
 from .log import LogWriter
 
 __all__ = [

From e1beab57a2dfea7a7e1cdc6573c9b7aecf6188d2 Mon Sep 17 00:00:00 2001
From: Default <huo53926@126.com>
Date: Wed, 3 Aug 2022 14:52:28 +0800
Subject: [PATCH 07/23] create_state_maintainer_recursive

---
 qlib/backtest/executor.py                 |   1 +
 qlib/rl/integration/__init__.py           |   4 -
 qlib/rl/integration/feature.py            | 109 ----------------------
 qlib/rl/order_execution/simulator_qlib.py |  94 +++++++++++++++++--
 qlib/rl/order_execution/state.py          |   6 +-
 qlib/rl/strategy/decomposed.py            |  11 +--
 qlib/rl/strategy/saoe.py                  |   9 +-
 qlib/strategy/base.py                     |   1 -
 tests/rl/test_qlib_simulator.py           |   2 +-
 9 files changed, 97 insertions(+), 140 deletions(-)
 delete mode 100644 qlib/rl/integration/__init__.py
 delete mode 100644 qlib/rl/integration/feature.py

diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index 6948faca30..501b1bbb35 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -480,6 +480,7 @@ def _collect_data(
                 # do nothing and just step forward
                 sub_cal.step()
 
+        # Let inner strategy know that the outer level execution is done.
         self.inner_strategy.post_upper_level_exe_step()
 
         return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list}
diff --git a/qlib/rl/integration/__init__.py b/qlib/rl/integration/__init__.py
deleted file mode 100644
index 765bdee0c1..0000000000
--- a/qlib/rl/integration/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-# TODO: find a better way to organize contents under this module.
diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py
deleted file mode 100644
index 347b2775ba..0000000000
--- a/qlib/rl/integration/feature.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-import collections
-from typing import List
-
-import pandas as pd
-
-import qlib
-from qlib.config import REG_CN
-from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select
-from qlib.data.dataset import DatasetH
-
-
-class LRUCache:
-    def __init__(self, pool_size: int = 200):
-        self.pool_size = pool_size
-        self.contents: dict = {}
-        self.keys: collections.deque = collections.deque()
-
-    def put(self, key, item):
-        if self.has(key):
-            self.keys.remove(key)
-        self.keys.append(key)
-        self.contents[key] = item
-        while len(self.contents) > self.pool_size:
-            self.contents.pop(self.keys.popleft())
-
-    def get(self, key):
-        return self.contents[key]
-
-    def has(self, key):
-        return key in self.contents
-
-
-class DataWrapper:
-    def __init__(
-        self,
-        feature_dataset: DatasetH,
-        backtest_dataset: DatasetH,
-        columns_today: List[str],
-        columns_yesterday: List[str],
-        _internal: bool = False,
-    ):
-        assert _internal, "Init function of data wrapper is for internal use only."
-
-        self.feature_dataset = feature_dataset
-        self.backtest_dataset = backtest_dataset
-        self.columns_today = columns_today
-        self.columns_yesterday = columns_yesterday
-
-        # TODO: We might have the chance to merge them.
-        self.feature_cache = LRUCache()
-        self.backtest_cache = LRUCache()
-
-    def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False) -> pd.DataFrame:
-        start_time, end_time = date.replace(hour=0, minute=0, second=0), date.replace(hour=23, minute=59, second=59)
-
-        if backtest:
-            dataset = self.backtest_dataset
-            cache = self.backtest_cache
-        else:
-            dataset = self.feature_dataset
-            cache = self.feature_cache
-
-        if cache.has((start_time, end_time, stock_id)):
-            return cache.get((start_time, end_time, stock_id))
-        data = dataset.handler.fetch(pd.IndexSlice[stock_id, start_time:end_time], level=None)
-        cache.put((start_time, end_time, stock_id), data)
-        return data
-
-
-def init_qlib(qlib_config: dict) -> None:
-    provider_uri_map = {
-        "day": qlib_config["provider_uri_day"].as_posix(),
-        "1min": qlib_config["provider_uri_1min"].as_posix(),
-    }
-    qlib.init(
-        region=REG_CN,
-        auto_mount=False,
-        custom_ops=[DayLast, FFillNan, BFillNan, Date, Select, IsNull, IsInf, Cut, DayCumsum],
-        expression_cache=None,
-        calendar_provider={
-            "class": "LocalCalendarProvider",
-            "module_path": "qlib.data.data",
-            "kwargs": {
-                "backend": {
-                    "class": "FileCalendarStorage",
-                    "module_path": "qlib.data.storage.file_storage",
-                    "kwargs": {"provider_uri_map": provider_uri_map},
-                },
-            },
-        },
-        feature_provider={
-            "class": "LocalFeatureProvider",
-            "module_path": "qlib.data.data",
-            "kwargs": {
-                "backend": {
-                    "class": "FileFeatureStorage",
-                    "module_path": "qlib.data.storage.file_storage",
-                    "kwargs": {"provider_uri_map": provider_uri_map},
-                },
-            },
-        },
-        provider_uri=provider_uri_map,
-        kernels=1,
-        redis_port=-1,
-        clear_mem_cache=False,  # init_qlib will be called for multiple times. Keep the cache for improving performance
-    )
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index ff45a84781..438caec10b 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -6,18 +6,94 @@
 from typing import Generator, Optional
 
 import pandas as pd
-
+import qlib
 from qlib.backtest import get_strategy_executor
 from qlib.backtest.decision import Order
-from qlib.backtest.executor import NestedExecutor
+from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor
+from qlib.config import REG_CN
+from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
-from qlib.rl.integration.feature import init_qlib
 from qlib.rl.order_execution.state import SAOEState
 from qlib.rl.order_execution.utils import get_ticks_slice
 from qlib.rl.simulator import Simulator
 from qlib.rl.strategy.saoe import SAOEStrategy
 
 
+def init_qlib(qlib_config: dict) -> None:
+    provider_uri_map = {
+        "day": qlib_config["provider_uri_day"].as_posix(),
+        "1min": qlib_config["provider_uri_1min"].as_posix(),
+    }
+    qlib.init(
+        region=REG_CN,
+        auto_mount=False,
+        custom_ops=[DayLast, FFillNan, BFillNan, Date, Select, IsNull, IsInf, Cut, DayCumsum],
+        expression_cache=None,
+        calendar_provider={
+            "class": "LocalCalendarProvider",
+            "module_path": "qlib.data.data",
+            "kwargs": {
+                "backend": {
+                    "class": "FileCalendarStorage",
+                    "module_path": "qlib.data.storage.file_storage",
+                    "kwargs": {"provider_uri_map": provider_uri_map},
+                },
+            },
+        },
+        feature_provider={
+            "class": "LocalFeatureProvider",
+            "module_path": "qlib.data.data",
+            "kwargs": {
+                "backend": {
+                    "class": "FileFeatureStorage",
+                    "module_path": "qlib.data.storage.file_storage",
+                    "kwargs": {"provider_uri_map": provider_uri_map},
+                },
+            },
+        },
+        provider_uri=provider_uri_map,
+        kernels=1,
+        redis_port=-1,
+        clear_mem_cache=False,  # init_qlib will be called for multiple times. Keep the cache for improving performance
+    )
+
+
+def create_state_maintainer_recursive(
+    executor: BaseExecutor,
+    order: Order,
+    backtest_data: QlibIntradayBacktestData,
+    time_per_step: str,
+    ticks_index: pd.DatetimeIndex,
+    twap_price: float,
+    ticks_for_order: pd.DatetimeIndex,
+) -> None:
+    if isinstance(executor, SimulatorExecutor):
+        return
+    else:
+        assert isinstance(executor, NestedExecutor)
+
+        if isinstance(executor.inner_strategy, SAOEStrategy):
+            executor.inner_strategy.create_saoe_maintainer(
+                order=order,
+                executor=executor.inner_executor,
+                backtest_data=backtest_data,
+                time_per_step=time_per_step,
+                ticks_index=ticks_index,
+                twap_price=twap_price,
+                ticks_for_order=ticks_for_order,
+            )
+
+        create_state_maintainer_recursive(
+            executor.inner_executor,
+            order,
+            backtest_data,
+            time_per_step,
+            ticks_index,
+            twap_price,
+            ticks_for_order,
+        )
+
+
 class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
     """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools.
 
@@ -54,7 +130,6 @@ def __init__(
 
         init_qlib(qlib_config)
 
-        self._executor: Optional[NestedExecutor] = None
         self._collect_data_loop: Optional[Generator] = None
         self.reset(order, time_per_step, strategy_config, executor_config, exchange_config)
 
@@ -66,7 +141,7 @@ def reset(
         executor_config: dict,
         exchange_config: dict,
     ) -> None:
-        top_strategy, self._executor = get_strategy_executor(
+        strategy, self._executor = get_strategy_executor(
             start_time=pd.Timestamp(order.start_time.date()),
             end_time=pd.Timestamp(order.start_time.date()) + pd.DateOffset(1),
             strategy=strategy_config,
@@ -77,7 +152,7 @@ def reset(
             pos_type="InfPosition",
         )
         assert isinstance(self._executor, NestedExecutor)
-        top_strategy.reset(level_infra=self._executor.get_level_infra())
+        strategy.reset(level_infra=self._executor.get_level_infra())
 
         exchange = self._executor.trade_exchange
         ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)])
@@ -96,15 +171,14 @@ def reset(
 
         self.twap_price = backtest_data.get_deal_price().mean()
 
-        self._collect_data_loop = self._executor.collect_data(top_strategy.generate_trade_decision(), level=0)
+        self._collect_data_loop = self._executor.collect_data(strategy.generate_trade_decision(), level=0)
         assert isinstance(self._collect_data_loop, Generator)
 
         self._last_yielded_saoe_strategy = self._iter_strategy(action=None)
 
-        assert isinstance(self._executor.inner_strategy, SAOEStrategy)
-        self._executor.inner_strategy.create_saoe_maintainer(
+        create_state_maintainer_recursive(
+            executor=self._executor,
             order=order,
-            executor=self._executor.inner_executor,
             backtest_data=backtest_data,
             time_per_step=time_per_step,
             ticks_index=ticks_index,
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index 6b52cb39fb..dd66813665 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -23,9 +23,9 @@ class SAOEStateMaintainer:
 
     Example usage::
 
-        maintainer = StateMaintainer(...)  # in reset
-        maintainer.update(...)  # in step
-        # get states in get_state from maintainer
+        maintainer = StateMaintainer(...)
+        maintainer.update(...)
+        state = maintainer.saoe_state
     """
 
     def __init__(
diff --git a/qlib/rl/strategy/decomposed.py b/qlib/rl/strategy/decomposed.py
index 1da1540f4a..7431fe3562 100644
--- a/qlib/rl/strategy/decomposed.py
+++ b/qlib/rl/strategy/decomposed.py
@@ -1,25 +1,24 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-from typing import Any, Generator, Optional
+from typing import Any, Generator
 
-from qlib.backtest import CommonInfrastructure
 from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO
-from qlib.backtest.utils import LevelInfrastructure
-from qlib.rl.order_execution.state import SAOEStateMaintainer
+from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure
 from qlib.rl.strategy.saoe import SAOEStrategy
 
 
 class DecomposedStrategy(SAOEStrategy):
+    """Decomposed strategy that needs actions from outside to generate trade decisions."""
+
     def __init__(
         self,
         outer_trade_decision: BaseTradeDecision = None,
         level_infra: LevelInfrastructure = None,
         common_infra: CommonInfrastructure = None,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs)
-        self.maintainer: Optional[SAOEStateMaintainer] = None
 
     def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]:
         # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside
diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py
index e221fab092..f6f17e5a8d 100644
--- a/qlib/rl/strategy/saoe.py
+++ b/qlib/rl/strategy/saoe.py
@@ -2,10 +2,9 @@
 # Licensed under the MIT License.
 
 from abc import ABCMeta
-from typing import Optional
+from typing import Any
 
 import pandas as pd
-
 from qlib.backtest.decision import BaseTradeDecision, Order
 from qlib.backtest.executor import BaseExecutor
 from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure
@@ -19,16 +18,14 @@ class SAOEStrategy(RLStrategy, metaclass=ABCMeta):
 
     def __init__(
         self,
-        policy,
+        policy: object,  # TODO: add accurate typehint later.
         outer_trade_decision: BaseTradeDecision = None,
         level_infra: LevelInfrastructure = None,
         common_infra: CommonInfrastructure = None,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs)
 
-        self.maintainer: Optional[SAOEStateMaintainer] = None
-
     def create_saoe_maintainer(
         self,
         order: Order,
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 888adfa2ba..550561cf54 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -8,7 +8,6 @@
 if TYPE_CHECKING:
     from qlib.backtest.exchange import Exchange
     from qlib.backtest.position import BasePosition
-    from qlib.backtest.executor import BaseExecutor
 
 from typing import Tuple
 
diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py
index 28a549eec6..d336ab2b6a 100644
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -69,7 +69,7 @@ def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]:
                             "trade_type": SimulatorExecutor.TT_SERIAL,
                             "generate_report": False,
                             "track_data": True,
-                        }
+                        },
                     },
                     "track_data": True,
                 },

From 25aeee583464ca577572b848e65687195d1f9219 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Thu, 4 Aug 2022 13:31:27 +0800
Subject: [PATCH 08/23] Remove explicit time_per_step

---
 qlib/rl/order_execution/simulator_qlib.py | 13 ++-----------
 tests/rl/test_qlib_simulator.py           | 15 +++++++--------
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 438caec10b..04cb143046 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -62,7 +62,6 @@ def create_state_maintainer_recursive(
     executor: BaseExecutor,
     order: Order,
     backtest_data: QlibIntradayBacktestData,
-    time_per_step: str,
     ticks_index: pd.DatetimeIndex,
     twap_price: float,
     ticks_for_order: pd.DatetimeIndex,
@@ -77,7 +76,7 @@ def create_state_maintainer_recursive(
                 order=order,
                 executor=executor.inner_executor,
                 backtest_data=backtest_data,
-                time_per_step=time_per_step,
+                time_per_step=executor.inner_executor.time_per_step,
                 ticks_index=ticks_index,
                 twap_price=twap_price,
                 ticks_for_order=ticks_for_order,
@@ -87,7 +86,6 @@ def create_state_maintainer_recursive(
             executor.inner_executor,
             order,
             backtest_data,
-            time_per_step,
             ticks_index,
             twap_price,
             ticks_for_order,
@@ -101,8 +99,6 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
     ----------
     order (Order):
         The seed to start an SAOE simulator is an order.
-    time_per_step (str):
-        A string to describe the time granularity of each step. Current support "1min", "30min", and "1day"
     qlib_config (dict):
         Configuration used to initialize Qlib.
     strategy_config (dict):
@@ -116,14 +112,11 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
     def __init__(
         self,
         order: Order,
-        time_per_step: str,  # "1min", "30min", "1day"
         qlib_config: dict,
         strategy_config: dict,
         executor_config: dict,
         exchange_config: dict,
     ) -> None:
-        assert time_per_step in ("1min", "30min", "1day")
-
         super().__init__(initial=order)
 
         assert order.start_time.date() == order.end_time.date(), "Start date and end date must be the same."
@@ -131,12 +124,11 @@ def __init__(
         init_qlib(qlib_config)
 
         self._collect_data_loop: Optional[Generator] = None
-        self.reset(order, time_per_step, strategy_config, executor_config, exchange_config)
+        self.reset(order, strategy_config, executor_config, exchange_config)
 
     def reset(
         self,
         order: Order,
-        time_per_step: str,
         strategy_config: dict,
         executor_config: dict,
         exchange_config: dict,
@@ -180,7 +172,6 @@ def reset(
             executor=self._executor,
             order=order,
             backtest_data=backtest_data,
-            time_per_step=time_per_step,
             ticks_index=ticks_index,
             twap_price=self.twap_price,
             ticks_for_order=ticks_for_order,
diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py
index d336ab2b6a..6a0bd4c329 100644
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -33,7 +33,7 @@ def get_order() -> Order:
     )
 
 
-def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]:
+def get_configs(order: Order) -> Tuple[dict, dict, dict]:
     strategy_config = {
         "class": "SingleOrderStrategy",
         "module_path": "qlib.rl.strategy.single_order",
@@ -55,7 +55,7 @@ def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]:
                 "class": "NestedExecutor",
                 "module_path": "qlib.backtest.executor",
                 "kwargs": {
-                    "time_per_step": time_per_step,
+                    "time_per_step": "30min",
                     "inner_strategy": {
                         "class": "TWAPStrategy",
                         "module_path": "qlib.contrib.strategy.rule_strategy",
@@ -98,7 +98,7 @@ def get_configs(order: Order, time_per_step: str) -> Tuple[dict, dict, dict]:
     return strategy_config, executor_config, exchange_config
 
 
-def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecutionQlib:
+def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib:
     DATA_ROOT_DIR = Path(__file__).parent.parent / ".data" / "rl" / "qlib_simulator"
 
     # fmt: off
@@ -117,11 +117,10 @@ def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecution
     }
     # fmt: on
 
-    strategy_config, executor_config, exchange_config = get_configs(order, time_per_step)
+    strategy_config, executor_config, exchange_config = get_configs(order)
 
     return SingleAssetOrderExecutionQlib(
         order=order,
-        time_per_step=time_per_step,
         qlib_config=qlib_config,
         strategy_config=strategy_config,
         executor_config=executor_config,
@@ -132,7 +131,7 @@ def get_simulator(order: Order, time_per_step: str) -> SingleAssetOrderExecution
 @python_version_request
 def test_simulator_first_step():
     order = get_order()
-    simulator = get_simulator(order, time_per_step="30min")
+    simulator = get_simulator(order)
     state = simulator.get_state()
     assert state.cur_time == pd.Timestamp("2019-03-04 09:30:00")
     assert state.position == TOTAL_POSITION
@@ -167,7 +166,7 @@ def test_simulator_first_step():
 @python_version_request
 def test_simulator_stop_twap() -> None:
     order = get_order()
-    simulator = get_simulator(order, time_per_step="30min")
+    simulator = get_simulator(order)
     NUM_STEPS = 7
     for i in range(NUM_STEPS):
         simulator.step(TOTAL_POSITION / NUM_STEPS)
@@ -194,7 +193,7 @@ def test_simulator_stop_twap() -> None:
 def test_interpreter() -> None:
     NUM_EXECUTION = 3
     order = get_order()
-    simulator = get_simulator(order, time_per_step="30min")
+    simulator = get_simulator(order)
     interpreter_action = CategoricalActionInterpreter(values=NUM_EXECUTION)
 
     NUM_STEPS = 7

From a84c1f198a3a0b9388ade9f74642a2b179d59d6f Mon Sep 17 00:00:00 2001
From: Default <huo53926@126.com>
Date: Fri, 5 Aug 2022 13:46:35 +0800
Subject: [PATCH 09/23] CI test passed

---
 qlib/backtest/executor.py                 |  2 +-
 qlib/backtest/utils.py                    |  9 +++-
 qlib/rl/order_execution/simulator_qlib.py | 60 +++++----------------
 qlib/rl/order_execution/state.py          | 23 ++++----
 qlib/rl/strategy/saoe.py                  | 66 ++++++++++++++---------
 qlib/strategy/base.py                     |  5 ++
 tests/rl/test_qlib_simulator.py           |  4 +-
 7 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index 501b1bbb35..07da6b57bc 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -114,7 +114,7 @@ def __init__(
         self.track_data = track_data
         self._trade_exchange = trade_exchange
         self.level_infra = LevelInfrastructure()
-        self.level_infra.reset_infra(common_infra=common_infra)
+        self.level_infra.reset_infra(common_infra=common_infra, executor=self)
         self._settle_type = settle_type
         self.reset(start_time=start_time, end_time=end_time, common_infra=common_infra)
         if common_infra is None:
diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py
index db35dc4820..05691f8228 100644
--- a/qlib/backtest/utils.py
+++ b/qlib/backtest/utils.py
@@ -21,6 +21,9 @@
 from ..data.data import Cal
 
 
+SAOE_DATA_KEY = "saoe_data"
+
+
 class TradeCalendarManager:
     """
     Manager for trading calendar
@@ -235,7 +238,9 @@ def update(self, other: BaseInfrastructure) -> None:
 
 class CommonInfrastructure(BaseInfrastructure):
     def get_support_infra(self) -> Set[str]:
-        return {"trade_account", "trade_exchange"}
+        # SAOE_DATA_KEY is used to store SAOE (single asset order execution) information that should be shared by
+        # all strategies. It should be dict.
+        return {"trade_account", "trade_exchange", SAOE_DATA_KEY}
 
 
 class LevelInfrastructure(BaseInfrastructure):
@@ -248,7 +253,7 @@ def get_support_infra(self) -> Set[str]:
         sub_level_infra:
         - **NOTE**: this will only work after _init_sub_trading !!!
         """
-        return {"trade_calendar", "sub_level_infra", "common_infra"}
+        return {"trade_calendar", "sub_level_infra", "common_infra", "executor"}
 
     def reset_cal(
         self,
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 04cb143046..d56ca43f01 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -10,6 +10,7 @@
 from qlib.backtest import get_strategy_executor
 from qlib.backtest.decision import Order
 from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor
+from qlib.backtest.utils import SAOE_DATA_KEY
 from qlib.config import REG_CN
 from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
@@ -58,40 +59,6 @@ def init_qlib(qlib_config: dict) -> None:
     )
 
 
-def create_state_maintainer_recursive(
-    executor: BaseExecutor,
-    order: Order,
-    backtest_data: QlibIntradayBacktestData,
-    ticks_index: pd.DatetimeIndex,
-    twap_price: float,
-    ticks_for_order: pd.DatetimeIndex,
-) -> None:
-    if isinstance(executor, SimulatorExecutor):
-        return
-    else:
-        assert isinstance(executor, NestedExecutor)
-
-        if isinstance(executor.inner_strategy, SAOEStrategy):
-            executor.inner_strategy.create_saoe_maintainer(
-                order=order,
-                executor=executor.inner_executor,
-                backtest_data=backtest_data,
-                time_per_step=executor.inner_executor.time_per_step,
-                ticks_index=ticks_index,
-                twap_price=twap_price,
-                ticks_for_order=ticks_for_order,
-            )
-
-        create_state_maintainer_recursive(
-            executor.inner_executor,
-            order,
-            backtest_data,
-            ticks_index,
-            twap_price,
-            ticks_for_order,
-        )
-
-
 class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
     """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools.
 
@@ -134,8 +101,8 @@ def reset(
         exchange_config: dict,
     ) -> None:
         strategy, self._executor = get_strategy_executor(
-            start_time=pd.Timestamp(order.start_time.date()),
-            end_time=pd.Timestamp(order.start_time.date()) + pd.DateOffset(1),
+            start_time=order.start_time.replace(hour=0, minute=0, second=0),
+            end_time=order.start_time.replace(hour=0, minute=0, second=0) + pd.DateOffset(1),
             strategy=strategy_config,
             executor=executor_config,
             benchmark=order.stock_id,
@@ -143,8 +110,9 @@ def reset(
             exchange_kwargs=exchange_config,
             pos_type="InfPosition",
         )
+
         assert isinstance(self._executor, NestedExecutor)
-        strategy.reset(level_infra=self._executor.get_level_infra())
+        strategy.reset(level_infra=self._executor.get_level_infra())  # TODO: check if we could remove this
 
         exchange = self._executor.trade_exchange
         ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)])
@@ -154,6 +122,7 @@ def reset(
             order.end_time,
             include_end=True,
         )
+
         backtest_data = QlibIntradayBacktestData(
             order=order,
             exchange=exchange,
@@ -161,6 +130,12 @@ def reset(
             end_time=ticks_for_order[-1],
         )
 
+        # Store ticks_for_order & backtest_data in the common_infra. They will be reused by all strategies.
+        common_infra = self._executor.common_infra
+        saoe_data = {} if not common_infra.has(SAOE_DATA_KEY) else common_infra.get(SAOE_DATA_KEY)
+        saoe_data[(order.stock_id, order.direction)] = (ticks_index, ticks_for_order, backtest_data)
+        common_infra.reset_infra(**{SAOE_DATA_KEY: saoe_data})
+
         self.twap_price = backtest_data.get_deal_price().mean()
 
         self._collect_data_loop = self._executor.collect_data(strategy.generate_trade_decision(), level=0)
@@ -168,14 +143,7 @@ def reset(
 
         self._last_yielded_saoe_strategy = self._iter_strategy(action=None)
 
-        create_state_maintainer_recursive(
-            executor=self._executor,
-            order=order,
-            backtest_data=backtest_data,
-            ticks_index=ticks_index,
-            twap_price=self.twap_price,
-            ticks_for_order=ticks_for_order,
-        )
+        self._order = order
 
     def _iter_strategy(self, action: float = None) -> SAOEStrategy:
         """Iterate the _collect_data_loop until we get the next yield SAOEStrategy."""
@@ -206,7 +174,7 @@ def step(self, action: float) -> None:
         assert self._executor is not None
 
     def get_state(self) -> SAOEState:
-        return self._last_yielded_saoe_strategy.maintainer.saoe_state
+        return self._last_yielded_saoe_strategy.get_saoe_state_by_order(self._order)
 
     def done(self) -> bool:
         return not self._executor.is_collecting
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index dd66813665..5824d0d090 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -9,7 +9,7 @@
 import pandas as pd
 from typing_extensions import TypedDict
 
-from qlib.backtest import Order
+from qlib.backtest import Exchange, Order
 from qlib.backtest.executor import BaseExecutor
 from qlib.constant import EPS
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
@@ -32,30 +32,31 @@ def __init__(
         self,
         order: Order,
         executor: BaseExecutor,
-        backtest_data: QlibIntradayBacktestData,
-        time_per_step: str,
+        exchange: Exchange,
+        ticks_per_step: int,
         ticks_index: pd.DatetimeIndex,
-        twap_price: float,
         ticks_for_order: pd.DatetimeIndex,
+        backtest_data: QlibIntradayBacktestData,
     ) -> None:
         super().__init__()
 
         self.position = order.amount
         self.order = order
         self.executor = executor
-        self.backtest_data = backtest_data
-        self.time_per_step = time_per_step
+        self.exchange = exchange
         self.ticks_index = ticks_index
         self.ticks_for_order = ticks_for_order
-        self.twap_price = twap_price
+        self.backtest_data = backtest_data
+
+        self.twap_price = self.backtest_data.get_deal_price().mean()
 
         metric_keys = list(SAOEMetrics.__annotations__.keys())  # pylint: disable=no-member
         self.history_exec = pd.DataFrame(columns=metric_keys).set_index("datetime")
         self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime")
         self.metrics: Optional[SAOEMetrics] = None
 
-        self.cur_time = ticks_for_order[0]
-        self.ticks_per_step = int(pd.Timedelta(self.time_per_step).total_seconds() // 60)
+        self.cur_time = self.ticks_for_order[0]
+        self.ticks_per_step = ticks_per_step
 
     def _next_time(self) -> pd.Timestamp:
         current_loc = self.ticks_index.get_loc(self.cur_time)
@@ -76,7 +77,7 @@ def update(self, execute_result: list) -> None:
             datetime_list = pd.DatetimeIndex([])
         else:
             market_volume = np.array(
-                self.executor.trade_exchange.get_volume(
+                self.exchange.get_volume(
                     self.order.stock_id,
                     execute_result[0][0].start_time,
                     execute_result[-1][0].start_time,
@@ -216,7 +217,7 @@ def saoe_state(self) -> SAOEState:
             history_steps=self.history_steps,
             metrics=self.metrics,
             backtest_data=self.backtest_data,
-            ticks_per_step=int(pd.Timedelta(self.time_per_step).total_seconds() // 60),
+            ticks_per_step=self.ticks_per_step,
             ticks_index=self.ticks_index,
             ticks_for_order=self.ticks_for_order,
         )
diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py
index f6f17e5a8d..aae60d4802 100644
--- a/qlib/rl/strategy/saoe.py
+++ b/qlib/rl/strategy/saoe.py
@@ -1,15 +1,13 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-
+import collections
 from abc import ABCMeta
-from typing import Any
+from typing import Any, cast, Dict, Tuple
 
 import pandas as pd
 from qlib.backtest.decision import BaseTradeDecision, Order
-from qlib.backtest.executor import BaseExecutor
-from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure
-from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
-from qlib.rl.order_execution.state import SAOEStateMaintainer
+from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure, SAOE_DATA_KEY
+from qlib.rl.order_execution.state import SAOEState, SAOEStateMaintainer
 from qlib.strategy.base import RLStrategy
 
 
@@ -26,30 +24,48 @@ def __init__(
     ) -> None:
         super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs)
 
-    def create_saoe_maintainer(
-        self,
-        order: Order,
-        executor: BaseExecutor,
-        backtest_data: QlibIntradayBacktestData,
-        time_per_step: str,
-        ticks_index: pd.DatetimeIndex,
-        twap_price: float,
-        ticks_for_order: pd.DatetimeIndex,
-    ) -> None:
-        self.maintainer = SAOEStateMaintainer(
+        self.maintainer_dict: Dict[Tuple[str, int], SAOEStateMaintainer] = {}
+
+    def _create_saoe_maintainer(self, order: Order) -> SAOEStateMaintainer:
+        saoe_data = self.common_infra.get(SAOE_DATA_KEY)
+        ticks_index, ticks_for_order, backtest_data = saoe_data[(order.stock_id, order.direction)]
+
+        return SAOEStateMaintainer(
             order=order,
-            executor=executor,
-            backtest_data=backtest_data,
-            time_per_step=time_per_step,
+            executor=self.executor,
+            exchange=self.trade_exchange,
+            ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")),
             ticks_index=ticks_index,
-            twap_price=twap_price,
             ticks_for_order=ticks_for_order,
+            backtest_data=backtest_data,
         )
 
+    def reset(
+        self,
+        level_infra: LevelInfrastructure = None,
+        common_infra: CommonInfrastructure = None,
+        outer_trade_decision: BaseTradeDecision = None,
+        **kwargs,
+    ) -> None:
+        super(SAOEStrategy, self).reset(level_infra, common_infra, outer_trade_decision, **kwargs)
+
+        self.maintainer_dict = {}
+        for decision in outer_trade_decision.get_decision():
+            order = cast(Order, decision)
+            self.maintainer_dict[(order.stock_id, order.direction)] = self._create_saoe_maintainer(order)
+
+    def get_saoe_state_by_order(self, order: Order) -> SAOEState:
+        return self.maintainer_dict[(order.stock_id, order.direction)].saoe_state
+
     def post_upper_level_exe_step(self) -> None:
-        self.maintainer.generate_metrics_after_done()
+        for maintainer in self.maintainer_dict.values():
+            maintainer.generate_metrics_after_done()
 
     def post_exe_step(self, execute_result: list) -> None:
-        self.maintainer.update(
-            execute_result=execute_result,
-        )
+        results = collections.defaultdict(list)
+        if execute_result is not None:
+            for e in execute_result:
+                results[(e[0].stock_id, e[0].direction)].append(e)
+
+        for (stock_id, direction), maintainer in self.maintainer_dict.items():
+            maintainer.update(results[(stock_id, direction)])
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 550561cf54..c6294eea3e 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -8,6 +8,7 @@
 if TYPE_CHECKING:
     from qlib.backtest.exchange import Exchange
     from qlib.backtest.position import BasePosition
+    from qlib.backtest.executor import BaseExecutor
 
 from typing import Tuple
 
@@ -55,6 +56,10 @@ def __init__(
         self._reset(level_infra=level_infra, common_infra=common_infra, outer_trade_decision=outer_trade_decision)
         self._trade_exchange = trade_exchange
 
+    @property
+    def executor(self) -> BaseExecutor:
+        return self.level_infra.get("executor")
+
     @property
     def trade_calendar(self) -> TradeCalendarManager:
         return self.level_infra.get("trade_calendar")
diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py
index 6a0bd4c329..db41be3d61 100644
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -7,7 +7,6 @@
 
 import pandas as pd
 import pytest
-
 from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime
 from qlib.backtest.executor import SimulatorExecutor
 from qlib.rl.order_execution import CategoricalActionInterpreter
@@ -205,3 +204,6 @@ def test_interpreter() -> None:
         position_history.append(state.position)
 
         assert position_history[-1] == max(TOTAL_POSITION - TOTAL_POSITION / NUM_EXECUTION * (i + 1), 0.0)
+
+
+test_simulator_stop_twap()

From fefad581d605ca1539ec25031c0b277984613403 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Fri, 5 Aug 2022 14:03:18 +0800
Subject: [PATCH 10/23] Resolve PR comments

---
 .../{objects.py => constants.py}              |   0
 qlib/rl/order_execution/simulator_qlib.py     |  35 +++++-
 qlib/rl/order_execution/simulator_simple.py   |   2 +-
 qlib/rl/order_execution/state.py              |   2 +-
 qlib/rl/order_execution/strategy.py           | 108 ++++++++++++++++++
 qlib/rl/order_execution/utils.py              |   2 +-
 qlib/rl/strategy/decomposed.py                |  43 -------
 qlib/rl/strategy/saoe.py                      |  71 ------------
 tests/rl/test_qlib_simulator.py               |   4 +-
 9 files changed, 142 insertions(+), 125 deletions(-)
 rename qlib/rl/order_execution/{objects.py => constants.py} (100%)
 create mode 100644 qlib/rl/order_execution/strategy.py
 delete mode 100644 qlib/rl/strategy/decomposed.py
 delete mode 100644 qlib/rl/strategy/saoe.py

diff --git a/qlib/rl/order_execution/objects.py b/qlib/rl/order_execution/constants.py
similarity index 100%
rename from qlib/rl/order_execution/objects.py
rename to qlib/rl/order_execution/constants.py
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index d56ca43f01..244c55dfb1 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -17,10 +17,33 @@
 from qlib.rl.order_execution.state import SAOEState
 from qlib.rl.order_execution.utils import get_ticks_slice
 from qlib.rl.simulator import Simulator
-from qlib.rl.strategy.saoe import SAOEStrategy
+from qlib.rl.order_execution.strategy import SAOEStrategy
 
 
 def init_qlib(qlib_config: dict) -> None:
+    """Initialize necessary resources to launch the workflow, including data directories, feature columns, etc.
+
+    Parameters
+    ----------
+    qlib_config:
+        Qlib configuration.
+
+        Example:
+            {
+                "provider_uri_day": DATA_ROOT_DIR / "qlib_1d",
+                "provider_uri_1min": DATA_ROOT_DIR / "qlib_1min",
+                "feature_root_dir": DATA_ROOT_DIR / "qlib_handler_stock",
+                "feature_columns_today": [
+                    "$open", "$high", "$low", "$close", "$vwap", "$bid", "$ask", "$volume",
+                    "$bidV", "$bidV1", "$bidV3", "$bidV5", "$askV", "$askV1", "$askV3", "$askV5",
+                ],
+                "feature_columns_yesterday": [
+                    "$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1", "$bid_1", "$ask_1", "$volume_1",
+                    "$bidV_1", "$bidV1_1", "$bidV3_1", "$bidV5_1", "$askV_1", "$askV1_1", "$askV3_1", "$askV5_1",
+                ],
+            }
+    """
+
     provider_uri_map = {
         "day": qlib_config["provider_uri_day"].as_posix(),
         "1min": qlib_config["provider_uri_1min"].as_posix(),
@@ -64,15 +87,15 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
 
     Parameters
     ----------
-    order (Order):
+    order
         The seed to start an SAOE simulator is an order.
-    qlib_config (dict):
+    qlib_config
         Configuration used to initialize Qlib.
-    strategy_config (dict):
+    strategy_config
         Strategy configuration
-    executor_config (dict):
+    executor_config
         Executor configuration
-    exchange_config (dict):
+    exchange_config
         Exchange configuration
     """
 
diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py
index 2c4ae822f4..5754d34c2f 100644
--- a/qlib/rl/order_execution/simulator_simple.py
+++ b/qlib/rl/order_execution/simulator_simple.py
@@ -12,7 +12,7 @@
 from qlib.backtest.decision import Order, OrderDir
 from qlib.constant import EPS
 from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data
-from qlib.rl.order_execution.objects import ONE_SEC, float_or_ndarray
+from qlib.rl.order_execution.constants import ONE_SEC, float_or_ndarray
 from qlib.rl.order_execution.state import SAOEMetrics, SAOEState
 from qlib.rl.simulator import Simulator
 from qlib.rl.utils import LogLevel
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index 5824d0d090..dbeab8dd3f 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -17,7 +17,7 @@
 from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, price_advantage
 
 
-class SAOEStateMaintainer:
+class QlibBacktestAdapter:
     """
     Maintain states of the environment.
 
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
new file mode 100644
index 0000000000..d36fadef13
--- /dev/null
+++ b/qlib/rl/order_execution/strategy.py
@@ -0,0 +1,108 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import collections
+from abc import ABCMeta
+from typing import Any, cast, Dict, Generator, Tuple
+
+import pandas as pd
+from qlib.backtest import CommonInfrastructure, Order
+from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO
+from qlib.backtest.utils import LevelInfrastructure, SAOE_DATA_KEY
+from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState
+from qlib.strategy.base import RLStrategy
+
+
+class SAOEStrategy(RLStrategy, metaclass=ABCMeta):
+    """RL-based strategies that use SAOEState as state."""
+
+    def __init__(
+        self,
+        policy: object,  # TODO: add accurate typehint later.
+        outer_trade_decision: BaseTradeDecision = None,
+        level_infra: LevelInfrastructure = None,
+        common_infra: CommonInfrastructure = None,
+        **kwargs: Any,
+    ) -> None:
+        super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs)
+
+        self.adapter_dict: Dict[Tuple[str, int], QlibBacktestAdapter] = {}
+
+    def _create_qlib_backtest_adapter(self, order: Order) -> QlibBacktestAdapter:
+        saoe_data = self.common_infra.get(SAOE_DATA_KEY)
+        ticks_index, ticks_for_order, backtest_data = saoe_data[(order.stock_id, order.direction)]
+
+        return QlibBacktestAdapter(
+            order=order,
+            executor=self.executor,
+            exchange=self.trade_exchange,
+            ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")),
+            ticks_index=ticks_index,
+            ticks_for_order=ticks_for_order,
+            backtest_data=backtest_data,
+        )
+
+    def reset(
+        self,
+        level_infra: LevelInfrastructure = None,
+        common_infra: CommonInfrastructure = None,
+        outer_trade_decision: BaseTradeDecision = None,
+        **kwargs,
+    ) -> None:
+        super(SAOEStrategy, self).reset(level_infra, common_infra, outer_trade_decision, **kwargs)
+
+        self.adapter_dict = {}
+        for decision in outer_trade_decision.get_decision():
+            order = cast(Order, decision)
+            self.adapter_dict[(order.stock_id, order.direction)] = self._create_qlib_backtest_adapter(order)
+
+    def get_saoe_state_by_order(self, order: Order) -> SAOEState:
+        return self.adapter_dict[(order.stock_id, order.direction)].saoe_state
+
+    def post_upper_level_exe_step(self) -> None:
+        for maintainer in self.adapter_dict.values():
+            maintainer.generate_metrics_after_done()
+
+    def post_exe_step(self, execute_result: list) -> None:
+        results = collections.defaultdict(list)
+        if execute_result is not None:
+            for e in execute_result:
+                results[(e[0].stock_id, e[0].direction)].append(e)
+
+        for (stock_id, direction), maintainer in self.adapter_dict.items():
+            maintainer.update(results[(stock_id, direction)])
+
+
+class DecomposedStrategy(SAOEStrategy):
+    """Decomposed strategy that needs actions from outside to generate trade decisions."""
+
+    def __init__(
+        self,
+        outer_trade_decision: BaseTradeDecision = None,
+        level_infra: LevelInfrastructure = None,
+        common_infra: CommonInfrastructure = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs)
+
+    def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]:
+        # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside
+        # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`,
+        # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner
+        # level strategy through this way.
+        exec_vol = yield self
+
+        oh = self.trade_exchange.get_order_helper()
+        order = oh.create(self._order.stock_id, exec_vol, self._order.direction)
+
+        return TradeDecisionWO([order], self)
+
+    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
+        return outer_trade_decision
+
+    def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None:
+        super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
+        if outer_trade_decision is not None:
+            order_list = outer_trade_decision.order_list
+            assert len(order_list) == 1
+            self._order = order_list[0]
diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py
index 44012d9db1..f861237398 100644
--- a/qlib/rl/order_execution/utils.py
+++ b/qlib/rl/order_execution/utils.py
@@ -10,7 +10,7 @@
 
 from qlib.backtest.decision import OrderDir
 from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor
-from qlib.rl.order_execution.objects import ONE_SEC, float_or_ndarray
+from qlib.rl.order_execution.constants import ONE_SEC, float_or_ndarray
 
 
 def get_ticks_slice(
diff --git a/qlib/rl/strategy/decomposed.py b/qlib/rl/strategy/decomposed.py
deleted file mode 100644
index 7431fe3562..0000000000
--- a/qlib/rl/strategy/decomposed.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-from typing import Any, Generator
-
-from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO
-from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure
-from qlib.rl.strategy.saoe import SAOEStrategy
-
-
-class DecomposedStrategy(SAOEStrategy):
-    """Decomposed strategy that needs actions from outside to generate trade decisions."""
-
-    def __init__(
-        self,
-        outer_trade_decision: BaseTradeDecision = None,
-        level_infra: LevelInfrastructure = None,
-        common_infra: CommonInfrastructure = None,
-        **kwargs: Any,
-    ) -> None:
-        super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs)
-
-    def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]:
-        # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside
-        # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`,
-        # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner
-        # level strategy through this way.
-        exec_vol = yield self
-
-        oh = self.trade_exchange.get_order_helper()
-        order = oh.create(self._order.stock_id, exec_vol, self._order.direction)
-
-        return TradeDecisionWO([order], self)
-
-    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
-        return outer_trade_decision
-
-    def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None:
-        super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
-        if outer_trade_decision is not None:
-            order_list = outer_trade_decision.order_list
-            assert len(order_list) == 1
-            self._order = order_list[0]
diff --git a/qlib/rl/strategy/saoe.py b/qlib/rl/strategy/saoe.py
deleted file mode 100644
index aae60d4802..0000000000
--- a/qlib/rl/strategy/saoe.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-import collections
-from abc import ABCMeta
-from typing import Any, cast, Dict, Tuple
-
-import pandas as pd
-from qlib.backtest.decision import BaseTradeDecision, Order
-from qlib.backtest.utils import CommonInfrastructure, LevelInfrastructure, SAOE_DATA_KEY
-from qlib.rl.order_execution.state import SAOEState, SAOEStateMaintainer
-from qlib.strategy.base import RLStrategy
-
-
-class SAOEStrategy(RLStrategy, metaclass=ABCMeta):
-    """RL-based strategies that use SAOEState as state."""
-
-    def __init__(
-        self,
-        policy: object,  # TODO: add accurate typehint later.
-        outer_trade_decision: BaseTradeDecision = None,
-        level_infra: LevelInfrastructure = None,
-        common_infra: CommonInfrastructure = None,
-        **kwargs: Any,
-    ) -> None:
-        super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs)
-
-        self.maintainer_dict: Dict[Tuple[str, int], SAOEStateMaintainer] = {}
-
-    def _create_saoe_maintainer(self, order: Order) -> SAOEStateMaintainer:
-        saoe_data = self.common_infra.get(SAOE_DATA_KEY)
-        ticks_index, ticks_for_order, backtest_data = saoe_data[(order.stock_id, order.direction)]
-
-        return SAOEStateMaintainer(
-            order=order,
-            executor=self.executor,
-            exchange=self.trade_exchange,
-            ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")),
-            ticks_index=ticks_index,
-            ticks_for_order=ticks_for_order,
-            backtest_data=backtest_data,
-        )
-
-    def reset(
-        self,
-        level_infra: LevelInfrastructure = None,
-        common_infra: CommonInfrastructure = None,
-        outer_trade_decision: BaseTradeDecision = None,
-        **kwargs,
-    ) -> None:
-        super(SAOEStrategy, self).reset(level_infra, common_infra, outer_trade_decision, **kwargs)
-
-        self.maintainer_dict = {}
-        for decision in outer_trade_decision.get_decision():
-            order = cast(Order, decision)
-            self.maintainer_dict[(order.stock_id, order.direction)] = self._create_saoe_maintainer(order)
-
-    def get_saoe_state_by_order(self, order: Order) -> SAOEState:
-        return self.maintainer_dict[(order.stock_id, order.direction)].saoe_state
-
-    def post_upper_level_exe_step(self) -> None:
-        for maintainer in self.maintainer_dict.values():
-            maintainer.generate_metrics_after_done()
-
-    def post_exe_step(self, execute_result: list) -> None:
-        results = collections.defaultdict(list)
-        if execute_result is not None:
-            for e in execute_result:
-                results[(e[0].stock_id, e[0].direction)].append(e)
-
-        for (stock_id, direction), maintainer in self.maintainer_dict.items():
-            maintainer.update(results[(stock_id, direction)])
diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py
index db41be3d61..305cf16391 100644
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -10,7 +10,7 @@
 from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime
 from qlib.backtest.executor import SimulatorExecutor
 from qlib.rl.order_execution import CategoricalActionInterpreter
-from qlib.rl.order_execution.objects import FINEST_GRANULARITY
+from qlib.rl.order_execution.constants import FINEST_GRANULARITY
 from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecutionQlib
 
 TOTAL_POSITION = 2100.0
@@ -48,7 +48,7 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]:
         "module_path": "qlib.backtest.executor",
         "kwargs": {
             "time_per_step": "1day",
-            "inner_strategy": {"class": "DecomposedStrategy", "module_path": "qlib.rl.strategy.decomposed"},
+            "inner_strategy": {"class": "DecomposedStrategy", "module_path": "qlib.rl.order_execution.strategy"},
             "track_data": True,
             "inner_executor": {
                 "class": "NestedExecutor",

From d697381f43af7109e89dcfc3f60ba61bbdef3aa3 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Fri, 5 Aug 2022 14:19:44 +0800
Subject: [PATCH 11/23] Pass all CI

---
 qlib/rl/order_execution/simulator_qlib.py |  2 +-
 qlib/rl/order_execution/strategy.py       | 23 ++++++++++-------------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 244c55dfb1..53e63f709a 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -9,7 +9,7 @@
 import qlib
 from qlib.backtest import get_strategy_executor
 from qlib.backtest.decision import Order
-from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor
+from qlib.backtest.executor import NestedExecutor
 from qlib.backtest.utils import SAOE_DATA_KEY
 from qlib.config import REG_CN
 from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index d36fadef13..20eef6c848 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -42,19 +42,14 @@ def _create_qlib_backtest_adapter(self, order: Order) -> QlibBacktestAdapter:
             backtest_data=backtest_data,
         )
 
-    def reset(
-        self,
-        level_infra: LevelInfrastructure = None,
-        common_infra: CommonInfrastructure = None,
-        outer_trade_decision: BaseTradeDecision = None,
-        **kwargs,
-    ) -> None:
-        super(SAOEStrategy, self).reset(level_infra, common_infra, outer_trade_decision, **kwargs)
+    def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None:
+        super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs)
 
-        self.adapter_dict = {}
-        for decision in outer_trade_decision.get_decision():
-            order = cast(Order, decision)
-            self.adapter_dict[(order.stock_id, order.direction)] = self._create_qlib_backtest_adapter(order)
+        if outer_trade_decision is not None:
+            self.adapter_dict = {}
+            for decision in outer_trade_decision.get_decision():
+                order = cast(Order, decision)
+                self.adapter_dict[(order.stock_id, order.direction)] = self._create_qlib_backtest_adapter(order)
 
     def get_saoe_state_by_order(self, order: Order) -> SAOEState:
         return self.adapter_dict[(order.stock_id, order.direction)].saoe_state
@@ -100,8 +95,10 @@ def generate_trade_decision(self, execute_result: list = None) -> Generator[Any,
     def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
         return outer_trade_decision
 
-    def reset(self, outer_trade_decision: TradeDecisionWO = None, **kwargs: Any) -> None:
+    def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None:
         super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
+
+        assert isinstance(outer_trade_decision, TradeDecisionWO)
         if outer_trade_decision is not None:
             order_list = outer_trade_decision.order_list
             assert len(order_list) == 1

From edd62fd7d1b7e3c97eb3dfbc2b334525051a8b12 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Fri, 5 Aug 2022 14:47:37 +0800
Subject: [PATCH 12/23] Minor test issue

---
 tests/rl/test_qlib_simulator.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py
index 305cf16391..df78976654 100644
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -204,6 +204,3 @@ def test_interpreter() -> None:
         position_history.append(state.position)
 
         assert position_history[-1] == max(TOTAL_POSITION - TOTAL_POSITION / NUM_EXECUTION * (i + 1), 0.0)
-
-
-test_simulator_stop_twap()

From 4049bfda2121b2557e9368ef07796d872e925875 Mon Sep 17 00:00:00 2001
From: Default <huo53926@126.com>
Date: Mon, 8 Aug 2022 11:51:56 +0800
Subject: [PATCH 13/23] Refine SAOE adapter logic

---
 qlib/backtest/decision.py                 |  5 ++
 qlib/rl/order_execution/simulator_qlib.py | 42 +++++-----------
 qlib/rl/order_execution/state.py          |  2 +-
 qlib/rl/order_execution/strategy.py       | 59 ++++++++++++++++++-----
 qlib/strategy/base.py                     |  6 +++
 5 files changed, 72 insertions(+), 42 deletions(-)

diff --git a/qlib/backtest/decision.py b/qlib/backtest/decision.py
index 4828478c7e..d41fa66f60 100644
--- a/qlib/backtest/decision.py
+++ b/qlib/backtest/decision.py
@@ -135,6 +135,11 @@ def parse_dir(direction: Union[str, int, np.integer, OrderDir, np.ndarray]) -> U
         else:
             raise NotImplementedError(f"This type of input is not supported")
 
+    @property
+    def key(self) -> tuple:
+        """A hashable & unique key to identify this order. Usually used as the key in a dict."""
+        return self.stock_id, self.start_time.replace(hour=0, minute=0, second=0), self.direction
+
 
 class OrderHelper:
     """
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 53e63f709a..15d0edf128 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -89,32 +89,30 @@ class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
     ----------
     order
         The seed to start an SAOE simulator is an order.
-    qlib_config
-        Configuration used to initialize Qlib.
     strategy_config
         Strategy configuration
     executor_config
         Executor configuration
     exchange_config
         Exchange configuration
+    qlib_config
+        Configuration used to initialize Qlib. If it is None, Qlib will not be initialized.
     """
 
     def __init__(
         self,
         order: Order,
-        qlib_config: dict,
         strategy_config: dict,
         executor_config: dict,
         exchange_config: dict,
+        qlib_config: dict = None,
     ) -> None:
         super().__init__(initial=order)
 
         assert order.start_time.date() == order.end_time.date(), "Start date and end date must be the same."
 
-        init_qlib(qlib_config)
-
         self._collect_data_loop: Optional[Generator] = None
-        self.reset(order, strategy_config, executor_config, exchange_config)
+        self.reset(order, strategy_config, executor_config, exchange_config, qlib_config)
 
     def reset(
         self,
@@ -122,7 +120,11 @@ def reset(
         strategy_config: dict,
         executor_config: dict,
         exchange_config: dict,
+        qlib_config: dict = None,
     ) -> None:
+        if qlib_config is not None:
+            init_qlib(qlib_config)
+
         strategy, self._executor = get_strategy_executor(
             start_time=order.start_time.replace(hour=0, minute=0, second=0),
             end_time=order.start_time.replace(hour=0, minute=0, second=0) + pd.DateOffset(1),
@@ -137,30 +139,6 @@ def reset(
         assert isinstance(self._executor, NestedExecutor)
         strategy.reset(level_infra=self._executor.get_level_infra())  # TODO: check if we could remove this
 
-        exchange = self._executor.trade_exchange
-        ticks_index = pd.DatetimeIndex([e[1] for e in list(exchange.quote_df.index)])
-        ticks_for_order = get_ticks_slice(
-            ticks_index,
-            order.start_time,
-            order.end_time,
-            include_end=True,
-        )
-
-        backtest_data = QlibIntradayBacktestData(
-            order=order,
-            exchange=exchange,
-            start_time=ticks_for_order[0],
-            end_time=ticks_for_order[-1],
-        )
-
-        # Store ticks_for_order & backtest_data in the common_infra. They will be reused by all strategies.
-        common_infra = self._executor.common_infra
-        saoe_data = {} if not common_infra.has(SAOE_DATA_KEY) else common_infra.get(SAOE_DATA_KEY)
-        saoe_data[(order.stock_id, order.direction)] = (ticks_index, ticks_for_order, backtest_data)
-        common_infra.reset_infra(**{SAOE_DATA_KEY: saoe_data})
-
-        self.twap_price = backtest_data.get_deal_price().mean()
-
         self._collect_data_loop = self._executor.collect_data(strategy.generate_trade_decision(), level=0)
         assert isinstance(self._collect_data_loop, Generator)
 
@@ -168,6 +146,10 @@ def reset(
 
         self._order = order
 
+    @property
+    def twap_price(self) -> float:
+        return self._last_yielded_saoe_strategy.adapter_dict[self._order.key].twap_price
+
     def _iter_strategy(self, action: float = None) -> SAOEStrategy:
         """Iterate the _collect_data_loop until we get the next yield SAOEStrategy."""
         assert self._collect_data_loop is not None
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index dbeab8dd3f..02885bce6b 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -55,7 +55,7 @@ def __init__(
         self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime")
         self.metrics: Optional[SAOEMetrics] = None
 
-        self.cur_time = self.ticks_for_order[0]
+        self.cur_time = max(ticks_for_order[0], order.start_time)
         self.ticks_per_step = ticks_per_step
 
     def _next_time(self) -> pd.Timestamp:
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index 20eef6c848..c6d1faa436 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -7,9 +7,11 @@
 
 import pandas as pd
 from qlib.backtest import CommonInfrastructure, Order
-from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO
+from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange, TradeRangeByTime
 from qlib.backtest.utils import LevelInfrastructure, SAOE_DATA_KEY
+from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
 from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState
+from qlib.rl.order_execution.utils import get_ticks_slice
 from qlib.strategy.base import RLStrategy
 
 
@@ -26,17 +28,51 @@ def __init__(
     ) -> None:
         super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs)
 
-        self.adapter_dict: Dict[Tuple[str, int], QlibBacktestAdapter] = {}
+        self.adapter_dict: Dict[tuple, QlibBacktestAdapter] = {}
 
-    def _create_qlib_backtest_adapter(self, order: Order) -> QlibBacktestAdapter:
-        saoe_data = self.common_infra.get(SAOE_DATA_KEY)
-        ticks_index, ticks_for_order, backtest_data = saoe_data[(order.stock_id, order.direction)]
+    def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter:
+        if not self.common_infra.has(SAOE_DATA_KEY):
+            self.common_infra.reset_infra(**{SAOE_DATA_KEY: {}})
 
+        # saoe_data can be considered as some type of cache. Use it to avoid unnecessary data reload.
+        # The data for one order would be loaded only once. All strategies will reuse this data.
+        saoe_data = self.common_infra.get(SAOE_DATA_KEY)
+        if order.key not in saoe_data:
+            data = self.trade_exchange.get_deal_price(
+                stock_id=order.stock_id,
+                start_time=order.start_time.replace(hour=0, minute=0, second=0),
+                end_time=order.start_time.replace(hour=23, minute=59, second=59),
+                direction=order.direction,
+                method=None
+            )
+
+            ticks_index = pd.DatetimeIndex(data.index)
+            if isinstance(trade_range, TradeRangeByTime):
+                ticks_for_order = get_ticks_slice(
+                    ticks_index,
+                    trade_range.start_time,
+                    trade_range.end_time,
+                    include_end=True,
+                )
+            else:
+                ticks_for_order = None  # FIXME: implement this logic
+                start_time = None  # FIXME: implement this logic
+
+            backtest_data = QlibIntradayBacktestData(
+                order=order,
+                exchange=self.trade_exchange,
+                start_time=ticks_for_order[0],
+                end_time=ticks_for_order[-1],
+            )
+
+            saoe_data[order.key] = (ticks_index, ticks_for_order, backtest_data)
+
+        ticks_index, ticks_for_order, backtest_data = saoe_data[order.key]
         return QlibBacktestAdapter(
             order=order,
             executor=self.executor,
             exchange=self.trade_exchange,
-            ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min")),
+            ticks_per_step=self.ticks_per_step,
             ticks_index=ticks_index,
             ticks_for_order=ticks_for_order,
             backtest_data=backtest_data,
@@ -45,14 +81,15 @@ def _create_qlib_backtest_adapter(self, order: Order) -> QlibBacktestAdapter:
     def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None:
         super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs)
 
+        trade_range = outer_trade_decision.trade_range
         if outer_trade_decision is not None:
             self.adapter_dict = {}
             for decision in outer_trade_decision.get_decision():
                 order = cast(Order, decision)
-                self.adapter_dict[(order.stock_id, order.direction)] = self._create_qlib_backtest_adapter(order)
+                self.adapter_dict[order.key] = self._create_qlib_backtest_adapter(order, trade_range)
 
     def get_saoe_state_by_order(self, order: Order) -> SAOEState:
-        return self.adapter_dict[(order.stock_id, order.direction)].saoe_state
+        return self.adapter_dict[order.key].saoe_state
 
     def post_upper_level_exe_step(self) -> None:
         for maintainer in self.adapter_dict.values():
@@ -62,10 +99,10 @@ def post_exe_step(self, execute_result: list) -> None:
         results = collections.defaultdict(list)
         if execute_result is not None:
             for e in execute_result:
-                results[(e[0].stock_id, e[0].direction)].append(e)
+                results[e[0].key].append(e)
 
-        for (stock_id, direction), maintainer in self.adapter_dict.items():
-            maintainer.update(results[(stock_id, direction)])
+        for key, maintainer in self.adapter_dict.items():
+            maintainer.update(results[key])
 
 
 class DecomposedStrategy(SAOEStrategy):
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index c6294eea3e..6c173a9ec3 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -5,6 +5,8 @@
 from abc import ABCMeta, abstractmethod
 from typing import Any, Generator, Optional, TYPE_CHECKING, Union
 
+import pandas as pd
+
 if TYPE_CHECKING:
     from qlib.backtest.exchange import Exchange
     from qlib.backtest.position import BasePosition
@@ -64,6 +66,10 @@ def executor(self) -> BaseExecutor:
     def trade_calendar(self) -> TradeCalendarManager:
         return self.level_infra.get("trade_calendar")
 
+    @property
+    def ticks_per_step(self) -> int:  # the trade calendar's frequency expressed as a count of 1-minute ticks
+        return int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min"))
+
     @property
     def trade_position(self) -> BasePosition:
         return self.common_infra.get("trade_account").current_position

From bbf500ca5ea9df743d18ceee47c9f1ea6d332df5 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Mon, 8 Aug 2022 14:07:29 +0800
Subject: [PATCH 14/23] Minor bugfix

---
 qlib/rl/order_execution/simulator_qlib.py | 5 +----
 qlib/rl/order_execution/state.py          | 2 +-
 qlib/rl/order_execution/strategy.py       | 9 +++++----
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 15d0edf128..35e829fb7f 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -10,14 +10,11 @@
 from qlib.backtest import get_strategy_executor
 from qlib.backtest.decision import Order
 from qlib.backtest.executor import NestedExecutor
-from qlib.backtest.utils import SAOE_DATA_KEY
 from qlib.config import REG_CN
 from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select
-from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
 from qlib.rl.order_execution.state import SAOEState
-from qlib.rl.order_execution.utils import get_ticks_slice
-from qlib.rl.simulator import Simulator
 from qlib.rl.order_execution.strategy import SAOEStrategy
+from qlib.rl.simulator import Simulator
 
 
 def init_qlib(qlib_config: dict) -> None:
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index 02885bce6b..9e93562701 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -211,7 +211,7 @@ def _collect_single_order_metric(
     def saoe_state(self) -> SAOEState:
         return SAOEState(
             order=self.order,
-            cur_time=self.executor.trade_calendar.get_step_time()[0],
+            cur_time=self.cur_time,
             position=self.position,
             history_exec=self.history_exec,
             history_steps=self.history_steps,
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index c6d1faa436..7b4ced2309 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -3,7 +3,7 @@
 
 import collections
 from abc import ABCMeta
-from typing import Any, cast, Dict, Generator, Tuple
+from typing import Any, cast, Dict, Generator
 
 import pandas as pd
 from qlib.backtest import CommonInfrastructure, Order
@@ -43,7 +43,7 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -
                 start_time=order.start_time.replace(hour=0, minute=0, second=0),
                 end_time=order.start_time.replace(hour=23, minute=59, second=59),
                 direction=order.direction,
-                method=None
+                method=None,
             )
 
             ticks_index = pd.DatetimeIndex(data.index)
@@ -56,7 +56,6 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -
                 )
             else:
                 ticks_for_order = None  # FIXME: implement this logic
-                start_time = None  # FIXME: implement this logic
 
             backtest_data = QlibIntradayBacktestData(
                 order=order,
@@ -81,8 +80,10 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -
     def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None:
         super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs)
 
-        trade_range = outer_trade_decision.trade_range
         if outer_trade_decision is not None:
+            trade_range = outer_trade_decision.trade_range
+            assert trade_range is not None
+
             self.adapter_dict = {}
             for decision in outer_trade_decision.get_decision():
                 order = cast(Order, decision)

From 0824ced4196b98f8458a2f342d4fa51896a6bfc0 Mon Sep 17 00:00:00 2001
From: Huoran Li <huoranli@microsoft.com>
Date: Tue, 16 Aug 2022 13:01:53 +0800
Subject: [PATCH 15/23] Cherry pick updates

---
 qlib/backtest/executor.py                 |   2 +
 qlib/backtest/utils.py                    |   4 +-
 qlib/rl/integration/__init__.py           |   2 +
 qlib/rl/integration/feature.py            | 182 ++++++++++++++++++++++
 qlib/rl/order_execution/simulator_qlib.py |  68 +-------
 qlib/rl/order_execution/state.py          |  95 +++++++----
 qlib/rl/order_execution/strategy.py       |  31 +++-
 qlib/strategy/base.py                     |   3 +-
 8 files changed, 279 insertions(+), 108 deletions(-)
 create mode 100644 qlib/rl/integration/__init__.py
 create mode 100644 qlib/rl/integration/feature.py

diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index 07da6b57bc..c53f2b2fdf 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -137,6 +137,8 @@ def reset_common_infra(self, common_infra: CommonInfrastructure, copy_trade_acco
         else:
             self.common_infra.update(common_infra)
 
+        self.level_infra.reset_infra(common_infra=self.common_infra)
+
         if common_infra.has("trade_account"):
             # NOTE: there is a trick in the code.
             # shallow copy is used instead of deepcopy.
diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py
index 05691f8228..a1470a3398 100644
--- a/qlib/backtest/utils.py
+++ b/qlib/backtest/utils.py
@@ -187,8 +187,8 @@ def get_range_idx(self, start_time: pd.Timestamp, end_time: pd.Timestamp) -> Tup
         Tuple[int, int]:
             the index of the range.  **the left and right are closed**
         """
-        left = bisect.bisect_right(list(self._calendar), start_time) - 1
-        right = bisect.bisect_right(list(self._calendar), end_time) - 1
+        left = bisect.bisect_right(self._calendar, start_time) - 1
+        right = bisect.bisect_right(self._calendar, end_time) - 1
         left -= self.start_index
         right -= self.start_index
 
diff --git a/qlib/rl/integration/__init__.py b/qlib/rl/integration/__init__.py
new file mode 100644
index 0000000000..59e481eb93
--- /dev/null
+++ b/qlib/rl/integration/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py
new file mode 100644
index 0000000000..8a5b653ece
--- /dev/null
+++ b/qlib/rl/integration/feature.py
@@ -0,0 +1,182 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from __future__ import annotations
+
+import collections
+import pickle
+from pathlib import Path
+from typing import List
+
+import numpy as np
+import pandas as pd
+import qlib
+from qlib.constant import REG_CN
+from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select
+from qlib.data.dataset import DatasetH
+
+dataset = None
+
+
+class LRUCache:
+    def __init__(self, pool_size: int = 200):
+        self.pool_size = pool_size
+        self.contents = dict()
+        self.keys = collections.deque()
+
+    def put(self, key, item):
+        if self.has(key):
+            self.keys.remove(key)
+        self.keys.append(key)
+        self.contents[key] = item
+        while len(self.contents) > self.pool_size:
+            self.contents.pop(self.keys.popleft())
+
+    def get(self, key):
+        return self.contents[key]
+
+    def has(self, key):
+        return key in self.contents
+
+
+class DataWrapper:
+    def __init__(self, feature_dataset: DatasetH, backtest_dataset: DatasetH,
+                 columns_today: List[str], columns_yesterday: List[str], _internal: bool = False):
+        """Keep the feature / backtest datasets and column lists, giving each dataset its own LRUCache."""
+        assert _internal, 'Init function of data wrapper is for internal use only.'
+        self.feature_dataset = feature_dataset
+        self.backtest_dataset = backtest_dataset
+        self.columns_today = columns_today
+        self.columns_yesterday = columns_yesterday
+        self.feature_cache = LRUCache()
+        self.backtest_cache = LRUCache()
+
+    def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False):
+        """Return `stock_id`'s rows for the day of `date`, calling the dataset handler only on a cache miss.
+
+        The backtest dataset and cache are used when `backtest` is True, the feature ones otherwise.
+        """
+        day_start = date.replace(hour=0, minute=0, second=0)
+        day_end = date.replace(hour=23, minute=59, second=59)
+        source, cache = (
+            (self.backtest_dataset, self.backtest_cache) if backtest else (self.feature_dataset, self.feature_cache)
+        )
+        cache_key = (day_start, day_end, stock_id)
+        if not cache.has(cache_key):
+            fetched = source.handler.fetch(pd.IndexSlice[stock_id, day_start:day_end], level=None)
+            cache.put(cache_key, fetched)
+            return fetched
+        return cache.get(cache_key)
+
+
+def init_qlib(qlib_config: dict, part: str = None) -> None:
+    """Initialize qlib and, unless ``part == "skip"``, load the pickled feature / backtest datasets into ``dataset``.
+
+    Parameters
+    ----------
+    qlib_config:
+        Qlib configuration.
+
+        Example:
+            {
+                "provider_uri_day": DATA_ROOT_DIR / "qlib_1d",
+                "provider_uri_1min": DATA_ROOT_DIR / "qlib_1min",
+                "feature_root_dir": DATA_ROOT_DIR / "qlib_handler_stock",
+                "feature_columns_today": [
+                    "$open", "$high", "$low", "$close", "$vwap", "$bid", "$ask", "$volume",
+                    "$bidV", "$bidV1", "$bidV3", "$bidV5", "$askV", "$askV1", "$askV3", "$askV5",
+                ],
+                "feature_columns_yesterday": [
+                    "$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1", "$bid_1", "$ask_1", "$volume_1",
+                    "$bidV_1", "$bidV1_1", "$bidV3_1", "$bidV5_1", "$askV_1", "$askV1_1", "$askV3_1", "$askV5_1",
+                ],
+            }
+    part
+        Which part (stock / date) to load; None loads the un-partitioned pickles and "skip" loads nothing.
+    """
+
+    global dataset
+
+    def _convert_to_path(path: str | Path) -> Path:
+        return path if isinstance(path, Path) else Path(path)
+
+    provider_uri_map = {
+        "day": _convert_to_path(qlib_config["provider_uri_day"]).as_posix(),
+        "1min": _convert_to_path(qlib_config["provider_uri_1min"]).as_posix(),
+    }
+    qlib.init(
+        region=REG_CN,
+        auto_mount=False,
+        custom_ops=[DayLast, FFillNan, BFillNan, Date, Select, IsNull, IsInf, Cut, DayCumsum],
+        expression_cache=None,
+        calendar_provider={
+            "class": "LocalCalendarProvider",
+            "module_path": "qlib.data.data",
+            "kwargs": {
+                "backend": {
+                    "class": "FileCalendarStorage",
+                    "module_path": "qlib.data.storage.file_storage",
+                    "kwargs": {"provider_uri_map": provider_uri_map},
+                },
+            },
+        },
+        feature_provider={
+            "class": "LocalFeatureProvider",
+            "module_path": "qlib.data.data",
+            "kwargs": {
+                "backend": {
+                    "class": "FileFeatureStorage",
+                    "module_path": "qlib.data.storage.file_storage",
+                    "kwargs": {"provider_uri_map": provider_uri_map},
+                },
+            },
+        },
+        provider_uri=provider_uri_map,
+        kernels=1,
+        redis_port=-1,
+        clear_mem_cache=False,  # init_qlib will be called multiple times; keep the cache to improve performance
+    )
+    # "skip": only (re)initialize qlib; the module-level `dataset` is left untouched.
+    if part == "skip":
+        return
+
+    # this won't work if it's put outside in case of multiprocessing
+    from qlib.data import D
+
+    if part is None:
+        feature_path = Path(qlib_config["feature_root_dir"]) / 'feature.pkl'
+        backtest_path = Path(qlib_config["feature_root_dir"]) / 'backtest.pkl'
+    else:
+        feature_path = Path(qlib_config["feature_root_dir"]) / 'feature' / (part + '.pkl')
+        backtest_path = Path(qlib_config["feature_root_dir"]) / 'backtest' / (part + '.pkl')
+    # NOTE(review): pickle.load can execute arbitrary code; feature_root_dir must only hold trusted files.
+    with feature_path.open('rb') as f:
+        feature_dataset = pickle.load(f)
+    with backtest_path.open('rb') as f:
+        backtest_dataset = pickle.load(f)
+
+    dataset = DataWrapper(
+        feature_dataset,
+        backtest_dataset,
+        qlib_config["feature_columns_today"],
+        qlib_config["feature_columns_yesterday"],
+        _internal=True
+    )
+
+
+def fetch_features(stock_id: str, date: pd.Timestamp, yesterday: bool = False, backtest: bool = False):
+    """Return the requested columns of one stock-day from `dataset`, or a zero-filled frame when no data is found."""
+    assert dataset is not None, 'You must call init_qlib() before doing this.'
+
+    if backtest:
+        fields = ['$close', '$volume']
+    elif yesterday:
+        fields = dataset.columns_yesterday
+    else:
+        fields = dataset.columns_today
+
+    raw = dataset.get(stock_id, date, backtest)
+    if raw is None or len(raw) == 0:
+        # create a fake index, but RL doesn't care about index.  FIXME: the 240-row length is hardcoded
+        return pd.DataFrame(0., index=np.arange(240), columns=fields, dtype=np.float32)
+    return raw.rename(columns={c: c.rstrip('0') for c in raw.columns})[fields]
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 35e829fb7f..e7638ffb65 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -6,79 +6,15 @@
 from typing import Generator, Optional
 
 import pandas as pd
-import qlib
 from qlib.backtest import get_strategy_executor
 from qlib.backtest.decision import Order
 from qlib.backtest.executor import NestedExecutor
-from qlib.config import REG_CN
-from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select
+from qlib.rl.integration.feature import init_qlib
 from qlib.rl.order_execution.state import SAOEState
 from qlib.rl.order_execution.strategy import SAOEStrategy
 from qlib.rl.simulator import Simulator
 
 
-def init_qlib(qlib_config: dict) -> None:
-    """Initialize necessary resource to launch the workflow, including data direction, feature columns, etc..
-
-    Parameters
-    ----------
-    qlib_config:
-        Qlib configuration.
-
-        Example:
-            {
-                "provider_uri_day": DATA_ROOT_DIR / "qlib_1d",
-                "provider_uri_1min": DATA_ROOT_DIR / "qlib_1min",
-                "feature_root_dir": DATA_ROOT_DIR / "qlib_handler_stock",
-                "feature_columns_today": [
-                    "$open", "$high", "$low", "$close", "$vwap", "$bid", "$ask", "$volume",
-                    "$bidV", "$bidV1", "$bidV3", "$bidV5", "$askV", "$askV1", "$askV3", "$askV5",
-                ],
-                "feature_columns_yesterday": [
-                    "$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1", "$bid_1", "$ask_1", "$volume_1",
-                    "$bidV_1", "$bidV1_1", "$bidV3_1", "$bidV5_1", "$askV_1", "$askV1_1", "$askV3_1", "$askV5_1",
-                ],
-            }
-    """
-
-    provider_uri_map = {
-        "day": qlib_config["provider_uri_day"].as_posix(),
-        "1min": qlib_config["provider_uri_1min"].as_posix(),
-    }
-    qlib.init(
-        region=REG_CN,
-        auto_mount=False,
-        custom_ops=[DayLast, FFillNan, BFillNan, Date, Select, IsNull, IsInf, Cut, DayCumsum],
-        expression_cache=None,
-        calendar_provider={
-            "class": "LocalCalendarProvider",
-            "module_path": "qlib.data.data",
-            "kwargs": {
-                "backend": {
-                    "class": "FileCalendarStorage",
-                    "module_path": "qlib.data.storage.file_storage",
-                    "kwargs": {"provider_uri_map": provider_uri_map},
-                },
-            },
-        },
-        feature_provider={
-            "class": "LocalFeatureProvider",
-            "module_path": "qlib.data.data",
-            "kwargs": {
-                "backend": {
-                    "class": "FileFeatureStorage",
-                    "module_path": "qlib.data.storage.file_storage",
-                    "kwargs": {"provider_uri_map": provider_uri_map},
-                },
-            },
-        },
-        provider_uri=provider_uri_map,
-        kernels=1,
-        redis_port=-1,
-        clear_mem_cache=False,  # init_qlib will be called for multiple times. Keep the cache for improving performance
-    )
-
-
 class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
     """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools.
 
@@ -120,7 +56,7 @@ def reset(
         qlib_config: dict = None,
     ) -> None:
         if qlib_config is not None:
-            init_qlib(qlib_config)
+            init_qlib(qlib_config, part="skip")
 
         strategy, self._executor = get_strategy_executor(
             start_time=order.start_time.replace(hour=0, minute=0, second=0),
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index 9e93562701..97c2ea942a 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -3,18 +3,36 @@
 
 from __future__ import annotations
 
-from typing import NamedTuple, Optional, cast
+from typing import cast, NamedTuple, Optional, Tuple
 
 import numpy as np
 import pandas as pd
-from typing_extensions import TypedDict
-
 from qlib.backtest import Exchange, Order
 from qlib.backtest.executor import BaseExecutor
-from qlib.constant import EPS
+from qlib.constant import EPS, REG_CN
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
 from qlib.rl.data.pickle_styled import IntradayBacktestData
-from qlib.rl.order_execution.utils import dataframe_append, get_simulator_executor, price_advantage
+from qlib.rl.order_execution.utils import dataframe_append, price_advantage
+from qlib.utils.time import get_day_min_idx_range
+from typing_extensions import TypedDict
+
+
+def _get_all_timestamps(
+    start: pd.Timestamp,
+    end: pd.Timestamp,
+    granularity: pd.Timedelta = pd.Timedelta("1min"),
+    include_end: bool = True,
+) -> pd.DatetimeIndex:
+    """Return the timestamps from `start` up to `end`, spaced by `granularity`.
+
+    `end` is dropped when `include_end` is False; `start > end` yields an empty index instead of raising."""
+    ret = []
+    while start <= end:  # never appends a value past `end`, so no overshoot trimming is needed afterwards
+        ret.append(start)
+        start += granularity
+    if ret and ret[-1] == end and not include_end:
+        ret.pop()
+    return pd.DatetimeIndex(ret)
 
 
 class QlibBacktestAdapter:
@@ -67,33 +85,42 @@ def _next_time(self) -> pd.Timestamp:
         else:
             return self.order.end_time
 
-    def update(self, execute_result: list) -> None:
-        exec_vol = np.array([e[0].deal_amount for e in execute_result])
-        num_step = len(execute_result)
-
-        if num_step == 0:
-            market_volume = np.array([])
-            market_price = np.array([])
-            datetime_list = pd.DatetimeIndex([])
-        else:
-            market_volume = np.array(
-                self.exchange.get_volume(
-                    self.order.stock_id,
-                    execute_result[0][0].start_time,
-                    execute_result[-1][0].start_time,
-                    method=None,
-                ),
-            )
-
-            # Get data from the SimulatorExecutor's (lowest-level executor) indicator
-            simulator_executor = get_simulator_executor(self.executor)
-            simulator_trade_account = simulator_executor.trade_account
-            simulator_df = simulator_trade_account.get_trade_indicator().generate_trade_indicators_dataframe()
-
-            trade_value = simulator_df.iloc[-num_step:]["value"].values
-            deal_amount = simulator_df.iloc[-num_step:]["deal_amount"].values
-            market_price = trade_value / deal_amount
-            datetime_list = simulator_df.index[-num_step:]
+    def update(
+        self,
+        execute_result: list,
+        last_step_range: Tuple[int, int],
+    ) -> None:
+        last_step_size = last_step_range[1] - last_step_range[0] + 1
+        start_time = self.ticks_index[last_step_range[0]]
+        end_time = self.ticks_index[last_step_range[1]]
+
+        exec_vol = np.zeros(last_step_size)
+        for order, _, __, ___ in execute_result:
+            idx, _ = get_day_min_idx_range(order.start_time, order.end_time, '1min', REG_CN)
+            exec_vol[idx - last_step_range[0]] = order.deal_amount
+
+        if exec_vol.sum() > self.position and exec_vol.sum() > 0.0:
+            assert exec_vol.sum() < self.position + 1, f'{exec_vol} too large'
+            exec_vol *= self.position / (exec_vol.sum())
+
+        market_volume = np.array(
+            self.exchange.get_volume(
+                self.order.stock_id,
+                pd.Timestamp(start_time),
+                pd.Timestamp(end_time),
+                method=None,
+            ),
+        ).reshape(-1)
+
+        market_price = np.array(
+            self.exchange.get_deal_price(
+                self.order.stock_id,
+                pd.Timestamp(start_time),
+                pd.Timestamp(end_time),
+                method=None,
+                direction=self.order.direction,
+            ),
+        ).reshape(-1)
 
         assert market_price.shape == market_volume.shape == exec_vol.shape
 
@@ -104,7 +131,7 @@ def update(self, execute_result: list) -> None:
             self.history_exec,
             self._collect_multi_order_metric(
                 order=self.order,
-                datetime=datetime_list,
+                datetime=_get_all_timestamps(start_time, end_time, include_end=True),
                 market_vol=market_volume,
                 market_price=market_price,
                 exec_vol=exec_vol,
@@ -147,7 +174,7 @@ def generate_metrics_after_done(self) -> None:
     def _collect_multi_order_metric(
         self,
         order: Order,
-        datetime: pd.Timestamp,
+        datetime: pd.DatetimeIndex,
         market_vol: np.ndarray,
         market_price: np.ndarray,
         exec_vol: np.ndarray,
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index 7b4ced2309..8f63bec5cf 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -1,11 +1,14 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
+from __future__ import annotations
+
 import collections
 from abc import ABCMeta
-from typing import Any, cast, Dict, Generator
+from typing import Any, Dict, Generator, Tuple, cast
 
 import pandas as pd
+
 from qlib.backtest import CommonInfrastructure, Order
 from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange, TradeRangeByTime
 from qlib.backtest.utils import LevelInfrastructure, SAOE_DATA_KEY
@@ -26,9 +29,16 @@ def __init__(
         common_infra: CommonInfrastructure = None,
         **kwargs: Any,
     ) -> None:
-        super(SAOEStrategy, self).__init__(policy, outer_trade_decision, level_infra, common_infra, **kwargs)
+        super(SAOEStrategy, self).__init__(
+            policy=policy,
+            outer_trade_decision=outer_trade_decision,
+            level_infra=level_infra,
+            common_infra=common_infra,
+            **kwargs,
+        )
 
         self.adapter_dict: Dict[tuple, QlibBacktestAdapter] = {}
+        self._last_step_range = (0, 0)
 
     def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter:
         if not self.common_infra.has(SAOE_DATA_KEY):
@@ -67,6 +77,7 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -
             saoe_data[order.key] = (ticks_index, ticks_for_order, backtest_data)
 
         ticks_index, ticks_for_order, backtest_data = saoe_data[order.key]
+
         return QlibBacktestAdapter(
             order=order,
             executor=self.executor,
@@ -77,10 +88,16 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -
             backtest_data=backtest_data,
         )
 
+    def _update_last_step_range(self, step_range: Tuple[int, int]) -> None:  # stored for post_exe_step to read
+        self._last_step_range = step_range
+
     def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None:
         super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs)
 
-        if outer_trade_decision is not None:
+        self.adapter_dict = {}
+        self._last_step_range = (0, 0)
+
+        if outer_trade_decision is not None and not outer_trade_decision.empty():
             trade_range = outer_trade_decision.trade_range
             assert trade_range is not None
 
@@ -97,13 +114,18 @@ def post_upper_level_exe_step(self) -> None:
             maintainer.generate_metrics_after_done()
 
     def post_exe_step(self, execute_result: list) -> None:
+        last_step_length = self._last_step_range[1] - self._last_step_range[0]
+        if last_step_length <= 0:
+            assert not execute_result
+            return
+
         results = collections.defaultdict(list)
         if execute_result is not None:
             for e in execute_result:
                 results[e[0].key].append(e)
 
         for key, maintainer in self.adapter_dict.items():
-            maintainer.update(results[key])
+            maintainer.update(results[key], self._last_step_range)
 
 
 class DecomposedStrategy(SAOEStrategy):
@@ -127,6 +149,7 @@ def generate_trade_decision(self, execute_result: list = None) -> Generator[Any,
 
         oh = self.trade_exchange.get_order_helper()
         order = oh.create(self._order.stock_id, exec_vol, self._order.direction)
+        self._update_last_step_range(self.get_data_cal_avail_range(rtype="step"))
 
         return TradeDecisionWO([order], self)
 
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 6c173a9ec3..615ddcbc38 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -169,7 +169,6 @@ def update_trade_decision(
         # default to return None, which indicates that the trade decision is not changed
         return None
 
-    # FIXME: do not define this method as an abstract one since it is never implemented
     def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
         """
         A method for updating the outer_trade_decision.
@@ -186,7 +185,7 @@ def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) ->
         """
         # default to reset the decision directly
         # NOTE: normally, user should do something to the strategy due to the change of outer decision
-        raise NotImplementedError(f"Please implement the `alter_outer_trade_decision` method")
+        pass
 
     # helper methods: not necessary but for convenience
     def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]:

From 48d5a1da3297959628d4ecc1834542f3a74224e9 Mon Sep 17 00:00:00 2001
From: Huoran Li <huoranli@microsoft.com>
Date: Tue, 16 Aug 2022 14:14:50 +0800
Subject: [PATCH 16/23] Resolve PR comments

---
 qlib/constant.py                            |  9 +++++++
 qlib/rl/integration/feature.py              | 26 +++------------------
 qlib/rl/order_execution/constants.py        | 12 ----------
 qlib/rl/order_execution/simulator_simple.py |  3 +--
 qlib/rl/order_execution/strategy.py         | 14 +++++------
 qlib/rl/order_execution/utils.py            |  2 +-
 qlib/rl/strategy/single_order.py            |  3 ++-
 qlib/rl/utils/cache.py                      | 24 +++++++++++++++++++
 qlib/strategy/base.py                       |  4 +++-
 tests/rl/test_qlib_simulator.py             |  2 +-
 10 files changed, 51 insertions(+), 48 deletions(-)
 delete mode 100644 qlib/rl/order_execution/constants.py
 create mode 100644 qlib/rl/utils/cache.py

diff --git a/qlib/constant.py b/qlib/constant.py
index 458890957d..cad1a7e6b3 100644
--- a/qlib/constant.py
+++ b/qlib/constant.py
@@ -2,6 +2,11 @@
 # Licensed under the MIT License.
 
 # REGION CONST
+from typing import TypeVar
+
+import numpy as np
+import pandas as pd
+
 REG_CN = "cn"
 REG_US = "us"
 REG_TW = "tw"
@@ -11,3 +16,7 @@
 
 # Infinity in integer
 INF = 10**18
+FINEST_GRANULARITY = "1min"
+COARSEST_GRANULARITY = "1day"
+ONE_SEC = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
+float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
\ No newline at end of file
diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py
index 8a5b653ece..07e4c0a2b8 100644
--- a/qlib/rl/integration/feature.py
+++ b/qlib/rl/integration/feature.py
@@ -3,7 +3,6 @@
 
 from __future__ import annotations
 
-import collections
 import pickle
 from pathlib import Path
 from typing import List
@@ -14,31 +13,11 @@
 from qlib.constant import REG_CN
 from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select
 from qlib.data.dataset import DatasetH
+from qlib.rl.utils.cache import LRUCache
 
 dataset = None
 
 
-class LRUCache:
-    def __init__(self, pool_size: int = 200):
-        self.pool_size = pool_size
-        self.contents = dict()
-        self.keys = collections.deque()
-
-    def put(self, key, item):
-        if self.has(key):
-            self.keys.remove(key)
-        self.keys.append(key)
-        self.contents[key] = item
-        while len(self.contents) > self.pool_size:
-            self.contents.pop(self.keys.popleft())
-
-    def get(self, key):
-        return self.contents[key]
-
-    def has(self, key):
-        return key in self.contents
-
-
 class DataWrapper:
     def __init__(self, feature_dataset: DatasetH, backtest_dataset: DatasetH,
                  columns_today: List[str], columns_yesterday: List[str], _internal: bool = False):
@@ -77,7 +56,8 @@ def init_qlib(qlib_config: dict, part: str = None) -> None:
     qlib_config:
         Qlib configuration.
 
-        Example:
+        Example::
+
             {
                 "provider_uri_day": DATA_ROOT_DIR / "qlib_1d",
                 "provider_uri_1min": DATA_ROOT_DIR / "qlib_1min",
diff --git a/qlib/rl/order_execution/constants.py b/qlib/rl/order_execution/constants.py
deleted file mode 100644
index 2f6c81b825..0000000000
--- a/qlib/rl/order_execution/constants.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-from typing import TypeVar
-
-import numpy as np
-import pandas as pd
-
-FINEST_GRANULARITY = "1min"
-COARSEST_GRANULARITY = "1day"
-ONE_SEC = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
-float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py
index 5754d34c2f..59cd92b4fe 100644
--- a/qlib/rl/order_execution/simulator_simple.py
+++ b/qlib/rl/order_execution/simulator_simple.py
@@ -10,9 +10,8 @@
 import pandas as pd
 
 from qlib.backtest.decision import Order, OrderDir
-from qlib.constant import EPS
+from qlib.constant import EPS, ONE_SEC, float_or_ndarray
 from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data
-from qlib.rl.order_execution.constants import ONE_SEC, float_or_ndarray
 from qlib.rl.order_execution.state import SAOEMetrics, SAOEState
 from qlib.rl.simulator import Simulator
 from qlib.rl.utils import LogLevel
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index 8f63bec5cf..b5be3f9879 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -4,7 +4,6 @@
 from __future__ import annotations
 
 import collections
-from abc import ABCMeta
 from typing import Any, Dict, Generator, Tuple, cast
 
 import pandas as pd
@@ -15,10 +14,11 @@
 from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
 from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState
 from qlib.rl.order_execution.utils import get_ticks_slice
+from qlib.rl.utils.cache import LRUCache
 from qlib.strategy.base import RLStrategy
 
 
-class SAOEStrategy(RLStrategy, metaclass=ABCMeta):
+class SAOEStrategy(RLStrategy):
     """RL-based strategies that use SAOEState as state."""
 
     def __init__(
@@ -42,12 +42,12 @@ def __init__(
 
     def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter:
         if not self.common_infra.has(SAOE_DATA_KEY):
-            self.common_infra.reset_infra(**{SAOE_DATA_KEY: {}})
+            self.common_infra.reset_infra(**{SAOE_DATA_KEY: LRUCache(pool_size=100)})
 
         # saoe_data can be considered as some type of cache. Use it to avoid unnecessary data reload.
         # The data for one order would be loaded only once. All strategies will reuse this data.
-        saoe_data = self.common_infra.get(SAOE_DATA_KEY)
-        if order.key not in saoe_data:
+        saoe_data = cast(LRUCache, self.common_infra.get(SAOE_DATA_KEY))
+        if not saoe_data.has(order.key):
             data = self.trade_exchange.get_deal_price(
                 stock_id=order.stock_id,
                 start_time=order.start_time.replace(hour=0, minute=0, second=0),
@@ -74,9 +74,9 @@ def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -
                 end_time=ticks_for_order[-1],
             )
 
-            saoe_data[order.key] = (ticks_index, ticks_for_order, backtest_data)
+            saoe_data.put(key=order.key, item=(ticks_index, ticks_for_order, backtest_data))
 
-        ticks_index, ticks_for_order, backtest_data = saoe_data[order.key]
+        ticks_index, ticks_for_order, backtest_data = saoe_data.get(order.key)
 
         return QlibBacktestAdapter(
             order=order,
diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py
index f861237398..a498037ad0 100644
--- a/qlib/rl/order_execution/utils.py
+++ b/qlib/rl/order_execution/utils.py
@@ -10,7 +10,7 @@
 
 from qlib.backtest.decision import OrderDir
 from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor
-from qlib.rl.order_execution.constants import ONE_SEC, float_or_ndarray
+from qlib.constant import ONE_SEC, float_or_ndarray
 
 
 def get_ticks_slice(
diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py
index 0055781973..d5b5915709 100644
--- a/qlib/rl/strategy/single_order.py
+++ b/qlib/rl/strategy/single_order.py
@@ -7,7 +7,8 @@
 
 
 class SingleOrderStrategy(BaseStrategy):
-    # this logic is copied from FileOrderStrategy
+    """Strategy used to generate a trade decision with exactly one order.
+    """
     def __init__(
         self,
         order: Order,
diff --git a/qlib/rl/utils/cache.py b/qlib/rl/utils/cache.py
new file mode 100644
index 0000000000..abe35afa1f
--- /dev/null
+++ b/qlib/rl/utils/cache.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+import collections
+
+
+class LRUCache:
+    def __init__(self, pool_size: int = 200):
+        self.pool_size = pool_size
+        self.contents = dict()
+        self.keys = collections.deque()
+
+    def put(self, key, item):
+        if self.has(key):
+            self.keys.remove(key)
+        self.keys.append(key)
+        self.contents[key] = item
+        while len(self.contents) > self.pool_size:
+            self.contents.pop(self.keys.popleft())
+
+    def get(self, key):
+        return self.contents[key]
+
+    def has(self, key):
+        return key in self.contents
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 615ddcbc38..41103b8bde 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -7,6 +7,8 @@
 
 import pandas as pd
 
+from ..constant import FINEST_GRANULARITY
+
 if TYPE_CHECKING:
     from qlib.backtest.exchange import Exchange
     from qlib.backtest.position import BasePosition
@@ -68,7 +70,7 @@ def trade_calendar(self) -> TradeCalendarManager:
 
     @property
     def ticks_per_step(self) -> int:
-        return int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta("1min"))
+        return int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta(FINEST_GRANULARITY))
 
     @property
     def trade_position(self) -> BasePosition:
diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py
index df78976654..230611fa66 100644
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -10,7 +10,7 @@
 from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime
 from qlib.backtest.executor import SimulatorExecutor
 from qlib.rl.order_execution import CategoricalActionInterpreter
-from qlib.rl.order_execution.constants import FINEST_GRANULARITY
+from qlib.constant import FINEST_GRANULARITY
 from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecutionQlib
 
 TOTAL_POSITION = 2100.0

From 4e7d3db06b4e09afdfaca6220454010c0e21277c Mon Sep 17 00:00:00 2001
From: Huoran Li <huoranli@microsoft.com>
Date: Tue, 16 Aug 2022 14:49:05 +0800
Subject: [PATCH 17/23] CI issues

---
 qlib/backtest/utils.py           |  5 ++--
 qlib/constant.py                 |  2 +-
 qlib/rl/integration/feature.py   | 42 ++++++++++++++++++--------------
 qlib/rl/order_execution/state.py |  4 +--
 qlib/rl/strategy/single_order.py |  4 +--
 qlib/rl/utils/cache.py           |  6 ++---
 qlib/strategy/base.py            |  1 -
 7 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py
index a1470a3398..f2a4851684 100644
--- a/qlib/backtest/utils.py
+++ b/qlib/backtest/utils.py
@@ -3,7 +3,6 @@
 
 from __future__ import annotations
 
-import bisect
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Any, Set, Tuple, Union
 
@@ -187,8 +186,8 @@ def get_range_idx(self, start_time: pd.Timestamp, end_time: pd.Timestamp) -> Tup
         Tuple[int, int]:
             the index of the range.  **the left and right are closed**
         """
-        left = bisect.bisect_right(self._calendar, start_time) - 1
-        right = bisect.bisect_right(self._calendar, end_time) - 1
+        left = np.searchsorted(self._calendar, start_time, side="right") - 1
+        right = np.searchsorted(self._calendar, end_time, side="right") - 1
         left -= self.start_index
         right -= self.start_index
 
diff --git a/qlib/constant.py b/qlib/constant.py
index cad1a7e6b3..607f0bcf51 100644
--- a/qlib/constant.py
+++ b/qlib/constant.py
@@ -19,4 +19,4 @@
 FINEST_GRANULARITY = "1min"
 COARSEST_GRANULARITY = "1day"
 ONE_SEC = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
-float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
\ No newline at end of file
+float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py
index 07e4c0a2b8..3c35e9c47b 100644
--- a/qlib/rl/integration/feature.py
+++ b/qlib/rl/integration/feature.py
@@ -19,9 +19,15 @@
 
 
 class DataWrapper:
-    def __init__(self, feature_dataset: DatasetH, backtest_dataset: DatasetH,
-                 columns_today: List[str], columns_yesterday: List[str], _internal: bool = False):
-        assert _internal, 'Init function of data wrapper is for internal use only.'
+    def __init__(
+        self,
+        feature_dataset: DatasetH,
+        backtest_dataset: DatasetH,
+        columns_today: List[str],
+        columns_yesterday: List[str],
+        _internal: bool = False,
+    ):
+        assert _internal, "Init function of data wrapper is for internal use only."
 
         self.feature_dataset = feature_dataset
         self.backtest_dataset = backtest_dataset
@@ -31,7 +37,7 @@ def __init__(self, feature_dataset: DatasetH, backtest_dataset: DatasetH,
         self.feature_cache = LRUCache()
         self.backtest_cache = LRUCache()
 
-    def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False):
+    def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False) -> pd.DataFrame:
         start_time, end_time = date.replace(hour=0, minute=0, second=0), date.replace(hour=23, minute=59, second=59)
 
         if backtest:
@@ -75,7 +81,7 @@ def init_qlib(qlib_config: dict, part: str = None) -> None:
         Identifying which part (stock / date) to load.
     """
 
-    global dataset
+    global dataset  # pylint: disable=W0603
 
     def _convert_to_path(path: str | Path) -> Path:
         return path if isinstance(path, Path) else Path(path)
@@ -121,18 +127,18 @@ def _convert_to_path(path: str | Path) -> Path:
         return
 
     # this won't work if it's put outside in case of multiprocessing
-    from qlib.data import D
+    from qlib.data import D  # noqa pylint: disable=C0415,W0611
 
     if part is None:
-        feature_path = Path(qlib_config["feature_root_dir"]) / 'feature.pkl'
-        backtest_path = Path(qlib_config["feature_root_dir"]) / 'backtest.pkl'
+        feature_path = Path(qlib_config["feature_root_dir"]) / "feature.pkl"
+        backtest_path = Path(qlib_config["feature_root_dir"]) / "backtest.pkl"
     else:
-        feature_path = Path(qlib_config["feature_root_dir"]) / 'feature' / (part + '.pkl')
-        backtest_path = Path(qlib_config["feature_root_dir"]) / 'backtest' / (part + '.pkl')
+        feature_path = Path(qlib_config["feature_root_dir"]) / "feature" / (part + ".pkl")
+        backtest_path = Path(qlib_config["feature_root_dir"]) / "backtest" / (part + ".pkl")
 
-    with feature_path.open('rb') as f:
+    with feature_path.open("rb") as f:
         feature_dataset = pickle.load(f)
-    with backtest_path.open('rb') as f:
+    with backtest_path.open("rb") as f:
         backtest_dataset = pickle.load(f)
 
     dataset = DataWrapper(
@@ -140,23 +146,23 @@ def _convert_to_path(path: str | Path) -> Path:
         backtest_dataset,
         qlib_config["feature_columns_today"],
         qlib_config["feature_columns_yesterday"],
-        _internal=True
+        _internal=True,
     )
 
 
-def fetch_features(stock_id: str, date: pd.Timestamp, yesterday: bool = False, backtest: bool = False):
-    assert dataset is not None, 'You must call init_qlib() before doing this.'
+def fetch_features(stock_id: str, date: pd.Timestamp, yesterday: bool = False, backtest: bool = False) -> pd.DataFrame:
+    assert dataset is not None, "You must call init_qlib() before doing this."
 
     if backtest:
-        fields = ['$close', '$volume']
+        fields = ["$close", "$volume"]
     else:
         fields = dataset.columns_yesterday if yesterday else dataset.columns_today
 
     data = dataset.get(stock_id, date, backtest)
     if data is None or len(data) == 0:
         # create a fake index, but RL doesn't care about index
-        data = pd.DataFrame(0., index=np.arange(240), columns=fields, dtype=np.float32)  # FIXME: hardcode here
+        data = pd.DataFrame(0.0, index=np.arange(240), columns=fields, dtype=np.float32)  # FIXME: hardcode here
     else:
-        data = data.rename(columns={c: c.rstrip('0') for c in data.columns})
+        data = data.rename(columns={c: c.rstrip("0") for c in data.columns})
         data = data[fields]
     return data
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index 97c2ea942a..8e8067fec3 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -96,11 +96,11 @@ def update(
 
         exec_vol = np.zeros(last_step_size)
         for order, _, __, ___ in execute_result:
-            idx, _ = get_day_min_idx_range(order.start_time, order.end_time, '1min', REG_CN)
+            idx, _ = get_day_min_idx_range(order.start_time, order.end_time, "1min", REG_CN)
             exec_vol[idx - last_step_range[0]] = order.deal_amount
 
         if exec_vol.sum() > self.position and exec_vol.sum() > 0.0:
-            assert exec_vol.sum() < self.position + 1, f'{exec_vol} too large'
+            assert exec_vol.sum() < self.position + 1, f"{exec_vol} too large"
             exec_vol *= self.position / (exec_vol.sum())
 
         market_volume = np.array(
diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py
index d5b5915709..042c88aa3c 100644
--- a/qlib/rl/strategy/single_order.py
+++ b/qlib/rl/strategy/single_order.py
@@ -7,8 +7,8 @@
 
 
 class SingleOrderStrategy(BaseStrategy):
-    """Strategy used to generate a trade decision with exactly one order.
-    """
+    """Strategy used to generate a trade decision with exactly one order."""
+
     def __init__(
         self,
         order: Order,
diff --git a/qlib/rl/utils/cache.py b/qlib/rl/utils/cache.py
index abe35afa1f..b0b3b43fb4 100644
--- a/qlib/rl/utils/cache.py
+++ b/qlib/rl/utils/cache.py
@@ -4,10 +4,10 @@
 
 
 class LRUCache:
-    def __init__(self, pool_size: int = 200):
+    def __init__(self, pool_size: int = 200) -> None:
         self.pool_size = pool_size
-        self.contents = dict()
-        self.keys = collections.deque()
+        self.contents: dict = {}
+        self.keys: collections.deque = collections.deque()
 
     def put(self, key, item):
         if self.has(key):
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 41103b8bde..3725223778 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -187,7 +187,6 @@ def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) ->
         """
         # default to reset the decision directly
         # NOTE: normally, user should do something to the strategy due to the change of outer decision
-        pass
 
     # helper methods: not necessary but for convenience
     def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]:

From 422163ae1e141625039c1ff33d76210f8dbd24b7 Mon Sep 17 00:00:00 2001
From: Default <huo53926@126.com>
Date: Wed, 17 Aug 2022 09:47:16 +0800
Subject: [PATCH 18/23] Refine adapter & saoe_data logic

---
 qlib/backtest/utils.py              |  9 +----
 qlib/rl/data/exchange_wrapper.py    | 59 +++++++++++++++++++++++++++--
 qlib/rl/order_execution/state.py    | 31 ++++++++-------
 qlib/rl/order_execution/strategy.py | 59 +++++------------------------
 4 files changed, 81 insertions(+), 77 deletions(-)

diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py
index f2a4851684..f815d10554 100644
--- a/qlib/backtest/utils.py
+++ b/qlib/backtest/utils.py
@@ -4,7 +4,7 @@
 from __future__ import annotations
 
 from abc import abstractmethod
-from typing import TYPE_CHECKING, Any, Set, Tuple, Union
+from typing import Any, Set, Tuple, TYPE_CHECKING, Union
 
 import numpy as np
 
@@ -20,9 +20,6 @@
 from ..data.data import Cal
 
 
-SAOE_DATA_KEY = "saoe_data"
-
-
 class TradeCalendarManager:
     """
     Manager for trading calendar
@@ -237,9 +234,7 @@ def update(self, other: BaseInfrastructure) -> None:
 
 class CommonInfrastructure(BaseInfrastructure):
     def get_support_infra(self) -> Set[str]:
-        # SAOE_DATA_KEY is used to store SAOE (single asset order execution) information that should be shared by
-        # all strategies. It should be dict.
-        return {"trade_account", "trade_exchange", SAOE_DATA_KEY}
+        return {"trade_account", "trade_exchange"}
 
 
 class LevelInfrastructure(BaseInfrastructure):
diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py
index fb8daabf33..62305dad6f 100644
--- a/qlib/rl/data/exchange_wrapper.py
+++ b/qlib/rl/data/exchange_wrapper.py
@@ -3,22 +3,33 @@
 
 from typing import cast
 
+import cachetools
 import pandas as pd
 
 from qlib.backtest import Exchange, Order
-
+from qlib.backtest.decision import TradeRange, TradeRangeByTime
+from qlib.rl.order_execution.utils import get_ticks_slice
 from .pickle_styled import IntradayBacktestData
+from ...utils.index_data import IndexData
 
 
 class QlibIntradayBacktestData(IntradayBacktestData):
     """Backtest data for Qlib simulator"""
 
-    def __init__(self, order: Order, exchange: Exchange, start_time: pd.Timestamp, end_time: pd.Timestamp) -> None:
+    def __init__(
+        self,
+        order: Order,
+        exchange: Exchange,
+        ticks_index: pd.DatetimeIndex,
+        ticks_for_order: pd.DatetimeIndex,
+    ) -> None:
         super(QlibIntradayBacktestData, self).__init__()
         self._order = order
         self._exchange = exchange
-        self._start_time = start_time
-        self._end_time = end_time
+        self._start_time = ticks_for_order[0]
+        self._end_time = ticks_for_order[-1]
+        self.ticks_index = ticks_index
+        self.ticks_for_order = ticks_for_order
 
         self._deal_price = cast(
             pd.Series,
@@ -57,3 +68,43 @@ def get_volume(self) -> pd.Series:
 
     def get_time_index(self) -> pd.DatetimeIndex:
         return pd.DatetimeIndex([e[1] for e in list(self._exchange.quote_df.index)])
+
+
+@cachetools.cached(  # type: ignore
+    cache=cachetools.LRUCache(100),
+    key=lambda order, _, __: order.key,
+)
+def load_qlib_backtest_data(
+    order: Order,
+    trade_exchange: Exchange,
+    trade_range: TradeRange,
+) -> QlibIntradayBacktestData:
+    data = cast(
+        IndexData,
+        trade_exchange.get_deal_price(
+            stock_id=order.stock_id,
+            start_time=order.start_time.replace(hour=0, minute=0, second=0),
+            end_time=order.start_time.replace(hour=23, minute=59, second=59),
+            direction=order.direction,
+            method=None,
+        ),
+    )
+
+    ticks_index = pd.DatetimeIndex(data.index)
+    if isinstance(trade_range, TradeRangeByTime):
+        ticks_for_order = get_ticks_slice(
+            ticks_index,
+            trade_range.start_time,
+            trade_range.end_time,
+            include_end=True,
+        )
+    else:
+        ticks_for_order = None  # FIXME: implement this logic
+
+    backtest_data = QlibIntradayBacktestData(
+        order=order,
+        exchange=trade_exchange,
+        ticks_index=ticks_index,
+        ticks_for_order=ticks_for_order,
+    )
+    return backtest_data
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index 8e8067fec3..639cc95089 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -41,9 +41,9 @@ class QlibBacktestAdapter:
 
     Example usage::
 
-        maintainer = StateMaintainer(...)
-        maintainer.update(...)
-        state = maintainer.saoe_state
+        adapter = QlibBacktestAdapter(...)
+        adapter.update(...)
+        state = adapter.saoe_state
     """
 
     def __init__(
@@ -52,8 +52,6 @@ def __init__(
         executor: BaseExecutor,
         exchange: Exchange,
         ticks_per_step: int,
-        ticks_index: pd.DatetimeIndex,
-        ticks_for_order: pd.DatetimeIndex,
         backtest_data: QlibIntradayBacktestData,
     ) -> None:
         super().__init__()
@@ -62,8 +60,6 @@ def __init__(
         self.order = order
         self.executor = executor
         self.exchange = exchange
-        self.ticks_index = ticks_index
-        self.ticks_for_order = ticks_for_order
         self.backtest_data = backtest_data
 
         self.twap_price = self.backtest_data.get_deal_price().mean()
@@ -73,15 +69,18 @@ def __init__(
         self.history_steps = pd.DataFrame(columns=metric_keys).set_index("datetime")
         self.metrics: Optional[SAOEMetrics] = None
 
-        self.cur_time = max(ticks_for_order[0], order.start_time)
+        self.cur_time = max(backtest_data.ticks_for_order[0], order.start_time)
         self.ticks_per_step = ticks_per_step
 
     def _next_time(self) -> pd.Timestamp:
-        current_loc = self.ticks_index.get_loc(self.cur_time)
+        current_loc = self.backtest_data.ticks_index.get_loc(self.cur_time)
         next_loc = current_loc + self.ticks_per_step
         next_loc = next_loc - next_loc % self.ticks_per_step
-        if next_loc < len(self.ticks_index) and self.ticks_index[next_loc] < self.order.end_time:
-            return self.ticks_index[next_loc]
+        if (
+            next_loc < len(self.backtest_data.ticks_index)
+            and self.backtest_data.ticks_index[next_loc] < self.order.end_time
+        ):
+            return self.backtest_data.ticks_index[next_loc]
         else:
             return self.order.end_time
 
@@ -91,8 +90,8 @@ def update(
         last_step_range: Tuple[int, int],
     ) -> None:
         last_step_size = last_step_range[1] - last_step_range[0] + 1
-        start_time = self.ticks_index[last_step_range[0]]
-        end_time = self.ticks_index[last_step_range[1]]
+        start_time = self.backtest_data.ticks_index[last_step_range[0]]
+        end_time = self.backtest_data.ticks_index[last_step_range[1]]
 
         exec_vol = np.zeros(last_step_size)
         for order, _, __, ___ in execute_result:
@@ -164,7 +163,7 @@ def generate_metrics_after_done(self) -> None:
 
         self.metrics = self._collect_single_order_metric(
             self.order,
-            self.ticks_index[0],  # start time
+            self.backtest_data.ticks_index[0],  # start time
             self.history_exec["market_volume"],
             self.history_exec["market_price"],
             self.history_steps["amount"].sum(),
@@ -245,8 +244,8 @@ def saoe_state(self) -> SAOEState:
             metrics=self.metrics,
             backtest_data=self.backtest_data,
             ticks_per_step=self.ticks_per_step,
-            ticks_index=self.ticks_index,
-            ticks_for_order=self.ticks_for_order,
+            ticks_index=self.backtest_data.ticks_index,
+            ticks_for_order=self.backtest_data.ticks_for_order,
         )
 
 
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index b5be3f9879..8726d371ba 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -4,17 +4,13 @@
 from __future__ import annotations
 
 import collections
-from typing import Any, Dict, Generator, Tuple, cast
-
-import pandas as pd
+from typing import Any, cast, Dict, Generator, Tuple
 
 from qlib.backtest import CommonInfrastructure, Order
-from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange, TradeRangeByTime
-from qlib.backtest.utils import LevelInfrastructure, SAOE_DATA_KEY
-from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
+from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange
+from qlib.backtest.utils import LevelInfrastructure
+from qlib.rl.data.exchange_wrapper import load_qlib_backtest_data
 from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState
-from qlib.rl.order_execution.utils import get_ticks_slice
-from qlib.rl.utils.cache import LRUCache
 from qlib.strategy.base import RLStrategy
 
 
@@ -41,50 +37,13 @@ def __init__(
         self._last_step_range = (0, 0)
 
     def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter:
-        if not self.common_infra.has(SAOE_DATA_KEY):
-            self.common_infra.reset_infra(**{SAOE_DATA_KEY: LRUCache(pool_size=100)})
-
-        # saoe_data can be considered as some type of cache. Use it to avoid unnecessary data reload.
-        # The data for one order would be loaded only once. All strategies will reuse this data.
-        saoe_data = cast(LRUCache, self.common_infra.get(SAOE_DATA_KEY))
-        if not saoe_data.has(order.key):
-            data = self.trade_exchange.get_deal_price(
-                stock_id=order.stock_id,
-                start_time=order.start_time.replace(hour=0, minute=0, second=0),
-                end_time=order.start_time.replace(hour=23, minute=59, second=59),
-                direction=order.direction,
-                method=None,
-            )
-
-            ticks_index = pd.DatetimeIndex(data.index)
-            if isinstance(trade_range, TradeRangeByTime):
-                ticks_for_order = get_ticks_slice(
-                    ticks_index,
-                    trade_range.start_time,
-                    trade_range.end_time,
-                    include_end=True,
-                )
-            else:
-                ticks_for_order = None  # FIXME: implement this logic
-
-            backtest_data = QlibIntradayBacktestData(
-                order=order,
-                exchange=self.trade_exchange,
-                start_time=ticks_for_order[0],
-                end_time=ticks_for_order[-1],
-            )
-
-            saoe_data.put(key=order.key, item=(ticks_index, ticks_for_order, backtest_data))
-
-        ticks_index, ticks_for_order, backtest_data = saoe_data.get(order.key)
+        backtest_data = load_qlib_backtest_data(order, self.trade_exchange, trade_range)
 
         return QlibBacktestAdapter(
             order=order,
             executor=self.executor,
             exchange=self.trade_exchange,
             ticks_per_step=self.ticks_per_step,
-            ticks_index=ticks_index,
-            ticks_for_order=ticks_for_order,
             backtest_data=backtest_data,
         )
 
@@ -110,8 +69,8 @@ def get_saoe_state_by_order(self, order: Order) -> SAOEState:
         return self.adapter_dict[order.key].saoe_state
 
     def post_upper_level_exe_step(self) -> None:
-        for maintainer in self.adapter_dict.values():
-            maintainer.generate_metrics_after_done()
+        for adapter in self.adapter_dict.values():
+            adapter.generate_metrics_after_done()
 
     def post_exe_step(self, execute_result: list) -> None:
         last_step_length = self._last_step_range[1] - self._last_step_range[0]
@@ -124,8 +83,8 @@ def post_exe_step(self, execute_result: list) -> None:
             for e in execute_result:
                 results[e[0].key].append(e)
 
-        for key, maintainer in self.adapter_dict.items():
-            maintainer.update(results[key], self._last_step_range)
+        for key, adapter in self.adapter_dict.items():
+            adapter.update(results[key], self._last_step_range)
 
 
 class DecomposedStrategy(SAOEStrategy):

From fe3b02f1523373489faee37ae64ac5cec05d4d21 Mon Sep 17 00:00:00 2001
From: Huoran Li <huoranli@microsoft.com>
Date: Tue, 23 Aug 2022 13:04:57 +0800
Subject: [PATCH 19/23] Resolve PR comments

---
 qlib/backtest/decision.py                 | 14 +++++++++++--
 qlib/backtest/executor.py                 |  9 ++------
 qlib/rl/data/exchange_wrapper.py          |  6 +++---
 qlib/rl/integration/feature.py            | 24 +++++++---------------
 qlib/rl/order_execution/simulator_qlib.py | 25 +++++++++++++++--------
 qlib/rl/order_execution/state.py          |  2 --
 qlib/rl/order_execution/strategy.py       |  9 +++-----
 qlib/rl/strategy/single_order.py          |  5 +----
 qlib/rl/utils/cache.py                    | 24 ----------------------
 qlib/strategy/base.py                     |  7 ++++++-
 10 files changed, 50 insertions(+), 75 deletions(-)
 delete mode 100644 qlib/rl/utils/cache.py

diff --git a/qlib/backtest/decision.py b/qlib/backtest/decision.py
index d41fa66f60..042b73fea8 100644
--- a/qlib/backtest/decision.py
+++ b/qlib/backtest/decision.py
@@ -135,10 +135,20 @@ def parse_dir(direction: Union[str, int, np.integer, OrderDir, np.ndarray]) -> U
         else:
             raise NotImplementedError(f"This type of input is not supported")
 
+    @property
+    def key_by_day(self) -> tuple:
+        """A hashable & unique key to identify this order, under the granularity in day."""
+        return self.stock_id, self.date, self.direction
+
     @property
     def key(self) -> tuple:
-        """A hashable & unique key to identify this order. Usually used as the key in a dict."""
-        return self.stock_id, self.start_time.replace(hour=0, minute=0, second=0), self.direction
+        """A hashable & unique key to identify this order."""
+        return self.stock_id, self.start_time, self.end_time, self.direction
+
+    @property
+    def date(self) -> pd.Timestamp:
+        """Date of the order."""
+        return pd.Timestamp(self.start_time.replace(hour=0, minute=0, second=0))
 
 
 class OrderHelper:
diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index c53f2b2fdf..664f33a3cd 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -124,9 +124,6 @@ def __init__(
         self.dealt_order_amount: Dict[str, float] = defaultdict(float)
         self.deal_day = None
 
-        # whether the current executor is collecting data
-        self.is_collecting = False
-
     def reset_common_infra(self, common_infra: CommonInfrastructure, copy_trade_account: bool = False) -> None:
         """
         reset infrastructure for trading
@@ -261,7 +258,6 @@ def collect_data(
         object
             trade decision
         """
-        self.is_collecting = True
 
         if self.track_data:
             yield trade_decision
@@ -304,7 +300,6 @@ def collect_data(
         if return_value is not None:
             return_value.update({"execute_result": res})
 
-        self.is_collecting = False
         return res
 
     def get_all_executors(self) -> List[BaseExecutor]:
@@ -405,7 +400,7 @@ def _update_trade_decision(self, trade_decision: BaseTradeDecision) -> BaseTrade
             trade_decision = updated_trade_decision
             # NEW UPDATE
             # create a hook for inner strategy to update outer decision
-            self.inner_strategy.alter_outer_trade_decision(trade_decision)
+            trade_decision = self.inner_strategy.alter_outer_trade_decision(trade_decision)
         return trade_decision
 
     def _collect_data(
@@ -482,7 +477,7 @@ def _collect_data(
                 # do nothing and just step forward
                 sub_cal.step()
 
-        # Lef inner strategy know that the outer level execution is done.
+        # Let inner strategy know that the outer level execution is done.
         self.inner_strategy.post_upper_level_exe_step()
 
         return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list}
diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py
index 62305dad6f..6a657fc6f1 100644
--- a/qlib/rl/data/exchange_wrapper.py
+++ b/qlib/rl/data/exchange_wrapper.py
@@ -72,7 +72,7 @@ def get_time_index(self) -> pd.DatetimeIndex:
 
 @cachetools.cached(  # type: ignore
     cache=cachetools.LRUCache(100),
-    key=lambda order, _, __: order.key,
+    key=lambda order, _, __: order.key_by_day,
 )
 def load_qlib_backtest_data(
     order: Order,
@@ -83,8 +83,8 @@ def load_qlib_backtest_data(
         IndexData,
         trade_exchange.get_deal_price(
             stock_id=order.stock_id,
-            start_time=order.start_time.replace(hour=0, minute=0, second=0),
-            end_time=order.start_time.replace(hour=23, minute=59, second=59),
+            start_time=order.date,
+            end_time=order.date + pd.Timedelta("1day") - pd.Timedelta("1s"),
             direction=order.direction,
             method=None,
         ),
diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py
index 3c35e9c47b..0b3a27159a 100644
--- a/qlib/rl/integration/feature.py
+++ b/qlib/rl/integration/feature.py
@@ -7,13 +7,13 @@
 from pathlib import Path
 from typing import List
 
+import cachetools
 import numpy as np
 import pandas as pd
 import qlib
 from qlib.constant import REG_CN
 from qlib.contrib.ops.high_freq import BFillNan, Cut, Date, DayCumsum, DayLast, FFillNan, IsInf, IsNull, Select
 from qlib.data.dataset import DatasetH
-from qlib.rl.utils.cache import LRUCache
 
 dataset = None
 
@@ -34,24 +34,14 @@ def __init__(
         self.columns_today = columns_today
         self.columns_yesterday = columns_yesterday
 
-        self.feature_cache = LRUCache()
-        self.backtest_cache = LRUCache()
-
+    @cachetools.cached(  # type: ignore
+        cache=cachetools.LRUCache(100),
+        key=lambda stock_id, date, backtest: (stock_id, date.replace(hour=0, minute=0, second=0), backtest),
+    )
     def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False) -> pd.DataFrame:
         start_time, end_time = date.replace(hour=0, minute=0, second=0), date.replace(hour=23, minute=59, second=59)
-
-        if backtest:
-            dataset = self.backtest_dataset
-            cache = self.backtest_cache
-        else:
-            dataset = self.feature_dataset
-            cache = self.feature_cache
-
-        if cache.has((start_time, end_time, stock_id)):
-            return cache.get((start_time, end_time, stock_id))
-        data = dataset.handler.fetch(pd.IndexSlice[stock_id, start_time:end_time], level=None)
-        cache.put((start_time, end_time, stock_id), data)
-        return data
+        dataset = self.backtest_dataset if backtest else self.feature_dataset
+        return dataset.handler.fetch(pd.IndexSlice[stock_id, start_time:end_time], level=None)
 
 
 def init_qlib(qlib_config: dict, part: str = None) -> None:
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index e7638ffb65..808310dbaa 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -6,11 +6,11 @@
 from typing import Generator, Optional
 
 import pandas as pd
-from qlib.backtest import get_strategy_executor
+from qlib.backtest import collect_data_loop, get_strategy_executor
 from qlib.backtest.decision import Order
 from qlib.backtest.executor import NestedExecutor
 from qlib.rl.integration.feature import init_qlib
-from qlib.rl.order_execution.state import SAOEState
+from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState
 from qlib.rl.order_execution.strategy import SAOEStrategy
 from qlib.rl.simulator import Simulator
 
@@ -59,8 +59,8 @@ def reset(
             init_qlib(qlib_config, part="skip")
 
         strategy, self._executor = get_strategy_executor(
-            start_time=order.start_time.replace(hour=0, minute=0, second=0),
-            end_time=order.start_time.replace(hour=0, minute=0, second=0) + pd.DateOffset(1),
+            start_time=order.date,
+            end_time=order.date + pd.DateOffset(1),
             strategy=strategy_config,
             executor=executor_config,
             benchmark=order.stock_id,
@@ -70,18 +70,25 @@ def reset(
         )
 
         assert isinstance(self._executor, NestedExecutor)
-        strategy.reset(level_infra=self._executor.get_level_infra())  # TODO: check if we could remove this
 
-        self._collect_data_loop = self._executor.collect_data(strategy.generate_trade_decision(), level=0)
+        self._collect_data_loop = collect_data_loop(
+            start_time=order.date,
+            end_time=order.date,
+            trade_strategy=strategy,
+            trade_executor=self._executor,
+        )
         assert isinstance(self._collect_data_loop, Generator)
 
         self._last_yielded_saoe_strategy = self._iter_strategy(action=None)
 
         self._order = order
 
+    def _get_adapter(self) -> QlibBacktestAdapter:
+        return self._last_yielded_saoe_strategy.adapter_dict[self._order.key_by_day]
+
     @property
     def twap_price(self) -> float:
-        return self._last_yielded_saoe_strategy.adapter_dict[self._order.key].twap_price
+        return self._get_adapter().twap_price
 
     def _iter_strategy(self, action: float = None) -> SAOEStrategy:
         """Iterate the _collect_data_loop until we get the next yield SAOEStrategy."""
@@ -112,7 +119,7 @@ def step(self, action: float) -> None:
         assert self._executor is not None
 
     def get_state(self) -> SAOEState:
-        return self._last_yielded_saoe_strategy.get_saoe_state_by_order(self._order)
+        return self._get_adapter().saoe_state
 
     def done(self) -> bool:
-        return not self._executor.is_collecting
+        return self._executor.finished()
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index 639cc95089..bbdab40b94 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -54,8 +54,6 @@ def __init__(
         ticks_per_step: int,
         backtest_data: QlibIntradayBacktestData,
     ) -> None:
-        super().__init__()
-
         self.position = order.amount
         self.order = order
         self.executor = executor
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index 8726d371ba..246ac841d3 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -63,10 +63,10 @@ def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -
             self.adapter_dict = {}
             for decision in outer_trade_decision.get_decision():
                 order = cast(Order, decision)
-                self.adapter_dict[order.key] = self._create_qlib_backtest_adapter(order, trade_range)
+                self.adapter_dict[order.key_by_day] = self._create_qlib_backtest_adapter(order, trade_range)
 
     def get_saoe_state_by_order(self, order: Order) -> SAOEState:
-        return self.adapter_dict[order.key].saoe_state
+        return self.adapter_dict[order.key_by_day].saoe_state
 
     def post_upper_level_exe_step(self) -> None:
         for adapter in self.adapter_dict.values():
@@ -81,7 +81,7 @@ def post_exe_step(self, execute_result: list) -> None:
         results = collections.defaultdict(list)
         if execute_result is not None:
             for e in execute_result:
-                results[e[0].key].append(e)
+                results[e[0].key_by_day].append(e)
 
         for key, adapter in self.adapter_dict.items():
             adapter.update(results[key], self._last_step_range)
@@ -112,9 +112,6 @@ def generate_trade_decision(self, execute_result: list = None) -> Generator[Any,
 
         return TradeDecisionWO([order], self)
 
-    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
-        return outer_trade_decision
-
     def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None:
         super().reset(outer_trade_decision=outer_trade_decision, **kwargs)
 
diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py
index 042c88aa3c..6ae17f1d1f 100644
--- a/qlib/rl/strategy/single_order.py
+++ b/qlib/rl/strategy/single_order.py
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.
 
 from qlib.backtest import Order
-from qlib.backtest.decision import BaseTradeDecision, OrderHelper, TradeDecisionWO, TradeRange
+from qlib.backtest.decision import OrderHelper, TradeDecisionWO, TradeRange
 from qlib.strategy.base import BaseStrategy
 
 
@@ -21,9 +21,6 @@ def __init__(
         self._trade_range = trade_range
         self._instrument = instrument
 
-    def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) -> BaseTradeDecision:
-        return outer_trade_decision
-
     def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO:
         oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper()
         order_list = [
diff --git a/qlib/rl/utils/cache.py b/qlib/rl/utils/cache.py
deleted file mode 100644
index b0b3b43fb4..0000000000
--- a/qlib/rl/utils/cache.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from __future__ import annotations
-
-import collections
-
-
-class LRUCache:
-    def __init__(self, pool_size: int = 200) -> None:
-        self.pool_size = pool_size
-        self.contents: dict = {}
-        self.keys: collections.deque = collections.deque()
-
-    def put(self, key, item):
-        if self.has(key):
-            self.keys.remove(key)
-        self.keys.append(key)
-        self.contents[key] = item
-        while len(self.contents) > self.pool_size:
-            self.contents.pop(self.keys.popleft())
-
-    def get(self, key):
-        return self.contents[key]
-
-    def has(self, key):
-        return key in self.contents
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 3725223778..78ec75a21f 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -187,6 +187,7 @@ def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) ->
         """
         # default to reset the decision directly
         # NOTE: normally, user should do something to the strategy due to the change of outer decision
+        return outer_trade_decision
 
     # helper methods: not necessary but for convenience
     def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]:
@@ -221,7 +222,11 @@ def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]:
     def post_upper_level_exe_step(self) -> None:
         """
         A hook for doing sth after the upper level executor finished its execution (for example, finalize
-        the metrics collection).
+        the metrics collection). This is used in the nested execution scenario. You do not need to care about
+        this method if your strategy is not used in nested execution.
+
+        TODO: Group the nested-execution-related methods together and try to keep the the framework simple at the doc
+        TODO: and code level.
         """
 
     def post_exe_step(self, execute_result: list) -> None:

From 52747e76ec78998b9149a261cb090287dc68a91a Mon Sep 17 00:00:00 2001
From: Huoran Li <huoranli@microsoft.com>
Date: Tue, 23 Aug 2022 16:40:38 +0800
Subject: [PATCH 20/23] Resolve PR comments

---
 qlib/constant.py                          |  6 +-
 qlib/rl/data/exchange_wrapper.py          | 14 ++--
 qlib/rl/data/pickle_styled.py             |  4 +-
 qlib/rl/integration/feature.py            |  5 ++
 qlib/rl/order_execution/simulator_qlib.py |  6 +-
 qlib/rl/order_execution/state.py          | 16 ++---
 qlib/rl/order_execution/strategy.py       | 58 +++++++++++-----
 qlib/rl/strategy/single_order.py          |  4 +-
 qlib/strategy/base.py                     | 81 +++++++++++------------
 tests/rl/test_qlib_simulator.py           | 14 ++--
 10 files changed, 114 insertions(+), 94 deletions(-)

diff --git a/qlib/constant.py b/qlib/constant.py
index 607f0bcf51..d91ecd803a 100644
--- a/qlib/constant.py
+++ b/qlib/constant.py
@@ -15,8 +15,8 @@
 EPS = 1e-12
 
 # Infinity in integer
-INF = 10**18
-FINEST_GRANULARITY = "1min"
-COARSEST_GRANULARITY = "1day"
+INF = int(1e18)
+ONE_DAY = pd.Timedelta("1day")
+ONE_MIN = pd.Timedelta("1min")
 ONE_SEC = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
 float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py
index 6a657fc6f1..3ed3f6904b 100644
--- a/qlib/rl/data/exchange_wrapper.py
+++ b/qlib/rl/data/exchange_wrapper.py
@@ -8,12 +8,13 @@
 
 from qlib.backtest import Exchange, Order
 from qlib.backtest.decision import TradeRange, TradeRangeByTime
+from qlib.constant import ONE_DAY, ONE_SEC
 from qlib.rl.order_execution.utils import get_ticks_slice
-from .pickle_styled import IntradayBacktestData
-from ...utils.index_data import IndexData
+from qlib.utils.index_data import IndexData
+from .pickle_styled import BaseIntradayBacktestData
 
 
-class QlibIntradayBacktestData(IntradayBacktestData):
+class IntradayBacktestData(BaseIntradayBacktestData):
     """Backtest data for Qlib simulator"""
 
     def __init__(
@@ -23,7 +24,6 @@ def __init__(
         ticks_index: pd.DatetimeIndex,
         ticks_for_order: pd.DatetimeIndex,
     ) -> None:
-        super(QlibIntradayBacktestData, self).__init__()
         self._order = order
         self._exchange = exchange
         self._start_time = ticks_for_order[0]
@@ -78,13 +78,13 @@ def load_qlib_backtest_data(
     order: Order,
     trade_exchange: Exchange,
     trade_range: TradeRange,
-) -> QlibIntradayBacktestData:
+) -> IntradayBacktestData:
     data = cast(
         IndexData,
         trade_exchange.get_deal_price(
             stock_id=order.stock_id,
             start_time=order.date,
-            end_time=order.date + pd.Timedelta("1day") - pd.Timedelta("1s"),
+            end_time=order.date + ONE_DAY - ONE_SEC,
             direction=order.direction,
             method=None,
         ),
@@ -101,7 +101,7 @@ def load_qlib_backtest_data(
     else:
         ticks_for_order = None  # FIXME: implement this logic
 
-    backtest_data = QlibIntradayBacktestData(
+    backtest_data = IntradayBacktestData(
         order=order,
         exchange=trade_exchange,
         ticks_index=ticks_index,
diff --git a/qlib/rl/data/pickle_styled.py b/qlib/rl/data/pickle_styled.py
index aa0ba38fff..43fe9dd5ad 100644
--- a/qlib/rl/data/pickle_styled.py
+++ b/qlib/rl/data/pickle_styled.py
@@ -86,7 +86,7 @@ def _read_pickle(filename_without_suffix: Path) -> pd.DataFrame:
     return pd.read_pickle(_find_pickle(filename_without_suffix))
 
 
-class IntradayBacktestData:
+class BaseIntradayBacktestData:
     """
     Raw market data that is often used in backtesting (thus called BacktestData).
 
@@ -115,7 +115,7 @@ def get_time_index(self) -> pd.DatetimeIndex:
         raise NotImplementedError
 
 
-class SimpleIntradayBacktestData(IntradayBacktestData):
+class SimpleIntradayBacktestData(BaseIntradayBacktestData):
     """Backtest data for simple simulator"""
 
     def __init__(
diff --git a/qlib/rl/integration/feature.py b/qlib/rl/integration/feature.py
index 0b3a27159a..07ca381613 100644
--- a/qlib/rl/integration/feature.py
+++ b/qlib/rl/integration/feature.py
@@ -1,6 +1,11 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
+"""
+TODO: This file is used to integrate NeuTrader with Qlib to run the existing projects.
+TODO: The implementation here is kind of ad hoc. It is better to design a more uniform & general implementation.
+"""
+
 from __future__ import annotations
 
 import pickle
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 808310dbaa..8f48c24228 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -10,12 +10,12 @@
 from qlib.backtest.decision import Order
 from qlib.backtest.executor import NestedExecutor
 from qlib.rl.integration.feature import init_qlib
-from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState
+from qlib.rl.order_execution.state import SAOEStateAdapter, SAOEState
 from qlib.rl.order_execution.strategy import SAOEStrategy
 from qlib.rl.simulator import Simulator
 
 
-class SingleAssetOrderExecutionQlib(Simulator[Order, SAOEState, float]):
+class SingleAssetOrderExecution(Simulator[Order, SAOEState, float]):
     """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools.
 
     Parameters
@@ -83,7 +83,7 @@ def reset(
 
         self._order = order
 
-    def _get_adapter(self) -> QlibBacktestAdapter:
+    def _get_adapter(self) -> SAOEStateAdapter:
         return self._last_yielded_saoe_strategy.adapter_dict[self._order.key_by_day]
 
     @property
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index bbdab40b94..9b2ae76d7e 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -9,9 +9,9 @@
 import pandas as pd
 from qlib.backtest import Exchange, Order
 from qlib.backtest.executor import BaseExecutor
-from qlib.constant import EPS, REG_CN
-from qlib.rl.data.exchange_wrapper import QlibIntradayBacktestData
-from qlib.rl.data.pickle_styled import IntradayBacktestData
+from qlib.constant import EPS, ONE_MIN, REG_CN
+from qlib.rl.data.exchange_wrapper import IntradayBacktestData
+from qlib.rl.data.pickle_styled import BaseIntradayBacktestData
 from qlib.rl.order_execution.utils import dataframe_append, price_advantage
 from qlib.utils.time import get_day_min_idx_range
 from typing_extensions import TypedDict
@@ -20,7 +20,7 @@
 def _get_all_timestamps(
     start: pd.Timestamp,
     end: pd.Timestamp,
-    granularity: pd.Timedelta = pd.Timedelta("1min"),
+    granularity: pd.Timedelta = ONE_MIN,
     include_end: bool = True,
 ) -> pd.DatetimeIndex:
     ret = []
@@ -35,13 +35,13 @@ def _get_all_timestamps(
     return pd.DatetimeIndex(ret)
 
 
-class QlibBacktestAdapter:
+class SAOEStateAdapter:
     """
     Maintain states of the environment.
 
     Example usage::
 
-        adapter = QlibBacktestAdapter(...)
+        adapter = SAOEStateAdapter(...)
         adapter.update(...)
         state = adapter.saoe_state
     """
@@ -52,7 +52,7 @@ def __init__(
         executor: BaseExecutor,
         exchange: Exchange,
         ticks_per_step: int,
-        backtest_data: QlibIntradayBacktestData,
+        backtest_data: IntradayBacktestData,
     ) -> None:
         self.position = order.amount
         self.order = order
@@ -316,7 +316,7 @@ class SAOEState(NamedTuple):
     metrics: Optional[SAOEMetrics]
     """Daily metric, only available when the trading is in "done" state."""
 
-    backtest_data: IntradayBacktestData
+    backtest_data: BaseIntradayBacktestData
     """Backtest data is included in the state.
     Actually, only the time index of this data is needed, at this moment.
     I include the full data so that algorithms (e.g., VWAP) that relies on the raw data can be implemented.
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index 246ac841d3..5c280b6576 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -4,13 +4,17 @@
 from __future__ import annotations
 
 import collections
-from typing import Any, cast, Dict, Generator, Tuple
+from types import GeneratorType
+from typing import Any, Union, cast, Dict, Generator
+
+import pandas as pd
 
 from qlib.backtest import CommonInfrastructure, Order
 from qlib.backtest.decision import BaseTradeDecision, TradeDecisionWO, TradeRange
 from qlib.backtest.utils import LevelInfrastructure
+from qlib.constant import ONE_MIN
 from qlib.rl.data.exchange_wrapper import load_qlib_backtest_data
-from qlib.rl.order_execution.state import QlibBacktestAdapter, SAOEState
+from qlib.rl.order_execution.state import SAOEStateAdapter, SAOEState
 from qlib.strategy.base import RLStrategy
 
 
@@ -33,23 +37,20 @@ def __init__(
             **kwargs,
         )
 
-        self.adapter_dict: Dict[tuple, QlibBacktestAdapter] = {}
+        self.adapter_dict: Dict[tuple, SAOEStateAdapter] = {}
         self._last_step_range = (0, 0)
 
-    def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> QlibBacktestAdapter:
+    def _create_qlib_backtest_adapter(self, order: Order, trade_range: TradeRange) -> SAOEStateAdapter:
         backtest_data = load_qlib_backtest_data(order, self.trade_exchange, trade_range)
 
-        return QlibBacktestAdapter(
+        return SAOEStateAdapter(
             order=order,
             executor=self.executor,
             exchange=self.trade_exchange,
-            ticks_per_step=self.ticks_per_step,
+            ticks_per_step=int(pd.Timedelta(self.trade_calendar.get_freq()) / ONE_MIN),
             backtest_data=backtest_data,
         )
 
-    def _update_last_step_range(self, step_range: Tuple[int, int]) -> None:
-        self._last_step_range = step_range
-
     def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs: Any) -> None:
         super(SAOEStrategy, self).reset(outer_trade_decision=outer_trade_decision, **kwargs)
 
@@ -86,9 +87,35 @@ def post_exe_step(self, execute_result: list) -> None:
         for key, adapter in self.adapter_dict.items():
             adapter.update(results[key], self._last_step_range)
 
-
-class DecomposedStrategy(SAOEStrategy):
-    """Decomposed strategy that needs actions from outside to generate trade decisions."""
+    def generate_trade_decision(
+        self,
+        execute_result: list = None,
+    ) -> Union[BaseTradeDecision, Generator[Any, Any, BaseTradeDecision]]:
+        """
+        For SAOEStrategy, we need to update the `self._last_step_range` every time a decision is generated.
+        This operation should be invisible to developers, so we implement it in `generate_trade_decision()`.
+        The concrete logic to generate decisions should be implemented in `_generate_trade_decision()`.
+        In other words, all subclasses of `SAOEStrategy` should override `_generate_trade_decision()` instead of
+        `generate_trade_decision()`.
+        """
+        self._last_step_range = self.get_data_cal_avail_range(rtype="step")
+
+        decision = self._generate_trade_decision(execute_result)
+        if isinstance(decision, GeneratorType):
+            decision = yield from decision
+
+        return decision
+
+    def _generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]:
+        raise NotImplementedError
+
+
+class ProxySAOEStrategy(SAOEStrategy):
+    """Proxy strategy that uses SAOEState. It is called a 'proxy' strategy because it does not make any decisions
+    by itself. Instead, when the strategy is required to generate a decision, it will yield the environment's
+    information and let the outside agents make the decision. Please refer to `_generate_trade_decision` for
+    more details.
+    """
 
     def __init__(
         self,
@@ -99,16 +126,15 @@ def __init__(
     ) -> None:
         super().__init__(None, outer_trade_decision, level_infra, common_infra, **kwargs)
 
-    def generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]:
-        # Once the following line is executed, this DecomposedStrategy (self) will be yielded to the outside
+    def _generate_trade_decision(self, execute_result: list = None) -> Generator[Any, Any, BaseTradeDecision]:
+        # Once the following line is executed, this ProxySAOEStrategy (self) will be yielded to the outside
         # of the entire executor, and the execution will be suspended. When the execution is resumed by `send()`,
-        # the sent item will be captured by `exec_vol`. The outside policy could communicate with the inner
+        # the item will be captured by `exec_vol`. The outside policy could communicate with the inner
         # level strategy through this way.
         exec_vol = yield self
 
         oh = self.trade_exchange.get_order_helper()
         order = oh.create(self._order.stock_id, exec_vol, self._order.direction)
-        self._update_last_step_range(self.get_data_cal_avail_range(rtype="step"))
 
         return TradeDecisionWO([order], self)
 
diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py
index 6ae17f1d1f..8972d20a1b 100644
--- a/qlib/rl/strategy/single_order.py
+++ b/qlib/rl/strategy/single_order.py
@@ -13,19 +13,17 @@ def __init__(
         self,
         order: Order,
         trade_range: TradeRange,
-        instrument: str,
     ) -> None:
         super().__init__()
 
         self._order = order
         self._trade_range = trade_range
-        self._instrument = instrument
 
     def generate_trade_decision(self, execute_result: list = None) -> TradeDecisionWO:
         oh: OrderHelper = self.common_infra.get("trade_exchange").get_order_helper()
         order_list = [
             oh.create(
-                code=self._instrument,
+                code=self._order.stock_id,
                 amount=self._order.amount,
                 direction=self._order.direction,
             ),
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 78ec75a21f..7844d25b30 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -5,10 +5,6 @@
 from abc import ABCMeta, abstractmethod
 from typing import Any, Generator, Optional, TYPE_CHECKING, Union
 
-import pandas as pd
-
-from ..constant import FINEST_GRANULARITY
-
 if TYPE_CHECKING:
     from qlib.backtest.exchange import Exchange
     from qlib.backtest.position import BasePosition
@@ -68,10 +64,6 @@ def executor(self) -> BaseExecutor:
     def trade_calendar(self) -> TradeCalendarManager:
         return self.level_infra.get("trade_calendar")
 
-    @property
-    def ticks_per_step(self) -> int:
-        return int(pd.Timedelta(self.trade_calendar.get_freq()) / pd.Timedelta(FINEST_GRANULARITY))
-
     @property
     def trade_position(self) -> BasePosition:
         return self.common_infra.get("trade_account").current_position
@@ -98,7 +90,7 @@ def reset(
         level_infra: LevelInfrastructure = None,
         common_infra: CommonInfrastructure = None,
         outer_trade_decision: BaseTradeDecision = None,
-        **kwargs,  # TODO: remove this?
+        **kwargs,
     ) -> None:
         """
         - reset `level_infra`, used to reset trade calendar, .etc
@@ -149,6 +141,41 @@ def generate_trade_decision(
         """
         raise NotImplementedError("generate_trade_decision is not implemented!")
 
+    # helper methods: not necessary but for convenience
+    def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]:
+        """
+        return data calendar's available decision range for `self` strategy
+        the range consider following factors
+        - data calendar in the charge of `self` strategy
+        - trading range limitation from the decision of outer strategy
+
+
+        related methods
+        - TradeCalendarManager.get_data_cal_range
+        - BaseTradeDecision.get_data_cal_range_limit
+
+        Parameters
+        ----------
+        rtype: str
+            - "full": return the available data index range of the strategy from `start_time` to `end_time`
+            - "step": return the available data index range of the strategy of current step
+
+        Returns
+        -------
+        Tuple[int, int]:
+            the available range both sides are closed
+        """
+        cal_range = self.trade_calendar.get_data_cal_range(rtype=rtype)
+        if self.outer_trade_decision is None:
+            raise ValueError(f"There is not limitation for strategy {self}")
+        range_limit = self.outer_trade_decision.get_data_cal_range_limit(rtype=rtype)
+        return max(cal_range[0], range_limit[0]), min(cal_range[1], range_limit[1])
+
+    """
+    The following methods are used to do cross-level communications in nested execution.
+    You do not need to care about them if you are implementing a single-level execution.
+    """
+
     @staticmethod
     def update_trade_decision(
         trade_decision: BaseTradeDecision,
@@ -189,44 +216,10 @@ def alter_outer_trade_decision(self, outer_trade_decision: BaseTradeDecision) ->
         # NOTE: normally, user should do something to the strategy due to the change of outer decision
         return outer_trade_decision
 
-    # helper methods: not necessary but for convenience
-    def get_data_cal_avail_range(self, rtype: str = "full") -> Tuple[int, int]:
-        """
-        return data calendar's available decision range for `self` strategy
-        the range consider following factors
-        - data calendar in the charge of `self` strategy
-        - trading range limitation from the decision of outer strategy
-
-
-        related methods
-        - TradeCalendarManager.get_data_cal_range
-        - BaseTradeDecision.get_data_cal_range_limit
-
-        Parameters
-        ----------
-        rtype: str
-            - "full": return the available data index range of the strategy from `start_time` to `end_time`
-            - "step": return the available data index range of the strategy of current step
-
-        Returns
-        -------
-        Tuple[int, int]:
-            the available range both sides are closed
-        """
-        cal_range = self.trade_calendar.get_data_cal_range(rtype=rtype)
-        if self.outer_trade_decision is None:
-            raise ValueError(f"There is not limitation for strategy {self}")
-        range_limit = self.outer_trade_decision.get_data_cal_range_limit(rtype=rtype)
-        return max(cal_range[0], range_limit[0]), min(cal_range[1], range_limit[1])
-
     def post_upper_level_exe_step(self) -> None:
         """
         A hook for doing sth after the upper level executor finished its execution (for example, finalize
-        the metrics collection). This is used in the nested execution scenario. You do not need to care about
-        this method if your strategy is not used in nested execution.
-
-        TODO: Group the nested-execution-related methods together and try to keep the the framework simple at the doc
-        TODO: and code level.
+        the metrics collection).
         """
 
     def post_exe_step(self, execute_result: list) -> None:
diff --git a/tests/rl/test_qlib_simulator.py b/tests/rl/test_qlib_simulator.py
index 230611fa66..b7d548e9ea 100644
--- a/tests/rl/test_qlib_simulator.py
+++ b/tests/rl/test_qlib_simulator.py
@@ -10,8 +10,7 @@
 from qlib.backtest.decision import Order, OrderDir, TradeRangeByTime
 from qlib.backtest.executor import SimulatorExecutor
 from qlib.rl.order_execution import CategoricalActionInterpreter
-from qlib.constant import FINEST_GRANULARITY
-from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecutionQlib
+from qlib.rl.order_execution.simulator_qlib import SingleAssetOrderExecution
 
 TOTAL_POSITION = 2100.0
 
@@ -39,7 +38,6 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]:
         "kwargs": {
             "order": order,
             "trade_range": TradeRangeByTime(order.start_time.time(), order.end_time.time()),
-            "instrument": order.stock_id,
         },
     }
 
@@ -48,7 +46,7 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]:
         "module_path": "qlib.backtest.executor",
         "kwargs": {
             "time_per_step": "1day",
-            "inner_strategy": {"class": "DecomposedStrategy", "module_path": "qlib.rl.order_execution.strategy"},
+            "inner_strategy": {"class": "ProxySAOEStrategy", "module_path": "qlib.rl.order_execution.strategy"},
             "track_data": True,
             "inner_executor": {
                 "class": "NestedExecutor",
@@ -63,7 +61,7 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]:
                         "class": "SimulatorExecutor",
                         "module_path": "qlib.backtest.executor",
                         "kwargs": {
-                            "time_per_step": FINEST_GRANULARITY,
+                            "time_per_step": "1min",
                             "verbose": False,
                             "trade_type": SimulatorExecutor.TT_SERIAL,
                             "generate_report": False,
@@ -79,7 +77,7 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]:
     }
 
     exchange_config = {
-        "freq": FINEST_GRANULARITY,
+        "freq": "1min",
         "codes": [order.stock_id],
         "limit_threshold": ("$ask == 0", "$bid == 0"),
         "deal_price": ("If($ask == 0, $bid, $ask)", "If($bid == 0, $ask, $bid)"),
@@ -97,7 +95,7 @@ def get_configs(order: Order) -> Tuple[dict, dict, dict]:
     return strategy_config, executor_config, exchange_config
 
 
-def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib:
+def get_simulator(order: Order) -> SingleAssetOrderExecution:
     DATA_ROOT_DIR = Path(__file__).parent.parent / ".data" / "rl" / "qlib_simulator"
 
     # fmt: off
@@ -118,7 +116,7 @@ def get_simulator(order: Order) -> SingleAssetOrderExecutionQlib:
 
     strategy_config, executor_config, exchange_config = get_configs(order)
 
-    return SingleAssetOrderExecutionQlib(
+    return SingleAssetOrderExecution(
         order=order,
         qlib_config=qlib_config,
         strategy_config=strategy_config,

From 2e5a61bf1519c3ff7f89928cb0541d4fe2415e38 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Wed, 24 Aug 2022 10:07:18 +0800
Subject: [PATCH 21/23] Rename ONE_SEC to EPS_T; complete backtest loop

---
 qlib/backtest/backtest.py                   | 2 ++
 qlib/constant.py                            | 2 +-
 qlib/rl/data/exchange_wrapper.py            | 4 ++--
 qlib/rl/order_execution/simulator_simple.py | 8 ++++----
 qlib/rl/order_execution/strategy.py         | 2 +-
 qlib/rl/order_execution/utils.py            | 4 ++--
 qlib/strategy/base.py                       | 2 +-
 7 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/qlib/backtest/backtest.py b/qlib/backtest/backtest.py
index e476550691..f79622bff6 100644
--- a/qlib/backtest/backtest.py
+++ b/qlib/backtest/backtest.py
@@ -83,7 +83,9 @@ def collect_data_loop(
         while not trade_executor.finished():
             _trade_decision: BaseTradeDecision = trade_strategy.generate_trade_decision(_execute_result)
             _execute_result = yield from trade_executor.collect_data(_trade_decision, level=0)
+            trade_strategy.post_exe_step(_execute_result)
             bar.update(1)
+        trade_strategy.post_upper_level_exe_step()
 
     if return_value is not None:
         all_executors = trade_executor.get_all_executors()
diff --git a/qlib/constant.py b/qlib/constant.py
index d91ecd803a..ac6c76ae22 100644
--- a/qlib/constant.py
+++ b/qlib/constant.py
@@ -18,5 +18,5 @@
 INF = int(1e18)
 ONE_DAY = pd.Timedelta("1day")
 ONE_MIN = pd.Timedelta("1min")
-ONE_SEC = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
+EPS_T = pd.Timedelta("1s")  # use 1 second to exclude the right interval point
 float_or_ndarray = TypeVar("float_or_ndarray", float, np.ndarray)
diff --git a/qlib/rl/data/exchange_wrapper.py b/qlib/rl/data/exchange_wrapper.py
index 3ed3f6904b..94bb1dcbbd 100644
--- a/qlib/rl/data/exchange_wrapper.py
+++ b/qlib/rl/data/exchange_wrapper.py
@@ -8,7 +8,7 @@
 
 from qlib.backtest import Exchange, Order
 from qlib.backtest.decision import TradeRange, TradeRangeByTime
-from qlib.constant import ONE_DAY, ONE_SEC
+from qlib.constant import ONE_DAY, EPS_T
 from qlib.rl.order_execution.utils import get_ticks_slice
 from qlib.utils.index_data import IndexData
 from .pickle_styled import BaseIntradayBacktestData
@@ -84,7 +84,7 @@ def load_qlib_backtest_data(
         trade_exchange.get_deal_price(
             stock_id=order.stock_id,
             start_time=order.date,
-            end_time=order.date + ONE_DAY - ONE_SEC,
+            end_time=order.date + ONE_DAY - EPS_T,
             direction=order.direction,
             method=None,
         ),
diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py
index 59cd92b4fe..93b57c3a03 100644
--- a/qlib/rl/order_execution/simulator_simple.py
+++ b/qlib/rl/order_execution/simulator_simple.py
@@ -10,7 +10,7 @@
 import pandas as pd
 
 from qlib.backtest.decision import Order, OrderDir
-from qlib.constant import EPS, ONE_SEC, float_or_ndarray
+from qlib.constant import EPS, EPS_T, float_or_ndarray
 from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data
 from qlib.rl.order_execution.state import SAOEMetrics, SAOEState
 from qlib.rl.simulator import Simulator
@@ -240,8 +240,8 @@ def _split_exec_vol(self, exec_vol_sum: float) -> np.ndarray:
         next_time = self._next_time()
 
         # get the backtest data for next interval
-        self.market_vol = self.backtest_data.get_volume().loc[self.cur_time : next_time - ONE_SEC].to_numpy()
-        self.market_price = self.backtest_data.get_deal_price().loc[self.cur_time : next_time - ONE_SEC].to_numpy()
+        self.market_vol = self.backtest_data.get_volume().loc[self.cur_time : next_time - EPS_T].to_numpy()
+        self.market_price = self.backtest_data.get_deal_price().loc[self.cur_time : next_time - EPS_T].to_numpy()
 
         assert self.market_vol is not None and self.market_price is not None
 
@@ -294,7 +294,7 @@ def _metrics_collect(
 
     def _get_ticks_slice(self, start: pd.Timestamp, end: pd.Timestamp, include_end: bool = False) -> pd.DatetimeIndex:
         if not include_end:
-            end = end - ONE_SEC
+            end = end - EPS_T
         return self.ticks_index[self.ticks_index.slice_indexer(start, end)]
 
     @staticmethod
diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index 5c280b6576..f6e3ae9faf 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -73,7 +73,7 @@ def post_upper_level_exe_step(self) -> None:
         for adapter in self.adapter_dict.values():
             adapter.generate_metrics_after_done()
 
-    def post_exe_step(self, execute_result: list) -> None:
+    def post_exe_step(self, execute_result: Optional[list]) -> None:
         last_step_length = self._last_step_range[1] - self._last_step_range[0]
         if last_step_length <= 0:
             assert not execute_result
diff --git a/qlib/rl/order_execution/utils.py b/qlib/rl/order_execution/utils.py
index a498037ad0..43517fe744 100644
--- a/qlib/rl/order_execution/utils.py
+++ b/qlib/rl/order_execution/utils.py
@@ -10,7 +10,7 @@
 
 from qlib.backtest.decision import OrderDir
 from qlib.backtest.executor import BaseExecutor, NestedExecutor, SimulatorExecutor
-from qlib.constant import ONE_SEC, float_or_ndarray
+from qlib.constant import EPS_T, float_or_ndarray
 
 
 def get_ticks_slice(
@@ -20,7 +20,7 @@ def get_ticks_slice(
     include_end: bool = False,
 ) -> pd.DatetimeIndex:
     if not include_end:
-        end = end - ONE_SEC
+        end = end - EPS_T
     return ticks_index[ticks_index.slice_indexer(start, end)]
 
 
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 7844d25b30..532e88452e 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -222,7 +222,7 @@ def post_upper_level_exe_step(self) -> None:
         the metrics collection).
         """
 
-    def post_exe_step(self, execute_result: list) -> None:
+    def post_exe_step(self, execute_result: Optional[list]) -> None:
         """
         A hook for doing sth after the corresponding executor finished its execution.
 

From 1e72a3642346ce27e66b194c92523ecd784dc808 Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Wed, 24 Aug 2022 10:14:52 +0800
Subject: [PATCH 22/23] CI issue

---
 qlib/rl/order_execution/strategy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qlib/rl/order_execution/strategy.py b/qlib/rl/order_execution/strategy.py
index f6e3ae9faf..4a85bc76ed 100644
--- a/qlib/rl/order_execution/strategy.py
+++ b/qlib/rl/order_execution/strategy.py
@@ -5,7 +5,7 @@
 
 import collections
 from types import GeneratorType
-from typing import Any, Union, cast, Dict, Generator
+from typing import Any, Optional, Union, cast, Dict, Generator
 
 import pandas as pd
 

From cb2b214126abac506d9e064951a905dad643af7a Mon Sep 17 00:00:00 2001
From: Huoran Li <huo53926@126.com>
Date: Wed, 24 Aug 2022 10:36:41 +0800
Subject: [PATCH 23/23] Resolve Yuge's PR comments

---
 qlib/rl/integration/__init__.py                            | 2 --
 .../feature.py => order_execution/integration.py}          | 0
 qlib/rl/order_execution/simulator_qlib.py                  | 7 ++++---
 qlib/rl/order_execution/simulator_simple.py                | 6 +++---
 qlib/rl/order_execution/state.py                           | 5 ++++-
 qlib/rl/strategy/single_order.py                           | 2 +-
 6 files changed, 12 insertions(+), 10 deletions(-)
 delete mode 100644 qlib/rl/integration/__init__.py
 rename qlib/rl/{integration/feature.py => order_execution/integration.py} (100%)

diff --git a/qlib/rl/integration/__init__.py b/qlib/rl/integration/__init__.py
deleted file mode 100644
index 59e481eb93..0000000000
--- a/qlib/rl/integration/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
diff --git a/qlib/rl/integration/feature.py b/qlib/rl/order_execution/integration.py
similarity index 100%
rename from qlib/rl/integration/feature.py
rename to qlib/rl/order_execution/integration.py
diff --git a/qlib/rl/order_execution/simulator_qlib.py b/qlib/rl/order_execution/simulator_qlib.py
index 8f48c24228..3002fd333e 100644
--- a/qlib/rl/order_execution/simulator_qlib.py
+++ b/qlib/rl/order_execution/simulator_qlib.py
@@ -9,11 +9,12 @@
 from qlib.backtest import collect_data_loop, get_strategy_executor
 from qlib.backtest.decision import Order
 from qlib.backtest.executor import NestedExecutor
-from qlib.rl.integration.feature import init_qlib
-from qlib.rl.order_execution.state import SAOEStateAdapter, SAOEState
-from qlib.rl.order_execution.strategy import SAOEStrategy
 from qlib.rl.simulator import Simulator
 
+from .integration import init_qlib
+from .state import SAOEState, SAOEStateAdapter
+from .strategy import SAOEStrategy
+
 
 class SingleAssetOrderExecution(Simulator[Order, SAOEState, float]):
     """Single-asset order execution (SAOE) simulator which is implemented based on Qlib backtest tools.
diff --git a/qlib/rl/order_execution/simulator_simple.py b/qlib/rl/order_execution/simulator_simple.py
index 93b57c3a03..f95aeebad0 100644
--- a/qlib/rl/order_execution/simulator_simple.py
+++ b/qlib/rl/order_execution/simulator_simple.py
@@ -4,18 +4,18 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Any, Optional, cast
+from typing import Any, cast, Optional
 
 import numpy as np
 import pandas as pd
-
 from qlib.backtest.decision import Order, OrderDir
 from qlib.constant import EPS, EPS_T, float_or_ndarray
 from qlib.rl.data.pickle_styled import DealPriceType, load_simple_intraday_backtest_data
-from qlib.rl.order_execution.state import SAOEMetrics, SAOEState
 from qlib.rl.simulator import Simulator
 from qlib.rl.utils import LogLevel
 
+from .state import SAOEMetrics, SAOEState
+
 # TODO: Integrating Qlib's native data with simulator_simple
 
 __all__ = ["SingleAssetOrderExecution"]
diff --git a/qlib/rl/order_execution/state.py b/qlib/rl/order_execution/state.py
index 9b2ae76d7e..d6bbeaea5a 100644
--- a/qlib/rl/order_execution/state.py
+++ b/qlib/rl/order_execution/state.py
@@ -37,7 +37,10 @@ def _get_all_timestamps(
 
 class SAOEStateAdapter:
     """
-    Maintain states of the environment.
+    Maintain states of the environment. SAOEStateAdapter accepts execution results and updates its internal state
+    according to the execution results with additional information acquired from executors & exchange. For example,
+    it gets the dealt order amount from execution results, and gets the corresponding market price / volume from
+    exchange.
 
     Example usage::
 
diff --git a/qlib/rl/strategy/single_order.py b/qlib/rl/strategy/single_order.py
index 8972d20a1b..9d8e396ce0 100644
--- a/qlib/rl/strategy/single_order.py
+++ b/qlib/rl/strategy/single_order.py
@@ -12,7 +12,7 @@ class SingleOrderStrategy(BaseStrategy):
     def __init__(
         self,
         order: Order,
-        trade_range: TradeRange,
+        trade_range: TradeRange = None,
     ) -> None:
         super().__init__()