Skip to content

Commit

Permalink
update action shaping to support SC and LC
Browse files Browse the repository at this point in the history
  • Loading branch information
Jinyu Wang authored and Jinyu Wang committed Feb 23, 2024
1 parent c49b920 commit 584e3a1
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 74 deletions.
15 changes: 3 additions & 12 deletions examples/supply_chain/single_echelon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
# Names of the product features in use; "price" sits at index 0 of this tuple,
# which matches IDX_PRODUCT_PRICE below.
product_features = ("price",)
IDX_PRODUCT_PRICE = 0

# NOTE(review): presumably the compute device used by the RL models ("cpu" here) - confirm against the users.
DEVICE: str = "cpu"
# Algorithm for ConsumerUnits: how many products to purchase from the upstream facility?
# EOQ: a consumer baseline policy. The quantity is decided by the expected lead time & the historical demand.
# DQN: an RL policy.
# PPO: an RL policy.
ALGO = "PPO"
# Fail fast at import time if ALGO is not one of the three supported names.
assert ALGO in ["DQN", "EOQ", "PPO"], "wrong ALGO"
# NOTE(review): DEVICE is assigned the same value a second time here; one of the two assignments is redundant.
DEVICE: str = "cpu"

# Parameters for the reward design of the RL policy: whether to treat the ConsumerUnits of one facility as a team.
TEAM_REWARD = False
Expand All @@ -32,10 +32,7 @@
# NOTE(review): presumably the sizes of the discrete action spaces for consumer and
# manufacture decisions - confirm against the policy definitions.
NUM_CONSUMER_ACTIONS = 3
OR_MANUFACTURE_ACTIONS = 20

# Topology to use, valid SCI topologies:
# - SCI_10_default, SCI_10_cheapest_storage_enlarged, SCI_10_shortest_storage_enlarged
# - SCI_500_default, SCI_500_cheapest_storage_enlarged, SCI_500_shortest_storage_enlarged
# TOPOLOGY = "SCI_10_default"
# Topology to use. The SCI names listed above are not selected here; this config uses "single_echelon".
TOPOLOGY = "single_echelon"

# The duration for training Env. Unit: tick (day).
Expand Down Expand Up @@ -78,13 +75,7 @@
}

# Experiment name; it becomes the last directory component of the log path below.
# NOTE(review): this first value (which appends "_TR" when TEAM_REWARD is set) is
# overwritten by the single-line assignment that follows, so it never reaches the log path.
EXP_NAME = (
f"{TOPOLOGY}"
# f"_{test_env_conf['durations']}"
f"_{ALGO}"
f"{'_TR' if TEAM_REWARD else ''}"
f"_test"
)
# Effective experiment name: "<TOPOLOGY>_<ALGO>_test".
EXP_NAME = f"{TOPOLOGY}_{ALGO}_test"

# Path to dump the experimental logs, results, and rendered figures.
workflow_settings["log_path"] = f"examples/supply_chain/logs/{EXP_NAME}/"
75 changes: 75 additions & 0 deletions examples/supply_chain/single_echelon/env_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,67 @@ def get_reward_norm(entity_id):

return {entity_id: r / get_reward_norm(entity_id) for entity_id, r in rewards.items()}

def _shape_action_to_meet_constraints(
    self, actions: List[ConsumerAction], SC_dict: Dict[int, int], LC: int, P_dict: Dict[int, int],
) -> List[ConsumerAction]:
    """Scale consumer order quantities down until they satisfy the P, supply and labour limits.

    The quantities are shaped in three steps:

    1. Every quantity is rounded down to a multiple of ``P_dict[sku_id]``.
    2. Supply constraint: for each SKU whose summed quantity exceeds ``SC_dict[sku_id]``,
       every quantity of that SKU is scaled by ``SC / total_asked`` and rounded down to a
       multiple of P again.
    3. Labour constraint: ``quantity // P`` is summed over all actions; if that sum exceeds
       ``LC``, every quantity is scaled by ``LC / total_labour_needed`` and rounded down to
       a multiple of P again.

    Args:
        actions: The consumer actions to shape. They are not modified.
        SC_dict: Supply limit per SKU id. Negative limits are treated as 0.
        LC: Labour limit, in units of ``quantity // P`` summed over all actions.
            A negative limit is treated as 0.
        P_dict: The unit P per SKU id that every shaped quantity must be a multiple of
            (presumably the pack size - confirm with the scenario definition).

    Returns:
        New ``ConsumerAction`` objects in the same order as ``actions``; only the quantity
        differs from the corresponding input action.

    Raises:
        KeyError: If the SKU id of an action is missing from ``P_dict`` or ``SC_dict``.
    """
    # Step 1: round every requested quantity down to a multiple of its P.
    unit_of: List[int] = [P_dict[action.sku_id] for action in actions]
    quantities: List[int] = [action.quantity // unit * unit for action, unit in zip(actions, unit_of)]

    # Step 2: supply constraint, applied per SKU.
    asked_by_sku: Dict[int, int] = {}
    for action, quantity in zip(actions, quantities):
        asked_by_sku[action.sku_id] = asked_by_sku.get(action.sku_id, 0) + quantity

    for idx, action in enumerate(actions):
        total_asked = asked_by_sku[action.sku_id]
        supply = max(SC_dict[action.sku_id], 0)
        if total_asked > supply:
            # Integer arithmetic on purpose: the float form ``quantity * (1 - ratio)`` can land
            # just below an exact multiple of P (50 * (1 - 0.8) == 9.999999999999998) and then
            # lose a whole P when floored.
            unit = unit_of[idx]
            quantities[idx] = quantities[idx] * supply // total_asked // unit * unit

    # Step 3: labour constraint, applied across all SKUs together.
    total_labour_needed = sum(quantity // unit for quantity, unit in zip(quantities, unit_of))
    labour = max(LC, 0)
    if total_labour_needed > labour:
        quantities = [
            quantity * labour // total_labour_needed // unit * unit
            for quantity, unit in zip(quantities, unit_of)
        ]

    return [
        ConsumerAction(
            action.id,
            action.sku_id,
            action.source_id,
            int(quantity),
            action.vehicle_type,
            action.expiration_buffer,
        )
        for action, quantity in zip(actions, quantities)
    ]

def _translate_to_env_action(
self,
action_dict: Dict[Any, Union[np.ndarray, List[object]]],
Expand Down Expand Up @@ -548,6 +609,20 @@ def _translate_to_env_action(
if env_action:
env_action_dict[agent_id] = env_action

# Shape consumer actions to meet constraints
consumer_actions = [
action for agent_id, action in env_action_dict.items()
if issubclass(self._entity_dict[agent_id].class_type, ConsumerUnit)
]
consumer_actions = self._shape_action_to_meet_constraints(
consumer_actions,
SC_dict={10: 800, 11: 500, 12: 500}, # TODO: constant used here
LC=260, # TODO: constant used here
P_dict={10: 8, 11: 5, 12: 5}, # TODO: constant used here
)
for con_act in consumer_actions:
env_action_dict[con_act.id] = con_act

return env_action_dict

def _reset(self):
Expand Down
62 changes: 0 additions & 62 deletions examples/supply_chain/single_echelon_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,68 +21,6 @@ def get_metrics(env_: Env) -> dict:
return info


def shape_actions(
    actions: List[ConsumerAction],
    SC_dict: Dict[int, int],
    LC: int,
    P_dict: Dict[int, int],
    consumer_list: List[SupplyChainEntity],
) -> List[ConsumerAction]:
    """Scale consumer order quantities down until they satisfy the P, supply and labour limits.

    The quantities are shaped in three steps:

    1. Every quantity is rounded down to a multiple of ``P_dict[sku_id]``.
    2. Supply constraint: for each SKU whose summed quantity exceeds ``SC_dict[sku_id]``,
       every quantity of that SKU is scaled by ``SC / total_asked`` and rounded down to a
       multiple of P again.
    3. Labour constraint: ``quantity // P`` is summed over all actions; if that sum exceeds
       ``LC``, every quantity is scaled by ``LC / total_labour_needed`` and rounded down to
       a multiple of P again.

    The scaling factor is relative to the requested total. Dividing the excess by the limit
    instead removes too much, and yields negative quantities once the request is more than
    twice the limit.

    Args:
        actions: The consumer actions to shape. They are not modified.
        SC_dict: Supply limit per SKU id. Negative limits are treated as 0. SKUs without
            any action are ignored.
        LC: Labour limit, in units of ``quantity // P`` summed over all actions.
            A negative limit is treated as 0.
        P_dict: The unit P per SKU id that every shaped quantity must be a multiple of
            (presumably the pack size - confirm with the scenario definition).
        consumer_list: Not used by this function; kept so existing callers keep working.

    Returns:
        New ``ConsumerAction`` objects in the same order as ``actions``; only the quantity
        differs from the corresponding input action.

    Raises:
        KeyError: If the SKU id of an action is missing from ``P_dict`` or ``SC_dict``.
    """
    # Step 1: round every requested quantity down to a multiple of its P.
    unit_of: List[int] = [P_dict[action.sku_id] for action in actions]
    quantities: List[int] = [action.quantity // unit * unit for action, unit in zip(actions, unit_of)]

    # Step 2: supply constraint, applied per SKU.
    asked_by_sku: Dict[int, int] = {}
    for action, quantity in zip(actions, quantities):
        asked_by_sku[action.sku_id] = asked_by_sku.get(action.sku_id, 0) + quantity

    for idx, action in enumerate(actions):
        total_asked = asked_by_sku[action.sku_id]
        supply = max(SC_dict[action.sku_id], 0)
        if total_asked > supply:
            # Integer arithmetic keeps quantities as ints and avoids float results that land
            # just below an exact multiple of P and lose a whole P when floored.
            unit = unit_of[idx]
            quantities[idx] = quantities[idx] * supply // total_asked // unit * unit

    # Step 3: labour constraint, applied across all SKUs together.
    total_labour_needed = sum(quantity // unit for quantity, unit in zip(quantities, unit_of))
    labour = max(LC, 0)
    if total_labour_needed > labour:
        quantities = [
            quantity * labour // total_labour_needed // unit * unit
            for quantity, unit in zip(quantities, unit_of)
        ]

    shaped_actions = [
        ConsumerAction(
            action.id,
            action.sku_id,
            action.source_id,
            int(quantity),
            action.vehicle_type,
            action.expiration_buffer,
        )
        for action, quantity in zip(actions, quantities)
    ]

    # Storage Capacity Limit? (open question from the original author: none is enforced here)
    # Trace every adjustment: "<consumer id> <sku id> : <asked> -> <shaped>".
    for act, sact in zip(actions, shaped_actions):
        print(act.id, act.sku_id, ":", act.quantity, "->", sact.quantity)

    return shaped_actions


if __name__ == "__main__":
# Create an environment instance
env = Env(scenario="supply_chain", topology="single_echelon", start_tick=0, durations=100)
Expand Down

0 comments on commit 584e3a1

Please sign in to comment.