Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

86 Improve documentation MotionCheck #87

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
256 changes: 25 additions & 231 deletions flatland/envs/agent_chains.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
import networkx as nx
import numpy as np
"""
Agent Chains: Unordered Close Following Agents

Think of a chain of agents, in random order, moving in the same direction.
For any adjacent pair of agents, there's a 0.5 chance that it is in index order, i.e. index(A) < index(B) where A is in front of B.
So roughly half the adjacent pairs will need to leave a gap and half won't, and the chain of agents will typically be one-third empty space.
By removing the restriction, we can keep the agents close together and
so move up to 50% more agents through a junction or segment of rail in the same number of steps.

We are still using index order to resolve conflicts between two agents trying to move into the same spot, for example, head-on collisions, or agents "merging" at junctions.

Implementation: We did it by storing an agent's position as a graph node, and a movement as a directed edge, using the NetworkX graph library.
We create an empty graph for each step, and add the agents into the graph in order,
using their (row, column) location for the node. Stationary agents get a self-loop.
Agents in an adjacent chain naturally get "connected up".
We then use some NetworkX algorithms (https://github.com/networkx/networkx):
* `weakly_connected_components` to find the chains.
* `selfloop_edges` to find the stopped agents
* `dfs_postorder_nodes` to traverse a chain
* `simple_cycles` to find agents colliding head-on
"""
from typing import Tuple, Set, Union

from typing import List, Tuple, Set, Union
import graphviz as gv
import networkx as nx
chenkins marked this conversation as resolved.
Show resolved Hide resolved


class MotionCheck(object):
Expand All @@ -17,12 +37,8 @@ def __init__(self):
self.nDeadlocks = 0
self.svDeadlocked = set()
self._G_reversed: Union[nx.DiGraph, None] = None


def get_G_reversed(self):
    """Return the reversed motion graph (edges point successor -> predecessor).

    The reversed graph is maintained incrementally in ``self.Grev`` by
    ``addAgent``, so no reversal has to be computed here.
    """
    # Removed dead commented-out code that lazily reversed self.G;
    # self.Grev is kept up to date as agents are added.
    return self.Grev

def reset_G_reversed(self):
Expand All @@ -49,26 +65,6 @@ def addAgent(self, iAg, rc1, rc2, xlabel=None):
self.G.add_edge(rc1, rc2)
self.Grev.add_edge(rc2, rc1)

def find_stops(self):
    """Return the set of (row, col) nodes occupied by stopped agents.

    A stopped agent is represented in the motion graph as a self-loop,
    which shows up as a 1 on the diagonal of the adjacency matrix.
    """
    # Sparse adjacency matrix; its row/column order matches G.nodes().
    mAdj = nx.linalg.adjacency_matrix(self.G)

    # Indices of the diagonal 1s, i.e. of the self-loop (stopped) nodes.
    aiSelfLoops = np.nonzero(mAdj.diagonal())[0]

    # Map matrix indices back to the (row, col) node values, collected
    # directly into a set ready for set intersection by callers.
    lvNodes = list(self.G.nodes())
    return {lvNodes[i] for i in aiSelfLoops}

def find_stops2(self):
""" alternative method to find stopped agents, using a networkx call to find selfloop edges
"""
Expand Down Expand Up @@ -119,18 +115,6 @@ def find_stop_preds(self, svStops=None):
# the set of all the nodes/agents blocked by this set of stopped nodes
return svBlocked

def find_swaps(self):
    """Return the set of nodes involved in swap (head-on) conflicts.

    Two agents trying to exchange places form a simple cycle of length 2
    in the motion graph.  These agents are necessarily deadlocked (agents
    in flatland cannot change direction), so they will remain stuck for
    the rest of the episode.
    """
    svSwapNodes = set()
    # Scan all simple cycles; only the 2-cycles are swaps.
    for lvCycle in nx.algorithms.cycles.simple_cycles(self.G):
        if len(lvCycle) == 2:
            svSwapNodes.update(lvCycle)
    return svSwapNodes

def find_swaps2(self) -> Set[Tuple[int, int]]:
svSwaps = set()
sEdges = self.G.edges()
Expand Down Expand Up @@ -184,10 +168,9 @@ def find_conflicts(self):
self.reset_G_reversed()

svStops = self.find_stops2() # voluntarily stopped agents - have self-loops
# svSwaps = self.find_swaps() # deadlocks - adjacent head-on collisions
svSwaps = self.find_swaps2() # faster version of find_swaps
svSwaps = self.find_swaps2() # deadlocks - adjacent head-on collisions

# Block all swaps and their tree of predecessors
# Block all swaps and their tree of predecessors
self.svDeadlocked = self.block_preds(svSwaps, color="purple")

# Take the union of the above, and find all the predecessors
Expand Down Expand Up @@ -284,192 +267,3 @@ def render(omc: MotionCheck, horizontal=True):
except ImportError as oError:
print("Flatland agent_chains ignoring ImportError - install pygraphviz to render graphs")
return None


class ChainTestEnv(object):
    """Helper for building agent-chain test scenarios for a MotionCheck.

    Keeps a running agent handle (iAgNext) and a current grid row
    (iRowNext) so that successive chains are laid out on separate rows.
    """

    def __init__(self, omc: MotionCheck):
        self.iAgNext = 0   # next agent handle to assign
        self.iRowNext = 1  # row used for the next chain / agent
        self.omc = omc

    def addAgent(self, rc1, rc2, xlabel=None):
        """Add one agent moving rc1 -> rc2, auto-assigning its handle."""
        self.omc.addAgent(self.iAgNext, rc1, rc2, xlabel=xlabel)
        self.iAgNext += 1

    def addAgentToRow(self, c1, c2, xlabel=None):
        """Add an agent moving from column c1 to c2 on the current row."""
        self.addAgent((self.iRowNext, c1), (self.iRowNext, c2), xlabel=xlabel)

    def create_test_chain(self,
                          nAgents: int,
                          rcVel: Tuple[int, int] = (0, 1),
                          liStopped: List[int] = None,
                          xlabel=None):
        """Create a chain of agents on the current row, then advance the row.

        Parameters
        ----------
        nAgents: number of agents in the chain.
        rcVel: (row, col) velocity shared by the moving agents.
        liStopped: chain-local indices of agents that stay put (self-loop).
        xlabel: optional label attached to the first agent's node.
        """
        # Fixed mutable default argument ([] shared across calls); the
        # None sentinel keeps the call signature backward-compatible.
        if liStopped is None:
            liStopped = []

        lrcAgPos = [(self.iRowNext, i * rcVel[1]) for i in range(nAgents)]

        for iAg, rcPos in zip(range(nAgents), lrcAgPos):
            # Stopped agents get zero velocity, which addAgent turns into
            # a self-loop on their cell node.
            rcVel1 = (0, 0) if iAg in liStopped else rcVel
            self.omc.addAgent(iAg + self.iAgNext, rcPos, (rcPos[0] + rcVel1[0], rcPos[1] + rcVel1[1]))

        if xlabel:
            self.omc.G.nodes[lrcAgPos[0]]["xlabel"] = xlabel

        self.iAgNext += nAgents
        self.iRowNext += 1

    def nextRow(self):
        """Advance to the next (empty) row."""
        self.iRowNext += 1


def create_test_agents(omc: MotionCheck):
    """Populate omc with a small set of hand-placed test scenarios.

    Each scenario lives on its own row; agent handles and (row, col)
    positions are assigned by hand.  Call order matters only for the
    agent-index-based conflict resolution in MotionCheck.
    """
    # blocked chain: agent 31 is stopped (self-loop), blocking 1-3 behind it
    omc.addAgent(1, (1, 2), (1, 3))
    omc.addAgent(2, (1, 3), (1, 4))
    omc.addAgent(3, (1, 4), (1, 5))
    omc.addAgent(31, (1, 5), (1, 5))

    # unblocked chain
    omc.addAgent(4, (2, 1), (2, 2))
    omc.addAgent(5, (2, 2), (2, 3))

    # blocked short chain
    omc.addAgent(6, (3, 1), (3, 2))
    omc.addAgent(7, (3, 2), (3, 2))

    # solitary agent
    omc.addAgent(8, (4, 1), (4, 2))

    # solitary stopped agent
    omc.addAgent(9, (5, 1), (5, 1))

    # blocked short chain (opposite direction)
    omc.addAgent(10, (6, 4), (6, 3))
    omc.addAgent(11, (6, 3), (6, 3))

    # swap conflict: 12 and 13 try to exchange places (head-on deadlock)
    omc.addAgent(12, (7, 1), (7, 2))
    omc.addAgent(13, (7, 2), (7, 1))


def create_test_agents2(omc: MotionCheck):
    """Populate omc with a grid of named test scenarios, one per row.

    Uses ChainTestEnv to lay out chains, swaps, merges, tees and trees on
    successive rows; the xlabel strings name each scenario in renderings.
    """
    # blocked chain
    cte = ChainTestEnv(omc)
    cte.create_test_chain(4, liStopped=[3], xlabel="stopped\nchain")
    cte.create_test_chain(4, xlabel="running\nchain")

    cte.create_test_chain(2, liStopped=[1], xlabel="stopped \nshort\n chain")

    # two agents exchanging places (head-on swap)
    cte.addAgentToRow(1, 2, "swap")
    cte.addAgentToRow(2, 1)

    cte.nextRow()

    # swap at the head of a chain
    cte.addAgentToRow(1, 2, "chain\nswap")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 2)

    cte.nextRow()

    # a stopped agent (4 -> 4) in the middle of a chain
    cte.addAgentToRow(1, 2, "midchain\nstop")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    cte.addAgentToRow(4, 4)
    cte.addAgentToRow(5, 6)
    cte.addAgentToRow(6, 7)

    cte.nextRow()

    # a swap (3<->4) in the middle of a chain
    cte.addAgentToRow(1, 2, "midchain\nswap")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    cte.addAgentToRow(4, 3)
    cte.addAgentToRow(5, 4)
    cte.addAgentToRow(6, 5)

    cte.nextRow()

    # two agents landing on the same cell
    cte.addAgentToRow(1, 2, "Land on\nSame")
    cte.addAgentToRow(3, 2)

    cte.nextRow()
    # two chains merging onto the same cell
    cte.addAgentToRow(1, 2, "chains\nonto\nsame")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    cte.addAgentToRow(5, 4)
    cte.addAgentToRow(6, 5)
    cte.addAgentToRow(7, 6)

    cte.nextRow()
    # three agents converging on one cell
    cte.addAgentToRow(1, 2, "3-way\nsame")
    cte.addAgentToRow(3, 2)
    cte.addAgent((cte.iRowNext + 1, 2), (cte.iRowNext, 2))
    cte.nextRow()

    # NOTE(review): deliberately disabled scenario, kept for reference.
    if False:
        cte.nextRow()
        cte.nextRow()
        cte.addAgentToRow(1, 2, "4-way\nsame")
        cte.addAgentToRow(3, 2)
        cte.addAgent((cte.iRowNext + 1, 2), (cte.iRowNext, 2))
        cte.addAgent((cte.iRowNext - 1, 2), (cte.iRowNext, 2))
        cte.nextRow()

    cte.nextRow()
    # a chain with a side branch joining it (T-shape)
    cte.addAgentToRow(1, 2, "Tee")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    cte.addAgent((cte.iRowNext + 1, 3), (cte.iRowNext, 3))
    cte.nextRow()

    cte.nextRow()
    # a tree of branches feeding into the main chain at (r1, 3)
    cte.addAgentToRow(1, 2, "Tree")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    r1 = cte.iRowNext
    r2 = cte.iRowNext + 1
    r3 = cte.iRowNext + 2
    cte.addAgent((r2, 3), (r1, 3))
    cte.addAgent((r2, 2), (r2, 3))
    cte.addAgent((r3, 2), (r2, 3))

    cte.nextRow()


def test_agent_following():
    """Build the standard test scenarios and draw the motion graph.

    Node colours encode agent status: magenta = stopped, red = blocked,
    purple = swap (deadlock), lightblue = free to move.
    """
    omc = MotionCheck()
    create_test_agents2(omc)

    svStops = omc.find_stops()
    svBlocked = omc.find_stop_preds()
    llvSwaps = omc.find_swaps()
    svSwaps = set()
    for lvSwap in llvSwaps:
        svSwaps.update(lvSwap)
    print(list(svBlocked))

    lvCells = omc.G.nodes()

    def sColour(v):
        # Priority order matches the original chained conditional.
        if v in svStops:
            return "magenta"
        if v in svBlocked:
            return "red"
        if v in svSwaps:
            return "purple"
        return "lightblue"

    lColours = [sColour(v) for v in lvCells]
    # Draw each node at its own (row, col) grid position.
    dPos = {v: v for v in lvCells}

    nx.draw(omc.G,
            with_labels=True, arrowsize=20,
            pos=dPos,
            node_color=lColours)


def main():
    # Entry point: run the ad-hoc agent-chain rendering test.
    test_agent_following()


if __name__ == "__main__":
    main()
9 changes: 6 additions & 3 deletions flatland/envs/rail_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from typing import List, Optional, Dict, Tuple

import numpy as np
from flatland.utils import seeding

# from flatland.envs.timetable_generators import timetable_generator
import flatland.envs.timetable_generators as ttg
Expand All @@ -27,6 +26,7 @@
from flatland.envs.step_utils import env_utils
from flatland.envs.step_utils.states import TrainState, StateTransitionSignals
from flatland.envs.step_utils.transition_utils import check_valid_action
from flatland.utils import seeding
from flatland.utils.decorators import send_infrastructure_data_change_signal_to_reset_lru_cache, \
enable_infrastructure_lru_cache
from flatland.utils.rendertools import RenderTool, AgentRenderVariant
Expand All @@ -52,9 +52,12 @@ class RailEnv(Environment):
Moving forward in a dead-end cell makes the agent turn 180 degrees and step
to the cell it came from.

In order for agents to be able to "understand" the simulation behaviour from the observations,
the execution order of actions should not matter (i.e. not depend on the agent handle).
However, the agent ordering is still used to resolve conflicts between two agents trying to move into the same cell,
for example, head-on collisions, or agents "merging" at junctions.
See `MotionCheck` for more details.

The actions of the agents are executed in order of their handle to prevent
deadlocks and to allow them to learn relative priorities.

Reward Function:

Expand Down
5 changes: 3 additions & 2 deletions notebooks/Agent-Close-Following.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@
"from flatland.envs.persistence import RailEnvPersister\n",
"from flatland.utils.rendertools import RenderTool\n",
"from flatland.utils import env_edit_utils as eeu\n",
"from flatland.utils import jupyter_utils as ju"
"from flatland.utils import jupyter_utils as ju\n",
"from tests.test_agent_chains import create_test_agents2"
]
},
{
Expand All @@ -113,7 +114,7 @@
"outputs": [],
"source": [
"omc = ac.MotionCheck()\n",
"ac.create_test_agents2(omc)\n",
"create_test_agents2(omc)\n",
"rv = ac.render(omc)\n",
"print(type(rv))"
]
Expand Down
Loading
Loading