Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

86 Improve documentation MotionCheck #87

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
256 changes: 25 additions & 231 deletions flatland/envs/agent_chains.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
import networkx as nx
import numpy as np
"""
Agent Chains: Unordered Close Following Agents

Think of a chain of agents, in random order, moving in the same direction.
For any adjacent pair of agents, there's a 0.5 chance that it is in index order, i.e. index(A) < index(B) where A is in front of B.
So roughly half the adjacent pairs will need to leave a gap and half won't, and the chain of agents will typically be one-third empty space.
By removing the restriction, we can keep the agents close together and
so move up to 50% more agents through a junction or segment of rail in the same number of steps.

We are still using index order to resolve conflicts between two agents trying to move into the same spot, for example, head-on collisions, or agents "merging" at junctions.

Implementation: We did it by storing an agent's position as a graph node, and a movement as a directed edge, using the NetworkX graph library.
We create an empty graph for each step, and add the agents into the graph in order,
using their (row, column) location for the node. Stationary agents get a self-loop.
Agents in an adjacent chain naturally get "connected up".
We then use some NetworkX algorithms (https://github.com/networkx/networkx):
* `weakly_connected_components` to find the chains.
* `selfloop_edges` to find the stopped agents
* `dfs_postorder_nodes` to traverse a chain
* `simple_cycles` to find agents colliding head-on
"""
from typing import Tuple, Set, Union

from typing import List, Tuple, Set, Union
import graphviz as gv
import networkx as nx
chenkins marked this conversation as resolved.
Show resolved Hide resolved


class MotionCheck(object):
Expand All @@ -17,12 +37,8 @@ def __init__(self):
self.nDeadlocks = 0
self.svDeadlocked = set()
self._G_reversed: Union[nx.DiGraph, None] = None


def get_G_reversed(self):
    """Return the reversed motion graph (edges point successor -> predecessor).

    The reversed graph is maintained incrementally in ``self.Grev`` by
    ``addAgent``, so no reversal has to be computed here.
    """
    # Removed dead commented-out code that lazily reversed self.G;
    # self.Grev is kept up to date as agents are added.
    return self.Grev

def reset_G_reversed(self):
Expand All @@ -49,26 +65,6 @@ def addAgent(self, iAg, rc1, rc2, xlabel=None):
self.G.add_edge(rc1, rc2)
self.Grev.add_edge(rc2, rc1)

def find_stops(self):
    """Return the set of (row, col) nodes occupied by stopped agents.

    A stopped agent is represented in the motion graph as a self-loop,
    which shows up as a 1 on the diagonal of the adjacency matrix.
    """
    # Sparse adjacency matrix; its row/column order matches G.nodes().
    mAdj = nx.linalg.adjacency_matrix(self.G)

    # Indices of the diagonal 1s, i.e. of the self-loop (stopped) nodes.
    aiSelfLoops = np.nonzero(mAdj.diagonal())[0]

    # Map matrix indices back to the (row, col) node values, collected
    # directly into a set ready for set intersection by callers.
    lvNodes = list(self.G.nodes())
    return {lvNodes[i] for i in aiSelfLoops}

def find_stops2(self):
""" alternative method to find stopped agents, using a networkx call to find selfloop edges
"""
Expand Down Expand Up @@ -119,18 +115,6 @@ def find_stop_preds(self, svStops=None):
# the set of all the nodes/agents blocked by this set of stopped nodes
return svBlocked

def find_swaps(self):
    """Return the set of nodes involved in swap (head-on) conflicts.

    Two agents trying to exchange places form a simple cycle of length 2
    in the motion graph.  These agents are necessarily deadlocked (agents
    in flatland cannot change direction), so they will remain stuck for
    the rest of the episode.
    """
    svSwapNodes = set()
    # Scan all simple cycles; only the 2-cycles are swaps.
    for lvCycle in nx.algorithms.cycles.simple_cycles(self.G):
        if len(lvCycle) == 2:
            svSwapNodes.update(lvCycle)
    return svSwapNodes

def find_swaps2(self) -> Set[Tuple[int, int]]:
svSwaps = set()
sEdges = self.G.edges()
Expand Down Expand Up @@ -184,10 +168,9 @@ def find_conflicts(self):
self.reset_G_reversed()

svStops = self.find_stops2() # voluntarily stopped agents - have self-loops
# svSwaps = self.find_swaps() # deadlocks - adjacent head-on collisions
svSwaps = self.find_swaps2() # faster version of find_swaps
svSwaps = self.find_swaps2() # deadlocks - adjacent head-on collisions

# Block all swaps and their tree of predecessors
# Block all swaps and their tree of predecessors
self.svDeadlocked = self.block_preds(svSwaps, color="purple")

# Take the union of the above, and find all the predecessors
Expand Down Expand Up @@ -284,192 +267,3 @@ def render(omc: MotionCheck, horizontal=True):
except ImportError as oError:
print("Flatland agent_chains ignoring ImportError - install pygraphviz to render graphs")
return None


class ChainTestEnv(object):
    """Helper for building agent-chain test scenarios for a MotionCheck.

    Keeps a running agent handle (iAgNext) and a current grid row
    (iRowNext) so that successive chains are laid out on separate rows.
    """

    def __init__(self, omc: MotionCheck):
        self.iAgNext = 0   # next agent handle to assign
        self.iRowNext = 1  # row used for the next chain / agent
        self.omc = omc

    def addAgent(self, rc1, rc2, xlabel=None):
        """Add one agent moving rc1 -> rc2, auto-assigning its handle."""
        self.omc.addAgent(self.iAgNext, rc1, rc2, xlabel=xlabel)
        self.iAgNext += 1

    def addAgentToRow(self, c1, c2, xlabel=None):
        """Add an agent moving from column c1 to c2 on the current row."""
        self.addAgent((self.iRowNext, c1), (self.iRowNext, c2), xlabel=xlabel)

    def create_test_chain(self,
                          nAgents: int,
                          rcVel: Tuple[int, int] = (0, 1),
                          liStopped: List[int] = None,
                          xlabel=None):
        """Create a chain of agents on the current row, then advance the row.

        Parameters
        ----------
        nAgents: number of agents in the chain.
        rcVel: (row, col) velocity shared by the moving agents.
        liStopped: chain-local indices of agents that stay put (self-loop).
        xlabel: optional label attached to the first agent's node.
        """
        # Fixed mutable default argument ([] shared across calls); the
        # None sentinel keeps the call signature backward-compatible.
        if liStopped is None:
            liStopped = []

        lrcAgPos = [(self.iRowNext, i * rcVel[1]) for i in range(nAgents)]

        for iAg, rcPos in zip(range(nAgents), lrcAgPos):
            # Stopped agents get zero velocity, which addAgent turns into
            # a self-loop on their cell node.
            rcVel1 = (0, 0) if iAg in liStopped else rcVel
            self.omc.addAgent(iAg + self.iAgNext, rcPos, (rcPos[0] + rcVel1[0], rcPos[1] + rcVel1[1]))

        if xlabel:
            self.omc.G.nodes[lrcAgPos[0]]["xlabel"] = xlabel

        self.iAgNext += nAgents
        self.iRowNext += 1

    def nextRow(self):
        """Advance to the next (empty) row."""
        self.iRowNext += 1


def create_test_agents(omc: MotionCheck):
    """Populate omc with a small set of hand-placed test scenarios.

    Each scenario lives on its own row; agent handles and (row, col)
    positions are assigned by hand.  Call order matters only for the
    agent-index-based conflict resolution in MotionCheck.
    """
    # blocked chain: agent 31 is stopped (self-loop), blocking 1-3 behind it
    omc.addAgent(1, (1, 2), (1, 3))
    omc.addAgent(2, (1, 3), (1, 4))
    omc.addAgent(3, (1, 4), (1, 5))
    omc.addAgent(31, (1, 5), (1, 5))

    # unblocked chain
    omc.addAgent(4, (2, 1), (2, 2))
    omc.addAgent(5, (2, 2), (2, 3))

    # blocked short chain
    omc.addAgent(6, (3, 1), (3, 2))
    omc.addAgent(7, (3, 2), (3, 2))

    # solitary agent
    omc.addAgent(8, (4, 1), (4, 2))

    # solitary stopped agent
    omc.addAgent(9, (5, 1), (5, 1))

    # blocked short chain (opposite direction)
    omc.addAgent(10, (6, 4), (6, 3))
    omc.addAgent(11, (6, 3), (6, 3))

    # swap conflict: 12 and 13 try to exchange places (head-on deadlock)
    omc.addAgent(12, (7, 1), (7, 2))
    omc.addAgent(13, (7, 2), (7, 1))


def create_test_agents2(omc: MotionCheck):
    """Populate omc with a grid of named test scenarios, one per row.

    Uses ChainTestEnv to lay out chains, swaps, merges, tees and trees on
    successive rows; the xlabel strings name each scenario in renderings.
    """
    # blocked chain
    cte = ChainTestEnv(omc)
    cte.create_test_chain(4, liStopped=[3], xlabel="stopped\nchain")
    cte.create_test_chain(4, xlabel="running\nchain")

    cte.create_test_chain(2, liStopped=[1], xlabel="stopped \nshort\n chain")

    # two agents exchanging places (head-on swap)
    cte.addAgentToRow(1, 2, "swap")
    cte.addAgentToRow(2, 1)

    cte.nextRow()

    # swap at the head of a chain
    cte.addAgentToRow(1, 2, "chain\nswap")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 2)

    cte.nextRow()

    # a stopped agent (4 -> 4) in the middle of a chain
    cte.addAgentToRow(1, 2, "midchain\nstop")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    cte.addAgentToRow(4, 4)
    cte.addAgentToRow(5, 6)
    cte.addAgentToRow(6, 7)

    cte.nextRow()

    # a swap (3<->4) in the middle of a chain
    cte.addAgentToRow(1, 2, "midchain\nswap")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    cte.addAgentToRow(4, 3)
    cte.addAgentToRow(5, 4)
    cte.addAgentToRow(6, 5)

    cte.nextRow()

    # two agents landing on the same cell
    cte.addAgentToRow(1, 2, "Land on\nSame")
    cte.addAgentToRow(3, 2)

    cte.nextRow()
    # two chains merging onto the same cell
    cte.addAgentToRow(1, 2, "chains\nonto\nsame")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    cte.addAgentToRow(5, 4)
    cte.addAgentToRow(6, 5)
    cte.addAgentToRow(7, 6)

    cte.nextRow()
    # three agents converging on one cell
    cte.addAgentToRow(1, 2, "3-way\nsame")
    cte.addAgentToRow(3, 2)
    cte.addAgent((cte.iRowNext + 1, 2), (cte.iRowNext, 2))
    cte.nextRow()

    # NOTE(review): deliberately disabled scenario, kept for reference.
    if False:
        cte.nextRow()
        cte.nextRow()
        cte.addAgentToRow(1, 2, "4-way\nsame")
        cte.addAgentToRow(3, 2)
        cte.addAgent((cte.iRowNext + 1, 2), (cte.iRowNext, 2))
        cte.addAgent((cte.iRowNext - 1, 2), (cte.iRowNext, 2))
        cte.nextRow()

    cte.nextRow()
    # a chain with a side branch joining it (T-shape)
    cte.addAgentToRow(1, 2, "Tee")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    cte.addAgent((cte.iRowNext + 1, 3), (cte.iRowNext, 3))
    cte.nextRow()

    cte.nextRow()
    # a tree of branches feeding into the main chain at (r1, 3)
    cte.addAgentToRow(1, 2, "Tree")
    cte.addAgentToRow(2, 3)
    cte.addAgentToRow(3, 4)
    r1 = cte.iRowNext
    r2 = cte.iRowNext + 1
    r3 = cte.iRowNext + 2
    cte.addAgent((r2, 3), (r1, 3))
    cte.addAgent((r2, 2), (r2, 3))
    cte.addAgent((r3, 2), (r2, 3))

    cte.nextRow()


def test_agent_following():
    """Build the standard test scenarios and draw the motion graph.

    Node colours encode agent status: magenta = stopped, red = blocked,
    purple = swap (deadlock), lightblue = free to move.
    """
    omc = MotionCheck()
    create_test_agents2(omc)

    svStops = omc.find_stops()
    svBlocked = omc.find_stop_preds()
    llvSwaps = omc.find_swaps()
    svSwaps = set()
    for lvSwap in llvSwaps:
        svSwaps.update(lvSwap)
    print(list(svBlocked))

    lvCells = omc.G.nodes()

    def sColour(v):
        # Priority order matches the original chained conditional.
        if v in svStops:
            return "magenta"
        if v in svBlocked:
            return "red"
        if v in svSwaps:
            return "purple"
        return "lightblue"

    lColours = [sColour(v) for v in lvCells]
    # Draw each node at its own (row, col) grid position.
    dPos = {v: v for v in lvCells}

    nx.draw(omc.G,
            with_labels=True, arrowsize=20,
            pos=dPos,
            node_color=lColours)


def main():
    # Entry point: run the ad-hoc agent-chain rendering test.
    test_agent_following()


if __name__ == "__main__":
    main()
9 changes: 6 additions & 3 deletions flatland/envs/rail_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from typing import List, Optional, Dict, Tuple

import numpy as np
from flatland.utils import seeding

# from flatland.envs.timetable_generators import timetable_generator
import flatland.envs.timetable_generators as ttg
Expand All @@ -27,6 +26,7 @@
from flatland.envs.step_utils import env_utils
from flatland.envs.step_utils.states import TrainState, StateTransitionSignals
from flatland.envs.step_utils.transition_utils import check_valid_action
from flatland.utils import seeding
from flatland.utils.decorators import send_infrastructure_data_change_signal_to_reset_lru_cache, \
enable_infrastructure_lru_cache
from flatland.utils.rendertools import RenderTool, AgentRenderVariant
Expand All @@ -52,9 +52,12 @@ class RailEnv(Environment):
Moving forward in a dead-end cell makes the agent turn 180 degrees and step
to the cell it came from.

In order for agents to be able to "understand" the simulation behaviour from the observations,
the execution order of actions should not matter (i.e. not depend on the agent handle).
However, the agent ordering is still used to resolve conflicts between two agents trying to move into the same cell,
for example, head-on collisions, or agents "merging" at junctions.
See `MotionCheck` for more details.

The actions of the agents are executed in order of their handle to prevent
deadlocks and to allow them to learn relative priorities.

Reward Function:

Expand Down
5 changes: 3 additions & 2 deletions notebooks/Agent-Close-Following.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@
"from flatland.envs.persistence import RailEnvPersister\n",
"from flatland.utils.rendertools import RenderTool\n",
"from flatland.utils import env_edit_utils as eeu\n",
"from flatland.utils import jupyter_utils as ju"
"from flatland.utils import jupyter_utils as ju\n",
"from tests.test_agent_chains import create_test_agents2"
]
},
{
Expand All @@ -113,7 +114,7 @@
"outputs": [],
"source": [
"omc = ac.MotionCheck()\n",
"ac.create_test_agents2(omc)\n",
"create_test_agents2(omc)\n",
"rv = ac.render(omc)\n",
"print(type(rv))"
]
Expand Down
Loading
Loading