From 3a3488fbf70d7356e21543f56aca74a01c610dea Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 9 Jan 2024 17:05:27 +1000 Subject: [PATCH 01/44] Removed `reached_first` from A* The current method of skimming for A* produces out of bound accesses for A*. This occurs when it reconsiders more nodes than are in the network. The `reached_view` array is primarily for skimming, as such I've disabled skimming for A*. Skimming for early exit Dijkstra's should also be reconsidered. --- aequilibrae/paths/AoN.pyx | 29 ++++++++++++------------ aequilibrae/paths/basic_path_finding.pyx | 12 +++------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/aequilibrae/paths/AoN.pyx b/aequilibrae/paths/AoN.pyx index 31f0dd844..e0ba57a52 100644 --- a/aequilibrae/paths/AoN.pyx +++ b/aequilibrae/paths/AoN.pyx @@ -242,19 +242,20 @@ def path_computation(origin, destination, graph, results): original_b_nodes_view) if a_star_bint: - w = path_finding_a_star(origin_index, - dest_index, - g_view, - b_nodes_view, - graph_fs_view, - nodes_to_indices_view, - lat_view, - lon_view, - predecessors_view, - ids_graph_view, - conn_view, - reached_first_view, - heuristic) + path_finding_a_star( + origin_index, + dest_index, + g_view, + b_nodes_view, + graph_fs_view, + nodes_to_indices_view, + lat_view, + lon_view, + predecessors_view, + ids_graph_view, + conn_view, + heuristic + ) else: w = path_finding(origin_index, dest_index if early_exit_bint else -1, @@ -267,7 +268,7 @@ def path_computation(origin, destination, graph, results): reached_first_view) - if skims > 0: + if skims > 0 and not a_star_bint: skim_single_path(origin_index, nodes, skims, diff --git a/aequilibrae/paths/basic_path_finding.pyx b/aequilibrae/paths/basic_path_finding.pyx index 76e49db48..865495139 100644 --- a/aequilibrae/paths/basic_path_finding.pyx +++ b/aequilibrae/paths/basic_path_finding.pyx @@ -224,7 +224,7 @@ cdef void skim_single_path(long origin, # Zeroes the intrazonal cost for j in range(skims): - node_skims[origin, j] = 0 + node_skims[origin, j] = 0 # Cascade skimming for i in range(1, found + 1): @@ -420,7 +420,7 @@ cdef inline double equirectangular_heuristic(double lat1, double lon1, double la @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) -cpdef int path_finding_a_star(long origin, +cpdef void path_finding_a_star(long origin, long destination, double[:] graph_costs, long long [:] csr_indices, @@ -431,8 +431,7 @@ cpdef int path_finding_a_star(long origin, long long [:] pred, long long [:] ids, long long [:] connectors, - long long [:] reached_first, - Heuristic heuristic) noexcept nogil: + Heuristic heuristic) nogil: """ Based on the pseudocode presented at https://en.wikipedia.org/wiki/A*_search_algorithm#Pseudocode The following variables have been renamed to be consistent with out Dijkstra's implementation @@ -451,7 +450,6 @@ cpdef int path_finding_a_star(long origin, ElementState vert_state # vertex state size_t origin_vert = origin size_t destination_vert = destination if destination != -1 else 0 - ITYPE_t found = 0 double *gScore = malloc(M * sizeof(double)) cdef: @@ -473,7 +471,6 @@ cpdef int path_finding_a_star(long origin, for i in range(M): pred[i] = -1 connectors[i] = -1 - reached_first[i] = -1 gScore[i] = INFINITY # initialization of the heap elements @@ -488,8 +485,6 @@ cpdef int path_finding_a_star(long origin, # main loop while pqueue.size > 0: current = extract_min(&pqueue) - reached_first[found] = current - found += 1 if current == destination_vert: break @@ -515,4 +510,3 @@ 
cpdef int path_finding_a_star(long origin,
     free_heap(&pqueue)
     free(gScore)
-    return found - 1

From c32e91a19837f495385700462338c860b246ee43 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Tue, 9 Jan 2024 17:12:59 +1000
Subject: [PATCH 02/44] Add preliminary route choice

---
 .flake8                      |   2 +-
 aequilibrae/paths/bfs_le.pxd |  27 ++++++
 aequilibrae/paths/bfs_le.pyx | 160 +++++++++++++++++++++++++++++++++++
 setup.py                     |  11 ++-
 4 files changed, 198 insertions(+), 2 deletions(-)
 create mode 100644 aequilibrae/paths/bfs_le.pxd
 create mode 100644 aequilibrae/paths/bfs_le.pyx

diff --git a/.flake8 b/.flake8
index 6cf616230..88ffabaca 100644
--- a/.flake8
+++ b/.flake8
@@ -3,4 +3,4 @@ max-line-length = 120
 ignore = E203, E266, E501, W503, F403, F401, C901, W605
 max-complexity = 20
 select = B,C,E,F,W,T4,B9
-exclude = .idea,.git,__pycache__,sphinx,.venv*,.venv,venv,docs/*,benchmarks/*
+exclude = .idea,.git,__pycache__,sphinx,.venv*,.venv,venv,docs/*,benchmarks/*,*.pyx,*pxd,*.pxi
diff --git a/aequilibrae/paths/bfs_le.pxd b/aequilibrae/paths/bfs_le.pxd
new file mode 100644
index 000000000..db0b77bf0
--- /dev/null
+++ b/aequilibrae/paths/bfs_le.pxd
@@ -0,0 +1,27 @@
+# cython: language_level=3str
+from aequilibrae.paths.results import PathResults
+
+
+cpdef float cube(float x)
+
+cdef class RouteChoice:
+    cdef:
+        int num
+        readonly object res
+        unsigned int depth
+
+        double [:] cost_view
+        long long [:] graph_fs_view
+        long long [:] b_nodes_view
+        long long [:] nodes_to_indices_view
+        double [:] lat_view
+        double [:] lon_view
+        long long [:] predecessors_view
+        long long [:] ids_graph_view
+        long long [:] conn_view
+        # Heuristic heuristic
+    cdef void c_helloworld(RouteChoice self) noexcept nogil
+    cpdef helloworld(self)
+    # cpdef run(self, origin, destination, max_depth = 0)
+    # cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) noexcept nogil
+    cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) nogil
diff --git a/aequilibrae/paths/bfs_le.pyx b/aequilibrae/paths/bfs_le.pyx
new file mode 100644
index 000000000..4406c9994
--- /dev/null
+++ b/aequilibrae/paths/bfs_le.pyx
@@ -0,0 +1,160 @@
+# cython: language_level=3str
+# distutils: define_macros=CYTHON_TRACE_NOGIL=1
+
+"""This module aims to implement the BFS-LE algorithm as described in Rieser-Schüssler, Balmer, and Axhausen,
+'Route Choice Sets for Very High-Resolution Data'.
+
+A rough overview of the algorithm is as follows.
+    1. Prepare the initial graph, this is depth 0 with no links removed.
+    2. Find a shortest path, P. If P is empty, stop; else add P to the path set.
+    3. For all links p in P, remove p from E, compounding with the previously removed links.
+    4. De-duplicate the sub-graphs, we only care about unique sub-graphs.
+    5. Go to 2.
+
+Thoughts:
+
+    - Path set: One issue is that the path set can't be stored as a simple sub-graph. This is because it is the union of
+      multiple paths. The union of two partially separate paths may create paths that are not in the paths
+      themselves. Instead I believe we can store the paths as (compressed) tries, operating on the common prefix
+      links/nodes. Each branch in the trie would encode a branch in the choice of route. This has the benefit of being
+      very small to store and iterating over all choices is some traversal of the tree. Downsides of this are that insertion
+      and search scale with the length of the paths. Each lookup requires a linear time search of the existing tree. As
+      each link in the path is removed the number of branches scales with the length of the path times the degree of the
+      vertices.
+
+      Another option is hash maps, just throw hash maps at it. Upsides of this are that they are much more generic, no special
+      methods required, and we'll likely be able to use something off the shelf. Downsides are that their performance is
+      largely dependent on the hash function. We'll need to use the set of removed edges as the key, with the path as
+      the value. This means we can't compress the paths. Choosing a good hash function may be tough, because link/node
+      ids can be arbitrarily large we'll have to consider overflows, though a non-naive function should handle this
+      fine. We'll also want to avoid modulo, Wikipedia says using a multiply-shift scheme with a Mersenne prime like
+      2^61 - 1 should work well. Although we have variable length paths, fixed length vector hashing can be applied and
+      padded to blocks of our paths.
+
+    - Removed link set: This set suffers from similar issues as the path set, as order doesn't matter. I think a hash
+      or tree set is about the only way to go about this. Since order doesn't matter a trie doesn't make sense, but a
+      tree set using sorted node/link ids could work.
+
+      Another option is a bit map. For a million link network, the bitmap would "only" be 125kB. Membership checks and
+      addition and removal from this set would be incredibly fast, faster than anything else, however comparison may
+      suffer. We could hash this bitmap, but if we're hashing it we might as well just hash the removal set.
+
+      We could also nest the hash sets, essentially building up a hash set of previously seen hashes.
+
+    - Hash functions: We're looking for an "incremental (multi)set hash function", because we don't need it to be secure
+      at all, some commutative binary operation should work e.g. +, *, ^. Whether or not they make good hash functions remains to be
+      seen.
+
+    - Graph modification: Actually removing a link from the graph would require modification of the existing
+      methods. Instead we can just require that the cost of a link is a float or double and set it to INFINITY, that way
+      the algorithms will never consider the link as viable.
+
+Current front runner: Hash/tree set of removed links with prefix code to path set trie. Whether it's worth incrementally
+building the trie or not should be tested.
+
+"""
+
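For orientation, the five steps above amount to the following loop, sketched here in plain
Python (an illustration only; `shortest_path` is an assumed helper returning the least-cost
route as a tuple of link ids, or None when the destination is unreachable):

    def bfs_le(origin, dest, max_depth, shortest_path):
        routes = set()
        queue = [frozenset()]      # depth 0: nothing removed yet (step 1)
        seen = set(queue)
        for _ in range(max_depth):
            next_queue = []
            for banned in queue:
                path = shortest_path(origin, dest, banned)  # step 2
                if path is None:
                    continue       # this sub-graph disconnects the OD pair
                routes.add(path)
                for link in path:  # step 3: compound each single-link removal
                    new_banned = banned | {link}
                    if new_banned not in seen:  # step 4: de-duplicate sub-graphs
                        seen.add(new_banned)
                        next_queue.append(new_banned)
            queue = next_queue     # step 5
        return routes

The Cython implementation below follows the same shape, with the banned-link sets and routes
held in C++ containers.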
+from aequilibrae import Graph
+from aequilibrae.paths.results import PathResults
+from libc.stdio cimport printf, fprintf, stderr
+from libc.math cimport INFINITY
+
+import numpy as np
+
+# It would really be nice if these were modules. The 'include' syntax is long deprecated
+include 'basic_path_finding.pyx'
+
+cpdef float cube(float x):
+    return x * x * x
+
+cdef class RouteChoice:
+    """
+    Route choice implemented via breadth first search with link removal (BFS-LE) as described in Rieser-Schüssler,
+    Balmer, and Axhausen, 'Route Choice Sets for Very High-Resolution Data'
+    """
+
+    # cdef int num
+    def __cinit__(self):
+        """C level init. For C memory allocation and initialisation. 
Called exactly once per object.""" + print("cinit called") + + + def __init__(self, graph: Graph): + """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" + print("init called") + self.res = PathResults() + self.res.prepare(graph) + + # self.heuristic = HEURISTIC_MAP[self.res._heuristic] + self.cost_view = graph.cost + self.graph_fs_view = graph.fs + self.b_nodes_view = graph.graph.b_node.values # FIXME: Why does path_computation copy this? + self.nodes_to_indices_view = graph.nodes_to_indices + self.lat_view = graph.lonlat_index.lat.values + self.lon_view = graph.lonlat_index.lon.values + self.predecessors_view = self.res.predecessors + self.ids_graph_view = graph.graph.id.values + self.conn_view = self.res.connectors + + def __dealloc__(self): + """ + C level deallocation. For freeing memeory allocated by this object. *Must* have GIL, `self` may be in a + partially deallocated state already. + """ + print("Deallocating!") + + cdef void c_helloworld(RouteChoice self) noexcept nogil: + printf("Hello world\n") + + cpdef helloworld(self): + with nogil: + RouteChoice.c_helloworld(self) + + def run(self, origin, destination, max_depth=0): + cdef: + long origin_index = self.nodes_to_indices_view[origin] + long dest_index = self.nodes_to_indices_view[destination] + unsigned int c_max_depth = max_depth + with nogil: + RouteChoice.generate_route_set(self, origin_index, dest_index, c_max_depth) + + + cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) nogil: + """Main method for route set generation, thread safe.""" + cdef long connector + + for depth in range(max_depth): + path_finding_a_star( + origin_index, + dest_index, + self.cost_view, + self.b_nodes_view, + self.graph_fs_view, + self.nodes_to_indices_view, + self.lat_view, + self.lon_view, + self.predecessors_view, + self.ids_graph_view, + self.conn_view, + EQUIRECTANGULAR # FIXME: enum import failing due to redefinition + ) + if self.predecessors_view[dest_index] >= 0: + printf("%ld is still reachable from %ld at depth %d\n", dest_index, origin_index, depth) + connector = self.conn_view[dest_index] + self.cost_view[connector] = INFINITY + printf("%ld has been banned\n", connector) + + with gil: + p = n = dest_index + all_connectors = [] + if p != origin_index: + while p != origin_index: + p = self.predecessors_view[p] + connector = self.conn_view[n] + all_connectors.append(connector + 1) + n = p + print("(", ", ".join(str(x) for x in all_connectors[::-1]), ")") + + else: + printf("%ld is unreachable from %ld at depth %d\n", dest_index, origin_index, depth) + break diff --git a/setup.py b/setup.py index 861aa5c47..6d29f08c4 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,15 @@ language="c++", ) +ext_mod_bfs_le = Extension( + "aequilibrae.paths.bfs_le", + [join("aequilibrae", "paths", "bfs_le.pyx")], + extra_compile_args=compile_args, + extra_link_args=link_args, + define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], + include_dirs=include_dirs, + language="c++", +) with open("requirements.txt", "r") as fl: install_requirements = [x.strip() for x in fl.readlines()] @@ -100,5 +109,5 @@ "Programming Language :: Python :: 3.12", ], cmdclass={"build_ext": build_ext}, - ext_modules=[ext_mod_aon, ext_mod_ipf, ext_mod_put], + ext_modules=[ext_mod_aon, ext_mod_ipf, ext_mod_put, ext_mod_bfs_le], ) From 7bf8b4e0c32779fb8535e6e59c6803ce5ad4d6cc Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 9 Jan 2024 17:16:43 +1000 Subject: [PATCH 
03/44] Return noexcepts --- aequilibrae/paths/basic_path_finding.pyx | 2 +- aequilibrae/paths/bfs_le.pxd | 5 +---- aequilibrae/paths/bfs_le.pyx | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/aequilibrae/paths/basic_path_finding.pyx b/aequilibrae/paths/basic_path_finding.pyx index 865495139..9dde9ac8f 100644 --- a/aequilibrae/paths/basic_path_finding.pyx +++ b/aequilibrae/paths/basic_path_finding.pyx @@ -431,7 +431,7 @@ cpdef void path_finding_a_star(long origin, long long [:] pred, long long [:] ids, long long [:] connectors, - Heuristic heuristic) nogil: + Heuristic heuristic) noexcept nogil: """ Based on the pseudocode presented at https://en.wikipedia.org/wiki/A*_search_algorithm#Pseudocode The following variables have been renamed to be consistent with out Dijkstra's implementation diff --git a/aequilibrae/paths/bfs_le.pxd b/aequilibrae/paths/bfs_le.pxd index db0b77bf0..b39036058 100644 --- a/aequilibrae/paths/bfs_le.pxd +++ b/aequilibrae/paths/bfs_le.pxd @@ -19,9 +19,6 @@ cdef class RouteChoice: long long [:] predecessors_view long long [:] ids_graph_view long long [:] conn_view - # Heuristic heuristic cdef void c_helloworld(RouteChoice self) noexcept nogil cpdef helloworld(self) - # cpdef run(self, origin, destination, max_depth = 0) - # cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) noexcept nogil - cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) nogil + cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) noexcept nogil diff --git a/aequilibrae/paths/bfs_le.pyx b/aequilibrae/paths/bfs_le.pyx index 4406c9994..9e3f8c638 100644 --- a/aequilibrae/paths/bfs_le.pyx +++ b/aequilibrae/paths/bfs_le.pyx @@ -119,7 +119,7 @@ cdef class RouteChoice: RouteChoice.generate_route_set(self, origin_index, dest_index, c_max_depth) - cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) nogil: + cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) noexcept nogil: """Main method for route set generation, thread safe.""" cdef long connector From b80b6ea4b0d4f9b79ff8dfdf46426b8cb3bcaf6d Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 11 Jan 2024 17:22:14 +1000 Subject: [PATCH 04/44] Working RouteChoice generation --- aequilibrae/paths/bfs_le.pxd | 11 +- aequilibrae/paths/bfs_le.pyx | 213 ++++++++++++++----- tests/aequilibrae/paths/test_route_choice.py | 40 ++++ 3 files changed, 203 insertions(+), 61 deletions(-) create mode 100644 tests/aequilibrae/paths/test_route_choice.py diff --git a/aequilibrae/paths/bfs_le.pxd b/aequilibrae/paths/bfs_le.pxd index b39036058..fad7f467c 100644 --- a/aequilibrae/paths/bfs_le.pxd +++ b/aequilibrae/paths/bfs_le.pxd @@ -1,9 +1,10 @@ # cython: language_level=3str from aequilibrae.paths.results import PathResults +from libcpp.vector cimport vector +from libcpp.unordered_set cimport unordered_set +from libcpp.unordered_map cimport unordered_map -cpdef float cube(float x) - cdef class RouteChoice: cdef: int num @@ -19,6 +20,6 @@ cdef class RouteChoice: long long [:] predecessors_view long long [:] ids_graph_view long long [:] conn_view - cdef void c_helloworld(RouteChoice self) noexcept nogil - cpdef helloworld(self) - cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) noexcept nogil + cdef void path_find(RouteChoice 
self, long origin_index, long dest_index, double [:] scratch_cost) noexcept nogil
+    # cdef unordered_map[unordered_set[long long] *, vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil
+    cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, unsigned int seed_value, double [:] scratch_cost) nogil
diff --git a/aequilibrae/paths/bfs_le.pyx b/aequilibrae/paths/bfs_le.pyx
index 9e3f8c638..b62626a06 100644
--- a/aequilibrae/paths/bfs_le.pyx
+++ b/aequilibrae/paths/bfs_le.pyx
@@ -49,23 +49,47 @@ Thoughts:
     methods. Instead we can just require that the cost of a link is a float or double and set it to INFINITY, that way
     the algorithms will never consider the link as viable.
 
-Current front runner: Hash/tree set of removed links with prefix code to path set trie. Whether it's worth incrementally
+Current front runner: Hash/tree set of removed links with index to a node in the path set suffix trie. Whether it's worth incrementally
 building the trie or not should be tested.
 
 """
 
 from aequilibrae import Graph
 from aequilibrae.paths.results import PathResults
-from libc.stdio cimport printf, fprintf, stderr
 from libc.math cimport INFINITY
+from libc.string cimport memcpy
+from libc.stdio cimport printf
+from libcpp.vector cimport vector
+from libcpp.unordered_set cimport unordered_set
+from libcpp.unordered_map cimport unordered_map
+from libcpp.utility cimport pair
+from libcpp.iterator cimport back_inserter
+from libcpp.algorithm cimport sample
+from cython.operator cimport dereference as deref, preincrement as inc
 
 import numpy as np
 
-# It would really be nice if these were modules. The 'include' syntax is long deprecated
-include 'basic_path_finding.pyx'
 
-cpdef float cube(float x):
-    return x * x * x
+
+from libc.stdint cimport uint_fast32_t, uint_fast64_t
+
+cdef extern from "<random>" namespace "std" nogil:
+    cdef cppclass random_device:
+        ctypedef uint_fast32_t result_type
+        random_device() except +
+        result_type operator()() except +
+
+    cdef cppclass minstd_rand:
+        ctypedef uint_fast32_t result_type
+        minstd_rand() except +
+        minstd_rand(result_type seed) except +
+        result_type operator()() except +
+        result_type min() except +
+        result_type max() except +
+        void discard(size_t z) except +
+        void seed(result_type seed) except +
+
+# It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation times
+include 'basic_path_finding.pyx'
 
 cdef class RouteChoice:
     """
@@ -76,12 +100,9 @@ cdef class RouteChoice:
 
-    # cdef int num
     def __cinit__(self):
         """C level init. For C memory allocation and initialisation. 
Called exactly once per object.""" - print("cinit called") - def __init__(self, graph: Graph): """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" - print("init called") self.res = PathResults() self.res.prepare(graph) @@ -92,69 +113,149 @@ cdef class RouteChoice: self.nodes_to_indices_view = graph.nodes_to_indices self.lat_view = graph.lonlat_index.lat.values self.lon_view = graph.lonlat_index.lon.values - self.predecessors_view = self.res.predecessors + self.predecessors_view = np.empty(graph.num_nodes + 1, dtype=np.int64) self.ids_graph_view = graph.graph.id.values - self.conn_view = self.res.connectors + self.conn_view = np.empty(graph.num_nodes + 1, dtype=np.int64) def __dealloc__(self): """ C level deallocation. For freeing memeory allocated by this object. *Must* have GIL, `self` may be in a partially deallocated state already. """ - print("Deallocating!") + pass - cdef void c_helloworld(RouteChoice self) noexcept nogil: - printf("Hello world\n") - - cpdef helloworld(self): - with nogil: - RouteChoice.c_helloworld(self) - - def run(self, origin, destination, max_depth=0): + def run(self, origin, destination, max_routes=0, max_depth=0, seed=0): cdef: long origin_index = self.nodes_to_indices_view[origin] long dest_index = self.nodes_to_indices_view[destination] + unsigned int c_max_routes = max_routes unsigned int c_max_depth = max_depth + unsigned int c_seed = seed + double [:] scratch_cost = np.empty(self.cost_view.shape[0]) # allocation of new memory view required gil + unordered_set[vector[long long] *] *results + unordered_map[unordered_set[long long] *, vector[long long] *].const_iterator results_iter with nogil: - RouteChoice.generate_route_set(self, origin_index, dest_index, c_max_depth) + results = RouteChoice.generate_route_set(self, origin_index, dest_index, c_max_routes, c_max_depth, c_seed, scratch_cost) + + res = [] + for x in deref(results): + res.append(list(deref(x))) + # res[frozenset(deref(x.first))] = tuple(deref(x.second)) + # del x.first + # del x.second + # del x + return res - cdef void generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_depth) noexcept nogil: - """Main method for route set generation, thread safe.""" - cdef long connector + cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] scratch_cost) noexcept nogil: + path_finding_a_star( + origin_index, + dest_index, + scratch_cost, + self.b_nodes_view, + self.graph_fs_view, + self.nodes_to_indices_view, + self.lat_view, + self.lon_view, + self.predecessors_view, + self.ids_graph_view, + self.conn_view, + EQUIRECTANGULAR # FIXME: enum import failing due to redefinition + ) + + # cdef unordered_map[unordered_set[long long] *, vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil: + cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, unsigned int seed_value, double [:] scratch_cost) nogil: + """Main method for route set generation""" + cdef: + unordered_map[unordered_set[long long] *, vector[long long] *] *working_set + unordered_map[unordered_set[long long] *, vector[long long] *].const_iterator set_iter + vector[unordered_set[long long] *] queue + vector[unordered_set[long long] *] next_queue + unordered_set[long long] *banned + unordered_set[long 
long] *new_banned + unordered_set[vector[long long] *] *route_set + vector[long long] *vec + pair[unordered_set[long long] *, vector[long long] *] *pair_tmp + pair[unordered_set[long long] *, vector[long long] *] x + minstd_rand rng + long long p, connector + queue.push_back(new unordered_set[long long]()) # Start with no edges banned + working_set = new unordered_map[unordered_set[long long] *, vector[long long] *]() + route_set = new unordered_set[vector[long long] *]() + rng.seed(seed_value) + + # We'll go at most `max_depth` iterations down, at each depth we maintain a deque of the next set of banned edges to consider for depth in range(max_depth): - path_finding_a_star( - origin_index, - dest_index, - self.cost_view, - self.b_nodes_view, - self.graph_fs_view, - self.nodes_to_indices_view, - self.lat_view, - self.lon_view, - self.predecessors_view, - self.ids_graph_view, - self.conn_view, - EQUIRECTANGULAR # FIXME: enum import failing due to redefinition - ) - if self.predecessors_view[dest_index] >= 0: - printf("%ld is still reachable from %ld at depth %d\n", dest_index, origin_index, depth) - connector = self.conn_view[dest_index] - self.cost_view[connector] = INFINITY - printf("%ld has been banned\n", connector) - - with gil: - p = n = dest_index - all_connectors = [] - if p != origin_index: - while p != origin_index: - p = self.predecessors_view[p] - connector = self.conn_view[n] - all_connectors.append(connector + 1) - n = p - print("(", ", ".join(str(x) for x in all_connectors[::-1]), ")") + for banned in queue: + memcpy(&scratch_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) + + for connector in deref(banned): + scratch_cost[connector] = INFINITY + + RouteChoice.path_find(self, origin_index, dest_index, scratch_cost) + + vec = new vector[long long]() + if self.predecessors_view[dest_index] >= 0: + # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be + p = dest_index + while p != origin_index: + connector = self.conn_view[p] + p = self.predecessors_view[p] + vec.push_back(connector) + + # This element is already in our set, skip it + if route_set.find(vec) != route_set.end(): + continue + + pair_tmp = new pair[unordered_set[long long] *, vector[long long] *](banned, vec) + working_set.insert(deref(pair_tmp)) # We'll keep this route around for (potential) insertion later + del pair_tmp + + else: + pass # Node was unreachable + + printf("%d + %d >= %d\n", working_set.size(), route_set.size(), max_routes) + if working_set.size() + route_set.size() >= max_routes: + printf("route set full (or close to full)\n") + # Randomly insert enough elements to fill our route_set, we don't need to add anything else to our queue since we're done now + # FIXME: Horrible assumption, never do this + # The iteration order of the working set is probably close enough to random, let's just grab the first max_routes - route_set.size() elements + printf("yeeting %d entries\n", working_set.size() - (max_routes - route_set.size())) + + # Skipp the elements we wish to save + set_iter = working_set.cbegin() + for _ in range(max_routes - route_set.size()): + inc(set_iter) + + # Destrory the remaining elements + working_set.erase(set_iter, working_set.cend()) + + # From the top, add what's left + for x in deref(working_set): + vec = x.second + route_set.insert(vec) - else: - printf("%ld is unreachable from %ld at depth %d\n", dest_index, origin_index, depth) break + else: + printf("route set not full (or close)\n") + for x 
in deref(working_set): + banned = x.first + vec = x.second + + route_set.insert(vec) + # Copy the previously banned links, then for each vector in the path we add one and push it onto our queue + for edge in deref(vec): + new_banned = new unordered_set[long long](deref(banned)) + new_banned.insert(edge) + next_queue.push_back(new_banned) + + queue.swap(next_queue) + next_queue.clear() + working_set.clear() + + # We may have added more banned link sets to the queue then found out we hit the max depth, we should free those + for banned in queue: + del banned + + return route_set diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py new file mode 100644 index 000000000..1153e0b70 --- /dev/null +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -0,0 +1,40 @@ +import os +import uuid +import zipfile +from os.path import join, dirname +from tempfile import gettempdir +from unittest import TestCase +import pandas as pd +import numpy as np + +from aequilibrae import Graph, Project +from aequilibrae.paths.bfs_le import RouteChoice +from ...data import siouxfalls_project + + +class TestRouteChoice(TestCase): + def setUp(self) -> None: + os.environ["PATH"] = os.path.join(gettempdir(), "temp_data") + ";" + os.environ["PATH"] + + proj_path = os.path.join(gettempdir(), "test_route_choice" + uuid.uuid4().hex) + os.mkdir(proj_path) + zipfile.ZipFile(join(dirname(siouxfalls_project), "sioux_falls_single_class.zip")).extractall(proj_path) + self.project = Project() + self.project.open(proj_path) + self.project.network.build_graphs() + self.graph = self.project.network.graphs["c"] # type: Graph + self.graph.set_graph("free_flow_time") + self.graph.set_blocked_centroid_flows(False) + + def tearDown(self) -> None: + self.project.close() + + def test_route_choice(self): + rc = RouteChoice(self.graph) + + # breakpoint() + results = rc.run(1, 20, max_routes=5000, max_depth=6) + # print(*results.items(), sep="\n") + print(*(results), sep="\n") + print(len(results)) + assert False From 3e37811eb150253fedb90effad6c97ec050c9cf9 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 11 Jan 2024 17:29:21 +1000 Subject: [PATCH 05/44] Removed unused variables and junk --- aequilibrae/paths/bfs_le.pxd | 2 +- aequilibrae/paths/bfs_le.pyx | 40 ++++++------------------------------ 2 files changed, 7 insertions(+), 35 deletions(-) diff --git a/aequilibrae/paths/bfs_le.pxd b/aequilibrae/paths/bfs_le.pxd index fad7f467c..8679035f4 100644 --- a/aequilibrae/paths/bfs_le.pxd +++ b/aequilibrae/paths/bfs_le.pxd @@ -22,4 +22,4 @@ cdef class RouteChoice: long long [:] conn_view cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] scratch_cost) noexcept nogil # cdef unordered_map[unordered_set[long long] *, vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil - cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, unsigned int seed_value, double [:] scratch_cost) nogil + cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil diff --git a/aequilibrae/paths/bfs_le.pyx b/aequilibrae/paths/bfs_le.pyx index b62626a06..175a556b9 100644 --- a/aequilibrae/paths/bfs_le.pyx +++ 
b/aequilibrae/paths/bfs_le.pyx @@ -63,31 +63,10 @@ from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport pair -from libcpp.iterator cimport back_inserter -from libcpp.algorithm cimport sample from cython.operator cimport dereference as deref, preincrement as inc import numpy as np - -from libc.stdint cimport uint_fast32_t, uint_fast64_t - -cdef extern from "" namespace "std" nogil: - cdef cppclass random_device: - ctypedef uint_fast32_t result_type - random_device() except + - result_type operator()() except + - - cdef cppclass minstd_rand: - ctypedef uint_fast32_t result_type - minstd_rand() except + - minstd_rand(result_type seed) except + - result_type operator()() except + - result_type min() except + - result_type max() except + - void discard(size_t z) except + - void seed(result_type seed) except + - # It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation times include 'basic_path_finding.pyx' @@ -124,26 +103,22 @@ cdef class RouteChoice: """ pass - def run(self, origin, destination, max_routes=0, max_depth=0, seed=0): + def run(self, origin, destination, max_routes=0, max_depth=0): cdef: long origin_index = self.nodes_to_indices_view[origin] long dest_index = self.nodes_to_indices_view[destination] unsigned int c_max_routes = max_routes unsigned int c_max_depth = max_depth - unsigned int c_seed = seed double [:] scratch_cost = np.empty(self.cost_view.shape[0]) # allocation of new memory view required gil unordered_set[vector[long long] *] *results unordered_map[unordered_set[long long] *, vector[long long] *].const_iterator results_iter with nogil: - results = RouteChoice.generate_route_set(self, origin_index, dest_index, c_max_routes, c_max_depth, c_seed, scratch_cost) + results = RouteChoice.generate_route_set(self, origin_index, dest_index, c_max_routes, c_max_depth, scratch_cost) res = [] for x in deref(results): res.append(list(deref(x))) - # res[frozenset(deref(x.first))] = tuple(deref(x.second)) - # del x.first - # del x.second - # del x + del x return res @@ -163,8 +138,7 @@ cdef class RouteChoice: EQUIRECTANGULAR # FIXME: enum import failing due to redefinition ) - # cdef unordered_map[unordered_set[long long] *, vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil: - cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, unsigned int seed_value, double [:] scratch_cost) nogil: + cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil: """Main method for route set generation""" cdef: unordered_map[unordered_set[long long] *, vector[long long] *] *working_set @@ -177,13 +151,11 @@ cdef class RouteChoice: vector[long long] *vec pair[unordered_set[long long] *, vector[long long] *] *pair_tmp pair[unordered_set[long long] *, vector[long long] *] x - minstd_rand rng long long p, connector queue.push_back(new unordered_set[long long]()) # Start with no edges banned working_set = new unordered_map[unordered_set[long long] *, vector[long long] *]() route_set = new unordered_set[vector[long long] *]() - rng.seed(seed_value) # We'll go at 
most `max_depth` iterations down, at each depth we maintain a deque of the next set of banned edges to consider
         for depth in range(max_depth):
@@ -215,13 +187,13 @@ cdef class RouteChoice:
             else:
                 pass # Node was unreachable
 
-            printf("%d + %d >= %d\n", working_set.size(), route_set.size(), max_routes)
+            printf("%ld + %ld >= %d\n", working_set.size(), route_set.size(), max_routes)
             if working_set.size() + route_set.size() >= max_routes:
                 printf("route set full (or close to full)\n")
                 # Randomly insert enough elements to fill our route_set, we don't need to add anything else to our queue since we're done now
                 # FIXME: Horrible assumption, never do this
                 # The iteration order of the working set is probably close enough to random, let's just grab the first max_routes - route_set.size() elements
-                printf("yeeting %d entries\n", working_set.size() - (max_routes - route_set.size()))
+                printf("yeeting %ld entries\n", working_set.size() - (max_routes - route_set.size()))
 
                 # Skipp the elements we wish to save
                 set_iter = working_set.cbegin()

From 1a864c57c4ffefa8a7cf9b1352a858ac1f10f509 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Thu, 11 Jan 2024 17:35:31 +1000
Subject: [PATCH 06/44] Notes

---
 aequilibrae/paths/bfs_le.pyx | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/aequilibrae/paths/bfs_le.pyx b/aequilibrae/paths/bfs_le.pyx
index 175a556b9..e091e82e2 100644
--- a/aequilibrae/paths/bfs_le.pyx
+++ b/aequilibrae/paths/bfs_le.pyx
@@ -42,15 +42,21 @@ Thoughts:
     We could also nest the hash sets, essentially building up a hash set of previously seen hashes.
 
     - Hash functions: We're looking for an "incremental (multi)set hash function", because we don't need it to be secure
-      at all, some commutative binary operation should work e.g. +, *, ^. Whether or not they make good hash functions remains to be
-      seen.
+      at all, some commutative binary operation should work e.g. +, *, ^. Whether or not they make good hash functions
+      remains to be seen.
 
     - Graph modification: Actually removing a link from the graph would require modification of the existing
       methods. Instead we can just require that the cost of a link is a float or double and set it to INFINITY, that way
       the algorithms will never consider the link as viable.
 
-Current front runner: Hash/tree set of removed links with index to a node in the path set suffix trie. Whether it's worth incrementally
-building the trie or not should be tested.
+Current front runner: Suffix trie of (reversed) paths; each node will store a pointer to the parent node, allowing
+traversal up the tree to reconstruct the path.
+Removed link set stored as a sorted prefix trie of sorts. Haven't fleshed out the full idea for this but I think it
+could work. Each node would store a pointer to a node in the route set tree that represents the path found with that
+set of removed links.
+
+Current implementation: Hash maps, hash sets, and whatever it took to get something working. The implementation is naive
+and inefficient; data is copied all over the place.
 
 """
 
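The "incremental (multi)set hash function" mentioned in the notes above boils down to folding
a commutative operation over the elements, so the hash is order independent and can be updated
one banned link at a time. A toy sketch of the idea (not the hash this series ends up using):

    MASK = (1 << 64) - 1

    def multiset_hash(links):
        h = 0
        for link in links:
            h = (h + (2 * link + 1) ** 3) & MASK  # any commutative op (+, *, ^) works
        return h

    def with_link(h, link):
        # Incremental update: extend the hash without rehashing the whole set
        return (h + (2 * link + 1) ** 3) & MASK

    assert multiset_hash([1, 2, 3]) == multiset_hash([3, 2, 1])
    assert multiset_hash([1, 2, 3]) == with_link(multiset_hash([1, 2]), 3)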
From 291bdc0318e84c5a03d534b396974cea3e598f70 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Thu, 11 Jan 2024 17:47:58 +1000
Subject: [PATCH 07/44] fixup! Removed unused variables and junk

---
 aequilibrae/paths/bfs_le.pxd | 4 ----
 aequilibrae/paths/bfs_le.pyx | 3 ---
 2 files changed, 7 deletions(-)

diff --git a/aequilibrae/paths/bfs_le.pxd b/aequilibrae/paths/bfs_le.pxd
index 8679035f4..0ac8cbe77 100644
--- a/aequilibrae/paths/bfs_le.pxd
+++ b/aequilibrae/paths/bfs_le.pxd
@@ -7,10 +7,6 @@ from libcpp.unordered_map cimport unordered_map
 
 cdef class RouteChoice:
     cdef:
-        int num
-        readonly object res
-        unsigned int depth
-
         double [:] cost_view
         long long [:] graph_fs_view
         long long [:] b_nodes_view
diff --git a/aequilibrae/paths/bfs_le.pyx b/aequilibrae/paths/bfs_le.pyx
index e091e82e2..aaa7a890f 100644
--- a/aequilibrae/paths/bfs_le.pyx
+++ b/aequilibrae/paths/bfs_le.pyx
@@ -88,9 +88,6 @@ cdef class RouteChoice:
 
     def __init__(self, graph: Graph):
        """Python level init, may be called multiple times, for things that can't be done in __cinit__."""
-        self.res = PathResults()
-        self.res.prepare(graph)
-
         # self.heuristic = HEURISTIC_MAP[self.res._heuristic]
         self.cost_view = graph.cost
         self.graph_fs_view = graph.fs

From 93526c0d9c4e6114cceeb4d9fea361d578798f2b Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Thu, 11 Jan 2024 17:48:12 +1000
Subject: [PATCH 08/44] fixup! fixup! Removed unused variables and junk

---
 aequilibrae/paths/bfs_le.pxd | 1 -
 1 file changed, 1 deletion(-)

diff --git a/aequilibrae/paths/bfs_le.pxd b/aequilibrae/paths/bfs_le.pxd
index 0ac8cbe77..c0132e9f8 100644
--- a/aequilibrae/paths/bfs_le.pxd
+++ b/aequilibrae/paths/bfs_le.pxd
@@ -17,5 +17,4 @@ cdef class RouteChoice:
     long long [:] ids_graph_view
     long long [:] conn_view
     cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] scratch_cost) noexcept nogil
-    # cdef unordered_map[unordered_set[long long] *, vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil
     cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil

From 1c449df8a7f1e3c115ea4aeb0a62242d4c3c0185 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Fri, 12 Jan 2024 09:17:05 +1000
Subject: [PATCH 09/44] Rename files

---
 aequilibrae/paths/{bfs_le.pxd => route_choice.pxd} | 0
 aequilibrae/paths/{bfs_le.pyx => route_choice.pyx} | 0
 setup.py                                           | 4 ++--
 tests/aequilibrae/paths/test_route_choice.py       | 2 +-
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename aequilibrae/paths/{bfs_le.pxd => route_choice.pxd} (100%)
 rename aequilibrae/paths/{bfs_le.pyx => route_choice.pyx} (100%)

diff --git a/aequilibrae/paths/bfs_le.pxd b/aequilibrae/paths/route_choice.pxd
similarity index 100%
rename from aequilibrae/paths/bfs_le.pxd
rename to aequilibrae/paths/route_choice.pxd
diff --git a/aequilibrae/paths/bfs_le.pyx b/aequilibrae/paths/route_choice.pyx
similarity index 100%
rename from aequilibrae/paths/bfs_le.pyx
rename to aequilibrae/paths/route_choice.pyx
diff --git a/setup.py b/setup.py
index 6d29f08c4..97f16f28b 100644
--- a/setup.py
+++ b/setup.py
@@ -56,8 +56,8 @@ )
 
 ext_mod_bfs_le = Extension(
-    "aequilibrae.paths.bfs_le",
-    [join("aequilibrae", "paths", "bfs_le.pyx")],
+    "aequilibrae.paths.route_choice",
+    [join("aequilibrae", "paths", "route_choice.pyx")],
     extra_compile_args=compile_args,
     extra_link_args=link_args,
     define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
diff --git a/tests/aequilibrae/paths/test_route_choice.py 
b/tests/aequilibrae/paths/test_route_choice.py
index 1153e0b70..71b8d3994 100644
--- a/tests/aequilibrae/paths/test_route_choice.py
+++ b/tests/aequilibrae/paths/test_route_choice.py
@@ -8,7 +8,7 @@ import numpy as np
 
 from aequilibrae import Graph, Project
-from aequilibrae.paths.bfs_le import RouteChoice
+from aequilibrae.paths.route_choice import RouteChoice
 from ...data import siouxfalls_project
 

From 08de182f48bdb56e7797ef4fb08ca059dba35d84 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Fri, 12 Jan 2024 14:29:20 +1000
Subject: [PATCH 10/44] Minor fixes

---
 aequilibrae/paths/route_choice.pyx | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx
index aaa7a890f..d3700a9ee 100644
--- a/aequilibrae/paths/route_choice.pyx
+++ b/aequilibrae/paths/route_choice.pyx
@@ -65,6 +65,7 @@ from aequilibrae.paths.results import PathResults
 from libc.math cimport INFINITY
 from libc.string cimport memcpy
 from libc.stdio cimport printf
+from libc.limits cimport UINT_MAX
 from libcpp.vector cimport vector
 from libcpp.unordered_set cimport unordered_set
 from libcpp.unordered_map cimport unordered_map
@@ -106,17 +107,15 @@ cdef class RouteChoice:
         """
         pass
 
-    def run(self, origin, destination, max_routes=0, max_depth=0):
+    def run(self, long origin, long destination, unsigned int max_routes=0, unsigned int max_depth=0):
         cdef:
             long origin_index = self.nodes_to_indices_view[origin]
             long dest_index = self.nodes_to_indices_view[destination]
-            unsigned int c_max_routes = max_routes
-            unsigned int c_max_depth = max_depth
             double [:] scratch_cost = np.empty(self.cost_view.shape[0]) # allocation of new memory view required gil
             unordered_set[vector[long long] *] *results
             unordered_map[unordered_set[long long] *, vector[long long] *].const_iterator results_iter
         with nogil:
-            results = RouteChoice.generate_route_set(self, origin_index, dest_index, c_max_routes, c_max_depth, scratch_cost)
+            results = RouteChoice.generate_route_set(self, origin_index, dest_index, max_routes, max_depth, scratch_cost)
 
         res = []
         for x in deref(results):
@@ -141,6 +140,10 @@ cdef class RouteChoice:
             EQUIRECTANGULAR # FIXME: enum import failing due to redefinition
         )
 
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @cython.embedsignature(True)
+    @cython.initializedcheck(False)
     cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil:
         """Main method for route set generation"""
         cdef:
@@ -156,6 +159,9 @@ cdef class RouteChoice:
             pair[unordered_set[long long] *, vector[long long] *] x
             long long p, connector
 
+        max_routes = max_routes if max_routes != 0 else UINT_MAX
+        max_depth = max_depth if max_depth != 0 else UINT_MAX
+
         queue.push_back(new unordered_set[long long]()) # Start with no edges banned
         working_set = new unordered_map[unordered_set[long long] *, vector[long long] *]()
         route_set = new unordered_set[vector[long long] *]()
@@ -186,7 +192,6 @@ cdef class RouteChoice:
             pair_tmp = new pair[unordered_set[long long] *, vector[long long] *](banned, vec)
             working_set.insert(deref(pair_tmp)) # We'll keep this route around for (potential) insertion later
             del pair_tmp
-
         else:
             pass # Node was unreachable

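As of this patch the public entry point can be exercised as follows; a minimal driver in the
spirit of test_route_choice.py (`graph` is assumed to be a prepared aequilibrae Graph with its
cost field set, as in the test's setUp):

    from aequilibrae.paths.route_choice import RouteChoice

    rc = RouteChoice(graph)
    # 0 means "no limit": both bounds are mapped to UINT_MAX internally
    routes = rc.run(1, 20, max_routes=10, max_depth=5)
    for route in routes:
        print(route)  # a tuple of link ids, walked from destination back to origin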
From a8b0c54f31f12bb3b31744f400776be35b8f2c82 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Fri, 12 Jan 2024 14:29:26 +1000
Subject: [PATCH 11/44] Use compressed graph

---
 aequilibrae/paths/route_choice.pyx | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx
index d3700a9ee..9ace11671 100644
--- a/aequilibrae/paths/route_choice.pyx
+++ b/aequilibrae/paths/route_choice.pyx
@@ -90,15 +90,15 @@ cdef class RouteChoice:
     def __init__(self, graph: Graph):
         """Python level init, may be called multiple times, for things that can't be done in __cinit__."""
         # self.heuristic = HEURISTIC_MAP[self.res._heuristic]
-        self.cost_view = graph.cost
-        self.graph_fs_view = graph.fs
-        self.b_nodes_view = graph.graph.b_node.values # FIXME: Why does path_computation copy this?
-        self.nodes_to_indices_view = graph.nodes_to_indices
+        self.cost_view = graph.compact_cost
+        self.graph_fs_view = graph.compact_fs
+        self.b_nodes_view = graph.compact_graph.b_node.values # FIXME: Why does path_computation copy this?
+        self.nodes_to_indices_view = graph.compact_nodes_to_indices
         self.lat_view = graph.lonlat_index.lat.values
         self.lon_view = graph.lonlat_index.lon.values
-        self.predecessors_view = np.empty(graph.num_nodes + 1, dtype=np.int64)
-        self.ids_graph_view = graph.graph.id.values
-        self.conn_view = np.empty(graph.num_nodes + 1, dtype=np.int64)
+        self.predecessors_view = np.empty(graph.compact_num_nodes + 1, dtype=np.int64)
+        self.ids_graph_view = graph.compact_graph.id.values
+        self.conn_view = np.empty(graph.compact_num_nodes + 1, dtype=np.int64)
 
     def __dealloc__(self):
         """

From a08ecc0b11ba3a7b8418ab98ec190230b78e20c6 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Fri, 12 Jan 2024 16:37:55 +1000
Subject: [PATCH 12/44] Avoid unnecessary work when we may fill the route set
 this iteration

---
 aequilibrae/paths/route_choice.pyx | 86 +++++++++++++++++-------------
 1 file changed, 49 insertions(+), 37 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx
index 9ace11671..4717936f2 100644
--- a/aequilibrae/paths/route_choice.pyx
+++ b/aequilibrae/paths/route_choice.pyx
@@ -60,8 +60,9 @@ and inefficient; data is copied all over the place.
 
 """
 
-from aequilibrae import Graph
 from aequilibrae.paths.results import PathResults
+import numpy as np
 
 from libc.math cimport INFINITY
 from libc.string cimport memcpy
 from libc.stdio cimport printf
@@ -74,6 +75,31 @@ from cython.operator cimport dereference as deref, preincrement as inc
 
 import numpy as np
 
+# std::linear_congruential_engine is not available in the Cython libcpp.random shim. We'll import it ourselves
+# from libcpp.random cimport minstd_rand
+from libc.stdint cimport uint_fast32_t, uint_fast64_t
+
+cdef extern from "<random>" namespace "std" nogil:
+    cdef cppclass random_device:
+        ctypedef uint_fast32_t result_type
+        random_device() except +
+        result_type operator()() except +
+
+    cdef cppclass minstd_rand:
+        ctypedef uint_fast32_t result_type
+        minstd_rand() except +
+        minstd_rand(result_type seed) except +
+        result_type operator()() except +
+        result_type min() except +
+        result_type max() except +
+        void discard(size_t z) except +
+        void seed(result_type seed) except +
+
+# std::shuffle is not available in the Cython libcpp.algorithm shim. We'll import it ourselves
+# from libcpp.algorithm cimport shuffle
+cdef extern from "<algorithm>" namespace "std" nogil:
+    void shuffle[RandomIt, URBG](RandomIt first, RandomIt last, URBG&& g) except +
+
 # It would really be nice if these were modules. 
The 'include' syntax is long deprecated and adds a lot to compilation times include 'basic_path_finding.pyx' @@ -83,7 +109,6 @@ cdef class RouteChoice: Balmer, and Axhausen, 'Route Choice Sets for Very High-Resolution Data' """ - # cdef int num def __cinit__(self): """C level init. For C memory allocation and initalisation. Called exactly once per object.""" @@ -119,7 +144,7 @@ cdef class RouteChoice: res = [] for x in deref(results): - res.append(list(deref(x))) + res.append(np.array(deref(x))) del x return res @@ -158,6 +183,7 @@ cdef class RouteChoice: pair[unordered_set[long long] *, vector[long long] *] *pair_tmp pair[unordered_set[long long] *, vector[long long] *] x long long p, connector + minstd_rand rng max_routes = max_routes if max_routes != 0 else UINT_MAX max_depth = max_depth if max_depth != 0 else UINT_MAX @@ -165,9 +191,16 @@ cdef class RouteChoice: queue.push_back(new unordered_set[long long]()) # Start with no edges banned working_set = new unordered_map[unordered_set[long long] *, vector[long long] *]() route_set = new unordered_set[vector[long long] *]() + rng.seed(0) # We'll go at most `max_depth` iterations down, at each depth we maintain a deque of the next set of banned edges to consider for depth in range(max_depth): + # If we could potentially fill the route_set after this depth, shuffle the queue + if queue.size() + route_set.size() >= max_routes: + printf("%ld + %ld >= %d, ", queue.size(), route_set.size(), max_routes) + printf("route set full (or close to full), shuffling queue\n") + shuffle(queue.begin(), queue.end(), rng) + for banned in queue: memcpy(&scratch_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) @@ -192,43 +225,22 @@ cdef class RouteChoice: pair_tmp = new pair[unordered_set[long long] *, vector[long long] *](banned, vec) working_set.insert(deref(pair_tmp)) # We'll keep this route around for (potential) insertion later del pair_tmp + + if working_set.size() + route_set.size() >= max_routes: + break else: pass # Node was unreachable - printf("%ld + %ld >= %d\n", working_set.size(), route_set.size(), max_routes) - if working_set.size() + route_set.size() >= max_routes: - printf("route set full (or close to full)\n") - # Randomly insert enough elements to fill our route_set, we don't need to add anything else to our queue since we're done now - # FIXME: Horrible assumption, never do this - # The iteration order of the working set is probably close enough to random, let's just grab the first max_routes - route_set.size() elements - printf("yeeting %ld entries\n", working_set.size() - (max_routes - route_set.size())) - - # Skipp the elements we wish to save - set_iter = working_set.cbegin() - for _ in range(max_routes - route_set.size()): - inc(set_iter) - - # Destrory the remaining elements - working_set.erase(set_iter, working_set.cend()) - - # From the top, add what's left - for x in deref(working_set): - vec = x.second - route_set.insert(vec) - - break - else: - printf("route set not full (or close)\n") - for x in deref(working_set): - banned = x.first - vec = x.second - - route_set.insert(vec) - # Copy the previously banned links, then for each vector in the path we add one and push it onto our queue - for edge in deref(vec): - new_banned = new unordered_set[long long](deref(banned)) - new_banned.insert(edge) - next_queue.push_back(new_banned) + for x in deref(working_set): + banned = x.first + vec = x.second + + route_set.insert(vec) + # Copy the previously banned links, then for each vector in the path we add one and push 
it onto our queue
+                for edge in deref(vec):
+                    new_banned = new unordered_set[long long](deref(banned))
+                    new_banned.insert(edge)
+                    next_queue.push_back(new_banned)
 
             queue.swap(next_queue)
             next_queue.clear()
             working_set.clear()

From 4f46026bbd4312f8bdd3e79f3b8dc007678b3c42 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Mon, 15 Jan 2024 17:31:57 +1000
Subject: [PATCH 13/44] Fix pointer issues, move custom imports

---
 aequilibrae/paths/route_choice.pxd           | 109 ++++++++++++++++++-
 aequilibrae/paths/route_choice.pyx           |  76 +++++--------
 tests/aequilibrae/paths/test_route_choice.py |  27 +++--
 3 files changed, 155 insertions(+), 57 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd
index c0132e9f8..c89ab8531 100644
--- a/aequilibrae/paths/route_choice.pxd
+++ b/aequilibrae/paths/route_choice.pxd
@@ -3,6 +3,112 @@ from aequilibrae.paths.results import PathResults
 from libcpp.vector cimport vector
 from libcpp.unordered_set cimport unordered_set
 from libcpp.unordered_map cimport unordered_map
+from libcpp.utility cimport pair
+from libcpp cimport bool
+
+# std::linear_congruential_engine is not available in the Cython libcpp.random shim. We'll import it ourselves
+# from libcpp.random cimport minstd_rand
+from libc.stdint cimport uint_fast32_t, uint_fast64_t
+
+cdef extern from "<random>" namespace "std" nogil:
+    cdef cppclass random_device:
+        ctypedef uint_fast32_t result_type
+        random_device() except +
+        result_type operator()() except +
+
+    cdef cppclass minstd_rand:
+        ctypedef uint_fast32_t result_type
+        minstd_rand() except +
+        minstd_rand(result_type seed) except +
+        result_type operator()() except +
+        result_type min() except +
+        result_type max() except +
+        void discard(size_t z) except +
+        void seed(result_type seed) except +
+
+# std::shuffle is not available in the Cython libcpp.algorithm shim. We'll import it ourselves
+# from libcpp.algorithm cimport shuffle
+cdef extern from "<algorithm>" namespace "std" nogil:
+    void shuffle[RandomIt, URBG](RandomIt first, RandomIt last, URBG&& g) except +
+
+# std::make_pair is not available in the Cython libcpp.utility shim. We'll import it ourselves based on the C++11 to C++14
+# definition because C++14 makes this signature weird
+# See https://github.com/cython/cython/issues/2706
+cdef extern from "<utility>" namespace "std" nogil:
+    pair[T, U] make_pair[T, U](T&& t, U&& u)
+
+# To define our own hashing functions we have to write a little cpp. The string is inlined directly into route_choice.cpp
+# To make Cython aware of our hash types we also must declare them with the right signatures
+#
+# OrderedVectorPointerHasher: This hash function is for hashing the routes, thus it should be order *DEPENDENT*.
+# Potential performance improvements may come from https://en.wikipedia.org/wiki/Universal_hashing#Hashing_vectors
+#
+# UnorderedSetPointerHasher: This hash function is for hashing the banned route sets, thus it should be order *INDEPENDENT*.
+# Potential performance improvements may come from:
+# Mark N. Wegman, J. Lawrence Carter,
+# New hash functions and their use in authentication and set equality
+# https://doi.org/10.1016/0022-0000(81)90033-7
+#
+# PointerDereferenceEqualTo: Because we are storing and hashing the pointers to objects to avoid unnecessary copies we must
+# define our own comparator to resolve hash collisions. Without this equality operator the bare pointers are compared.
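In Python terms the two hashers behave as below (a standalone mirror for intuition only; the
C++ structs that follow operate through pointers and rely on size_t wrap-around, approximated
here with a 64-bit mask):

    MASK = (1 << 64) - 1

    def ordered_hash(route):  # mirrors OrderedVectorPointerHasher
        seed = len(route)
        for i in route:
            x = (((i >> 16) ^ i) * 0x45D9F3B) & MASK
            x = (((x >> 16) ^ x) * 0x45D9F3B) & MASK
            x = (x >> 16) ^ x
            seed = (seed ^ (x + 0x9E3779B9 + (seed << 6) + (seed >> 2))) & MASK
        return seed

    def unordered_hash(banned):  # mirrors UnorderedSetPointerHasher
        h = 1
        for i in banned:
            h = (h * (1779033703 + 2 * i)) & MASK
        return h >> 1

    assert ordered_hash([1, 2, 3]) != ordered_hash([3, 2, 1])      # routes: order matters
    assert unordered_hash([1, 2, 3]) == unordered_hash([3, 2, 1])  # banned sets: order doesn't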
+cdef extern from * nogil: + """ + // Source: https://stackoverflow.com/a/72073933 + // License: CC BY-SA 4.0 Deed, https://creativecommons.org/licenses/by-sa/4.0/ + struct OrderedVectorPointerHasher { + size_t operator()(const std::vector *V) const { + size_t seed = V->size(); + // printf("Vector at %p, [", V); + long long x; + for(auto &i : *V) { + x = ((i >> 16) ^ i) * 0x45d9f3b; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = (x >> 16) ^ x; + seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2); + // printf("%lld, ", i); + } + // printf("] has hash: %zu\\n", seed); + return seed; + } + }; + + // Source: https://stackoverflow.com/a/1537189 + // License: CC BY-SA 2.5 Deed, https://creativecommons.org/licenses/by-sa/2.5/ + struct UnorderedSetPointerHasher { + size_t operator()(const std::unordered_set *S) const { + size_t hash = 1; + // printf("Set at %p, [", S); + for(auto &i : *S) { + hash *= 1779033703 + 2 * i; + // printf("%lld, ", i); + } + hash /= 2; + // printf("] has hash: %zu\\n", hash); + return hash; + } + }; + + template + struct PointerDereferenceEqualTo { + bool operator()(const T& lhs, const T& rhs) const { + return *lhs == *rhs; + } + }; + """ + cppclass OrderedVectorPointerHasher: + size_t operator()(const vector[long long] *V) const + + cppclass UnorderedSetPointerHasher: + size_t operator()(const unordered_set[long long] *S) const + + cppclass PointerDereferenceEqualTo[T]: + bool operator()(const T& lhs, const T& rhs) const + + +# For typing (haha) convenince, the types names are getting long +ctypedef unordered_set[vector[long long] *, OrderedVectorPointerHasher, PointerDereferenceEqualTo[vector[long long] *]] RouteSet_t +ctypedef unordered_set[unordered_set[long long] *, UnorderedSetPointerHasher, PointerDereferenceEqualTo[unordered_set[long long] *]] LinkSet_t +ctypedef vector[pair[unordered_set[long long] *, vector[long long] *]] RouteMap_t cdef class RouteChoice: @@ -16,5 +122,6 @@ cdef class RouteChoice: long long [:] predecessors_view long long [:] ids_graph_view long long [:] conn_view + long long [:] compressed_link_ids cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] scratch_cost) noexcept nogil - cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil + cdef RouteSet_t *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 4717936f2..f3dd163cb 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -61,6 +61,7 @@ and ineffiecent, data is copied all over the place. """ from aequilibrae.paths.results import PathResults +from aequilibrae import Graph import numpy as np from libc.math cimport INFINITY @@ -75,31 +76,6 @@ from cython.operator cimport dereference as deref, preincrement as inc import numpy as np -# std::linear_congruential_engine is not available in the Cython libcpp.random shim. 
We'll import it ourselves
-# from libcpp.random cimport minstd_rand
-from libc.stdint cimport uint_fast32_t, uint_fast64_t
-
-cdef extern from "<random>" namespace "std" nogil:
-    cdef cppclass random_device:
-        ctypedef uint_fast32_t result_type
-        random_device() except +
-        result_type operator()() except +
-
-    cdef cppclass minstd_rand:
-        ctypedef uint_fast32_t result_type
-        minstd_rand() except +
-        minstd_rand(result_type seed) except +
-        result_type operator()() except +
-        result_type min() except +
-        result_type max() except +
-        void discard(size_t z) except +
-        void seed(result_type seed) except +
-
-# std::shuffle is not available in the Cython libcpp.algorithm shim. We'll import it ourselves
-# from libcpp.algorithm cimport shuffle
-cdef extern from "<algorithm>" namespace "std" nogil:
-    void shuffle[RandomIt, URBG](RandomIt first, RandomIt last, URBG&& g) except +
-
 # It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation times
 include 'basic_path_finding.pyx'

@@ -137,14 +113,14 @@ cdef class RouteChoice:
             long origin_index = self.nodes_to_indices_view[origin]
             long dest_index = self.nodes_to_indices_view[destination]
             double [:] scratch_cost = np.empty(self.cost_view.shape[0])  # allocation of new memory view required gil
-            unordered_set[vector[long long] *] *results
+            RouteSet_t *results
             unordered_map[unordered_set[long long] *, vector[long long] *].const_iterator results_iter

         with nogil:
             results = RouteChoice.generate_route_set(self, origin_index, dest_index, max_routes, max_depth, scratch_cost)

         res = []
         for x in deref(results):
-            res.append(np.array(deref(x)))
+            res.append(tuple(deref(x)))
             del x

         return res
@@ -169,36 +145,35 @@ cdef class RouteChoice:
     @cython.wraparound(False)
     @cython.embedsignature(True)
     @cython.initializedcheck(False)
-    cdef unordered_set[vector[long long] *] *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil:
+    cdef RouteSet_t *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil:
         """Main method for route set generation"""
         cdef:
-            unordered_map[unordered_set[long long] *, vector[long long] *] *working_set
-            unordered_map[unordered_set[long long] *, vector[long long] *].const_iterator set_iter
+            RouteSet_t *route_set
+            LinkSet_t removed_links
+            RouteMap_t working_set
+            minstd_rand rng
+
+            # Scatch objects
             vector[unordered_set[long long] *] queue
             vector[unordered_set[long long] *] next_queue
             unordered_set[long long] *banned
             unordered_set[long long] *new_banned
-            unordered_set[vector[long long] *] *route_set
             vector[long long] *vec
-            pair[unordered_set[long long] *, vector[long long] *] *pair_tmp
             pair[unordered_set[long long] *, vector[long long] *] x
             long long p, connector
-            minstd_rand rng

         max_routes = max_routes if max_routes != 0 else UINT_MAX
         max_depth = max_depth if max_depth != 0 else UINT_MAX

         queue.push_back(new unordered_set[long long]())  # Start with no edges banned
-        working_set = new unordered_map[unordered_set[long long] *, vector[long long] *]()
-        route_set = new unordered_set[vector[long long] *]()
+        route_set = new RouteSet_t()
         rng.seed(0)

         # We'll go at most `max_depth` iterations down, at each depth we maintain a deque of the next set of banned edges to consider
         for depth in range(max_depth):
             # If we could potentially fill the route_set after this depth, shuffle the queue
             if queue.size() + 
route_set.size() >= max_routes:
-                    printf("%ld + %ld >= %d, ", queue.size(), route_set.size(), max_routes)
-                    printf("route set full (or close to full), shuffling queue\n")
+                    # printf("%ld + %ld >= %d, ", queue.size(), route_set.size(), max_routes)
+                    # printf("route set full (or close to full), shuffling queue\n")
                     shuffle(queue.begin(), queue.end(), rng)

                 for banned in queue:
@@ -218,29 +193,36 @@ cdef class RouteChoice:
                         p = self.predecessors_view[p]
                         vec.push_back(connector)

-                    # This element is already in our set, skip it
+                    # Mark this set of banned links as seen
+                    removed_links.insert(banned)
+
+                    # This element is already in our route set, skip it
                     if route_set.find(vec) != route_set.end():
                         continue

-                    pair_tmp = new pair[unordered_set[long long] *, vector[long long] *](banned, vec)
-                    working_set.insert(deref(pair_tmp))  # We'll keep this route around for (potential) insertion later
-                    del pair_tmp
-
+                    working_set.push_back(make_pair(banned, vec))
                     if working_set.size() + route_set.size() >= max_routes:
                         break
-                else:
-                    pass  # Node was unreachable

-            for x in deref(working_set):
+            for x in working_set:
                 banned = x.first
                 vec = x.second
-
                 route_set.insert(vec)

+                # Copy the previously banned links, then for each vector in the path we add one and push it onto our queue
                 for edge in deref(vec):
+                    # This is one area for potential improvement. Here we construct a new set from the old one, copying all the elements
+                    # then add a single element. An incremental set hash function could be of use. However, the size of this set is
+                    # directly dependent on the current depth. As the route set size grows so incredibly fast the depth will rarely get
+                    # high enough for this to matter.
                     new_banned = new unordered_set[long long](deref(banned))
                     new_banned.insert(edge)
-                    next_queue.push_back(new_banned)
+                    # If we've already seen this set of removed links before we already know what the path is and it's in our route set
+                    if removed_links.find(new_banned) != removed_links.end():
+                        del new_banned
+                    else:
+                        next_queue.push_back(new_banned)

             queue.swap(next_queue)
             next_queue.clear()
diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py
index 71b8d3994..07908f6e8 100644
--- a/tests/aequilibrae/paths/test_route_choice.py
+++ b/tests/aequilibrae/paths/test_route_choice.py
@@ -9,16 +9,18 @@
 from aequilibrae import Graph, Project
 from aequilibrae.paths.route_choice import RouteChoice

-from ...data import siouxfalls_project
+# from ...data import siouxfalls_project


 class TestRouteChoice(TestCase):
     def setUp(self) -> None:
-        os.environ["PATH"] = os.path.join(gettempdir(), "temp_data") + ";" + os.environ["PATH"]
+        # os.environ["PATH"] = os.path.join(gettempdir(), "temp_data") + ";" + os.environ["PATH"]

-        proj_path = os.path.join(gettempdir(), "test_route_choice" + uuid.uuid4().hex)
-        os.mkdir(proj_path)
-        zipfile.ZipFile(join(dirname(siouxfalls_project), "sioux_falls_single_class.zip")).extractall(proj_path)
+        # proj_path = os.path.join(gettempdir(), "test_route_choice" + uuid.uuid4().hex)
+        # os.mkdir(proj_path)
+        # zipfile.ZipFile(join(dirname(siouxfalls_project), "sioux_falls_single_class.zip")).extractall(proj_path)
+
+        proj_path = "/home/jake/Software/aequilibrae_performance_tests/models/chicago_sketch/"

         self.project = Project()
         self.project.open(proj_path)
         self.project.network.build_graphs()
@@ -34,7 +36,14 @@ def test_route_choice(self):
         # breakpoint()
         results = rc.run(1, 20, max_routes=5000, max_depth=6)

-        # print(*results.items(), sep="\n")
-        print(*(results), sep="\n")
-        
print(len(results)) - assert False + # print(*results, sep="\n") + print(len(results), len(set(results))) + self.assertEqual(len(results), len(set(results))) + + # breakpoint() + + +if __name__ == "__main__": + t = TestRouteChoice() + t.setUp() + t.test_route_choice() From c74179ba1605b555a77bee5e2100c356bac7c5a7 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 16 Jan 2024 15:54:17 +1000 Subject: [PATCH 14/44] Spelling --- aequilibrae/paths/route_choice.pyx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index f3dd163cb..f5eea2865 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -25,9 +25,9 @@ Thoughts: Another option is hash maps, just throw hash maps at it. Upsides of this is they are much more generic, no special methods required and we'll likely be able to use something off the shelf. Downsides are that their performance is largely dependent on the hash function. We'll need to use the set of removed edges as the key, which the path as - the value. This means we can't compresse the paths. Choosing a good hash function may be tough, because link/node + the value. This means we can't compress the paths. Choosing a good hash function may be tough, because link/node ids can be arbitrarily large we'll have to consider overflows, though an non-naive function should handle this - fine. We'll also want to avoid modulo, wikipedia says using a multiply-shift scheme with a Mersenne prime like + fine. We'll also want to avoid modulo, Wikipedia says using a multiply-shift scheme with a Mersenne prime like 2^61 - 1 should work well. Although we have variable length paths, fixed length vector hashing can be applied and padded to blocks of our paths. @@ -36,10 +36,10 @@ Thoughts: tree set using sorted node/link ids could work. Another option is a bit map. For a million link network, the bitmap would "only" be 125kB. Membership checks and - addition and removal from this set would be incredily fast, faster than anything else, however comparison may + addition and removal from this set would be incredibly fast, faster than anything else, however comparison may suffer. We could hash this bitmap but if we're hashing it we might as well just hash the removal set. - We could also nest the hash sets, essientially building up a hash set of previously seen hashes. + We could also nest the hash sets, essentially building up a hash set of previously seen hashes. - Hash functions: We're looking for a "incremental (multi)set hash function", because we don't need it to be secure at all some commutative binary operation should work e.g. +, *, ^. Whether or not they make good hash functions @@ -55,8 +55,8 @@ Removed link set stored as a sorted prefix trie of sorts. Haven't flushed out th could work. Each node would store a pointer to a node in the route set tree that represents the path found with that set of removed links. -Current implementation: Hash maps, hash sets, and whatever it took to get somethihng working. Implementation is naive -and ineffiecent, data is copied all over the place. +Current implementation: Hash maps, hash sets, and whatever it took to get something working. Implementation is naive +and inefficient, data is copied all over the place. """ @@ -86,7 +86,7 @@ cdef class RouteChoice: """ def __cinit__(self): - """C level init. For C memory allocation and initalisation. Called exactly once per object.""" + """C level init. 
For C memory allocation and initialisation. Called exactly once per object.""" def __init__(self, graph: Graph): """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" @@ -103,7 +103,7 @@ cdef class RouteChoice: def __dealloc__(self): """ - C level deallocation. For freeing memeory allocated by this object. *Must* have GIL, `self` may be in a + C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a partially deallocated state already. """ pass From 62b058d7927a8aa9d7ba24f8fe3836508383c036 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 16 Jan 2024 15:58:35 +1000 Subject: [PATCH 15/44] Prevent memory leak when destination is unreachable --- aequilibrae/paths/route_choice.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index f5eea2865..ed2bff995 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -184,8 +184,8 @@ cdef class RouteChoice: RouteChoice.path_find(self, origin_index, dest_index, scratch_cost) - vec = new vector[long long]() if self.predecessors_view[dest_index] >= 0: + vec = new vector[long long]() # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be p = dest_index while p != origin_index: From 5ddf8f4225b319fba9dc85f5bbb1563431d58623 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 16 Jan 2024 15:59:10 +1000 Subject: [PATCH 16/44] Remove print out --- aequilibrae/paths/route_choice.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index ed2bff995..8709897ba 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -172,8 +172,6 @@ cdef class RouteChoice: for depth in range(max_depth): # If we could potentially fill the route_set after this depth, shuffle the queue if queue.size() + route_set.size() >= max_routes: - # printf("%ld + %ld >= %d, ", queue.size(), route_set.size(), max_routes) - # printf("route set full (or close to full), shuffling queue\n") shuffle(queue.begin(), queue.end(), rng) for banned in queue: From 5e9aa214f9ffa176a1dae3ffd3dd08c982686dbf Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 16 Jan 2024 15:59:26 +1000 Subject: [PATCH 17/44] Prevent infinite loop when depth is unlimited --- aequilibrae/paths/route_choice.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 8709897ba..b61e17409 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -170,6 +170,9 @@ cdef class RouteChoice: # We'll go at most `max_depth` iterations down, at each depth we maintain a deque of the next set of banned edges to consider for depth in range(max_depth): + if route_set.size() >= max_routes: + break + # If we could potentially fill the route_set after this depth, shuffle the queue if queue.size() + route_set.size() >= max_routes: shuffle(queue.begin(), queue.end(), rng) From 8d5cdc63f3df329fcd341d5421bc9c9a2853c861 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 16 Jan 2024 15:59:57 +1000 Subject: [PATCH 18/44] Prevent oob access and incredible memory consumption (2gb/s) from A* --- aequilibrae/paths/route_choice.pyx | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx 
b/aequilibrae/paths/route_choice.pyx index b61e17409..5d2fad06f 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -95,8 +95,9 @@ cdef class RouteChoice: self.graph_fs_view = graph.compact_fs self.b_nodes_view = graph.compact_graph.b_node.values # FIXME: Why does path_computation copy this? self.nodes_to_indices_view = graph.compact_nodes_to_indices - self.lat_view = graph.lonlat_index.lat.values - self.lon_view = graph.lonlat_index.lon.values + tmp = graph.lonlat_index.loc[graph.compact_all_nodes] + self.lat_view = tmp.lat.values + self.lon_view = tmp.lon.values self.predecessors_view = np.empty(graph.compact_num_nodes + 1, dtype=np.int64) self.ids_graph_view = graph.compact_graph.id.values self.conn_view = np.empty(graph.compact_num_nodes + 1, dtype=np.int64) @@ -115,6 +116,12 @@ cdef class RouteChoice: double [:] scratch_cost = np.empty(self.cost_view.shape[0]) # allocation of new memory view required gil RouteSet_t *results unordered_map[unordered_set[long long] *, vector[long long] *].const_iterator results_iter + + if origin_index == -1: + raise ValueError(f"Origin {origin} is not present within the compact graph") + if dest_index == -1: + raise ValueError(f"Destination {destination} is not present within the compact graph") + with nogil: results = RouteChoice.generate_route_set(self, origin_index, dest_index, max_routes, max_depth, scratch_cost) From 614b77bd19efb64e005e832611fadba1e0db027a Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 16 Jan 2024 16:00:51 +1000 Subject: [PATCH 19/44] Scratch testing on Arkansas --- tests/aequilibrae/paths/test_route_choice.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 07908f6e8..ea7ad7f27 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -20,12 +20,12 @@ def setUp(self) -> None: # os.mkdir(proj_path) # zipfile.ZipFile(join(dirname(siouxfalls_project), "sioux_falls_single_class.zip")).extractall(proj_path) - proj_path = "/home/jake/Software/aequilibrae_performance_tests/models/chicago_sketch/" + proj_path = "/home/jake/Software/aequilibrae_performance_tests/models/Arkansas/" self.project = Project() self.project.open(proj_path) - self.project.network.build_graphs() + self.project.network.build_graphs(fields=["distance"], modes=["c"]) self.graph = self.project.network.graphs["c"] # type: Graph - self.graph.set_graph("free_flow_time") + self.graph.set_graph("distance") self.graph.set_blocked_centroid_flows(False) def tearDown(self) -> None: @@ -34,12 +34,20 @@ def tearDown(self) -> None: def test_route_choice(self): rc = RouteChoice(self.graph) - # breakpoint() - results = rc.run(1, 20, max_routes=5000, max_depth=6) + results = rc.run(220591, 352, max_routes=5000, max_depth=0) # print(*results, sep="\n") print(len(results), len(set(results))) self.assertEqual(len(results), len(set(results))) + # import shapely + # links = self.project.network.links.data.set_index("link_id") + # df = [] + # for route in results: + # df.append((route, shapely.MultiLineString(links.loc[self.graph.graph[self.graph.graph.__compressed_id__.isin(route)].link_id].geometry.to_list()).wkt)) + + # df = pd.DataFrame(df, columns=["route", "geometry"]) + # df.to_csv("test1.csv") + # breakpoint() From e14809cac824766222557d25309d2c2e70eb37d2 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 17 Jan 2024 11:51:50 +1000 Subject: [PATCH 
20/44] Remove working_set, PO1 makes this redundant, reduce memory usage

Graphs should also be marked as seen regardless of whether the destination is
reachable
---
 aequilibrae/paths/route_choice.pyx | 60 ++++++++++++++----------------
 1 file changed, 27 insertions(+), 33 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx
index 5d2fad06f..e4292c21c 100644
--- a/aequilibrae/paths/route_choice.pyx
+++ b/aequilibrae/paths/route_choice.pyx
@@ -157,10 +157,9 @@ cdef class RouteChoice:
         cdef:
             RouteSet_t *route_set
             LinkSet_t removed_links
-            RouteMap_t working_set
             minstd_rand rng

-            # Scatch objects
+            # Scratch objects
             vector[unordered_set[long long] *] queue
             vector[unordered_set[long long] *] next_queue
             unordered_set[long long] *banned
             unordered_set[long long] *new_banned
             vector[long long] *vec
-            pair[unordered_set[long long] *, vector[long long] *] x
             long long p, connector

         max_routes = max_routes if max_routes != 0 else UINT_MAX
         max_depth = max_depth if max_depth != 0 else UINT_MAX

         queue.push_back(new unordered_set[long long]())  # Start with no edges banned
         route_set = new RouteSet_t()
         rng.seed(0)

-        # We'll go at most `max_depth` iterations down, at each depth we maintain a deque of the next set of banned edges to consider
+        # We'll go at most `max_depth` iterations down, at each depth we maintain a queue of the next set of banned edges to consider
         for depth in range(max_depth):
             if route_set.size() >= max_routes:
                 break
@@ -185,6 +184,7 @@
             for banned in queue:
+                # Copying the costs back into the scratch costs buffer. We could keep track of the modifications and reverse them as well
                 memcpy(&scratch_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double))

                 for connector in deref(banned):
                     scratch_cost[connector] = INFINITY

                 RouteChoice.path_find(self, origin_index, dest_index, scratch_cost)

+                # Mark this set of banned links as seen
+                removed_links.insert(banned)
+
+                # If the destination is reachable we must build the path and readd
                 if self.predecessors_view[dest_index] >= 0:
                     vec = new vector[long long]()
                     # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be
                     p = dest_index
                     while p != origin_index:
                         connector = self.conn_view[p]
                         p = self.predecessors_view[p]
                         vec.push_back(connector)

-                    # Mark this set of banned links as seen
-                    removed_links.insert(banned)
-
-                    # This element is already in our route set, skip it
-                    if route_set.find(vec) != route_set.end():
-                        continue
-
-                    working_set.push_back(make_pair(banned, vec))
-                    if working_set.size() + route_set.size() >= max_routes:
+                    for connector in deref(vec):
+                        # This is one area for potential improvement. Here we construct a new set from the old one, copying all the elements
+                        # then add a single element. An incremental set hash function could be of use. However, the size of this set is
+                        # directly dependent on the current depth. As the route set size grows so incredibly fast the depth will rarely get
+                        # high enough for this to matter. 
+                        # Copy the previously banned links, then for each vector in the path we add one and push it onto our queue
+                        new_banned = new unordered_set[long long](deref(banned))
+                        new_banned.insert(connector)
+                        # If we've already seen this set of removed links before we already know what the path is and it's in our route set
+                        if removed_links.find(new_banned) != removed_links.end():
+                            del new_banned
+                        else:
+                            next_queue.push_back(new_banned)
+
+                    # The deduplication of routes occurs here
+                    route_set.insert(vec)
+                    if route_set.size() >= max_routes:
                         break

-            for x in working_set:
-                banned = x.first
-                vec = x.second
-                route_set.insert(vec)
-
-                # Copy the previously banned links, then for each vector in the path we add one and push it onto our queue
-                for edge in deref(vec):
-                    # This is one area for potential improvement. Here we construct a new set from the old one, copying all the elements
-                    # then add a single element. An incremental set hash function could be of use. However, the size of this set is
-                    # directly dependent on the current depth. As the route set size grows so incredibly fast the depth will rarely get
-                    # high enough for this to matter.
-                    new_banned = new unordered_set[long long](deref(banned))
-                    new_banned.insert(edge)
-                    # If we've already seen this set of removed links before we already know what the path is and it's in our route set
-                    if removed_links.find(new_banned) != removed_links.end():
-                        del new_banned
-                    else:
-                        next_queue.push_back(new_banned)

             queue.swap(next_queue)
             next_queue.clear()
-            working_set.clear()

             # We may have added more banned link sets to the queue then found out we hit the max depth, we should free those
             for banned in queue:
                 del banned

+        printf("Depth: %d\n", depth)
+        printf("Routes: %zu\n", route_set.size())
+
         return route_set

From 129360ccac50bdbcb23a97ae07c373600dd5bf22 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Wed, 17 Jan 2024 11:52:26 +1000
Subject: [PATCH 21/44] Remove prints

---
 aequilibrae/paths/route_choice.pxd | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd
index c89ab8531..24d2ee2e1 100644
--- a/aequilibrae/paths/route_choice.pxd
+++ b/aequilibrae/paths/route_choice.pxd
@@ -58,16 +58,13 @@ cdef extern from * nogil:
     struct OrderedVectorPointerHasher {
         size_t operator()(const std::vector<long long> *V) const {
             size_t seed = V->size();
-            // printf("Vector at %p, [", V);
             long long x;
             for(auto &i : *V) {
                 x = ((i >> 16) ^ i) * 0x45d9f3b;
                 x = ((x >> 16) ^ x) * 0x45d9f3b;
                 x = (x >> 16) ^ x;
                 seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-                // printf("%lld, ", i);
             }
-            // printf("] has hash: %zu\\n", seed);
             return seed;
         }
     };
@@ -77,14 +74,10 @@ cdef extern from * nogil:
     struct UnorderedSetPointerHasher {
         size_t operator()(const std::unordered_set<long long> *S) const {
             size_t hash = 1;
-            // printf("Set at %p, [", S);
             for(auto &i : *S) {
                 hash *= 1779033703 + 2 * i;
-                // printf("%lld, ", i);
             }
-            hash /= 2;
-            // printf("] has hash: %zu\\n", hash);
-            return hash;
+            return hash / 2;
         }
     };

From dc9577817466012521ab76c80d4ab89c04a30244 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Wed, 17 Jan 2024 15:42:53 +1000
Subject: [PATCH 22/44] Add parallelised batched method for running a list of
 od pairs

---
 aequilibrae/paths/route_choice.pxd           | 17 +++-
 aequilibrae/paths/route_choice.pyx           | 94 ++++++++++++++++----
 tests/aequilibrae/paths/test_route_choice.py | 78 +++++++++++++---
 3 files changed, 156 insertions(+), 33 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pxd 
b/aequilibrae/paths/route_choice.pxd index 24d2ee2e1..79598e49e 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -112,9 +112,18 @@ cdef class RouteChoice: long long [:] nodes_to_indices_view double [:] lat_view double [:] lon_view - long long [:] predecessors_view long long [:] ids_graph_view - long long [:] conn_view long long [:] compressed_link_ids - cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] scratch_cost) noexcept nogil - cdef RouteSet_t *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil + long long num_nodes + cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] scratch_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil + cdef RouteSet_t *generate_route_set( + RouteChoice self, + long origin_index, + long dest_index, + unsigned int max_routes, + unsigned int max_depth, + double [:] thread_cost, + long long [:] thread_predecessors, + long long [:] thread_conn, + unsigned int seed + ) noexcept nogil diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index e4292c21c..1bc68f268 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -63,6 +63,7 @@ and inefficient, data is copied all over the place. from aequilibrae.paths.results import PathResults from aequilibrae import Graph import numpy as np +from typing import List, Tuple from libc.math cimport INFINITY from libc.string cimport memcpy @@ -73,6 +74,7 @@ from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport pair from cython.operator cimport dereference as deref, preincrement as inc +from cython.parallel cimport parallel, prange, threadid import numpy as np @@ -98,9 +100,8 @@ cdef class RouteChoice: tmp = graph.lonlat_index.loc[graph.compact_all_nodes] self.lat_view = tmp.lat.values self.lon_view = tmp.lon.values - self.predecessors_view = np.empty(graph.compact_num_nodes + 1, dtype=np.int64) self.ids_graph_view = graph.compact_graph.id.values - self.conn_view = np.empty(graph.compact_num_nodes + 1, dtype=np.int64) + self.num_nodes = graph.compact_num_nodes def __dealloc__(self): """ @@ -109,21 +110,24 @@ cdef class RouteChoice: """ pass - def run(self, long origin, long destination, unsigned int max_routes=0, unsigned int max_depth=0): + def run(self, long origin, long destination, unsigned int max_routes=0, unsigned int max_depth=0, unsigned int seed=0): cdef: long origin_index = self.nodes_to_indices_view[origin] long dest_index = self.nodes_to_indices_view[destination] double [:] scratch_cost = np.empty(self.cost_view.shape[0]) # allocation of new memory view required gil + long long [:] scratch_predecessor = np.empty(self.num_nodes + 1, dtype=np.int64) + long long [:] scratch_conn = np.empty(self.num_nodes + 1, dtype=np.int64) RouteSet_t *results - unordered_map[unordered_set[long long] *, vector[long long] *].const_iterator results_iter if origin_index == -1: raise ValueError(f"Origin {origin} is not present within the compact graph") if dest_index == -1: raise ValueError(f"Destination {destination} is not present within the compact graph") + print(origin_index, dest_index) + assert False with nogil: - results = RouteChoice.generate_route_set(self, origin_index, dest_index, max_routes, max_depth, scratch_cost) + results = 
RouteChoice.generate_route_set(self, origin_index, dest_index, max_routes, max_depth, scratch_cost, scratch_predecessor, scratch_conn, seed) res = [] for x in deref(results): @@ -132,19 +136,61 @@ cdef class RouteChoice: return res - cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] scratch_cost) noexcept nogil: + def batched(self, ods: List[Tuple[int, int]], unsigned int max_routes=0, unsigned int max_depth=0, unsigned int seed=0, unsigned int cores=1): + cdef: + vector[pair[long long, long long]] c_ods = ods + + # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this memory + double [:, :] cost_matrix = np.empty((cores, self.cost_view.shape[0]), dtype=float) # allocation of new memory view required gil + long long [:, :] predecessors_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) + long long [:, :] conn_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) + + vector[RouteSet_t *] *results = new vector[RouteSet_t *](len(ods)) + long origin_index + long dest_index + long long i + + with nogil, parallel(num_threads=cores): + for i in prange(c_ods.size()): + origin_index = self.nodes_to_indices_view[c_ods[i].first] + dest_index = self.nodes_to_indices_view[c_ods[i].second] + deref(results)[i] = RouteChoice.generate_route_set( + self, + origin_index, + dest_index, + max_routes, + max_depth, + cost_matrix[threadid()], + predecessors_matrix[threadid()], + conn_matrix[threadid()], + seed + ) + + # Build results into python objects using Cythons auto-conversion from vector[long long] to list then to tuple (for set operations) + res = [] + for result in deref(results): + links = [] + for route in deref(result): + links.append(tuple(deref(route))) + del route + res.append(links) + + del results + return dict(zip(ods, res)) + + cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil: path_finding_a_star( origin_index, dest_index, - scratch_cost, + thread_cost, self.b_nodes_view, self.graph_fs_view, self.nodes_to_indices_view, self.lat_view, self.lon_view, - self.predecessors_view, + thread_predecessors, self.ids_graph_view, - self.conn_view, + thread_conn, EQUIRECTANGULAR # FIXME: enum import failing due to redefinition ) @@ -152,7 +198,17 @@ cdef class RouteChoice: @cython.wraparound(False) @cython.embedsignature(True) @cython.initializedcheck(False) - cdef RouteSet_t *generate_route_set(RouteChoice self, long origin_index, long dest_index, unsigned int max_routes, unsigned int max_depth, double [:] scratch_cost) noexcept nogil: + cdef RouteSet_t *generate_route_set( + RouteChoice self, + long origin_index, + long dest_index, + unsigned int max_routes, + unsigned int max_depth, + double [:] thread_cost, + long long [:] thread_predecessors, + long long [:] thread_conn, + unsigned int seed + ) noexcept nogil: """Main method for route set generation""" cdef: RouteSet_t *route_set @@ -172,7 +228,7 @@ cdef class RouteChoice: queue.push_back(new unordered_set[long long]()) # Start with no edges banned route_set = new RouteSet_t() - rng.seed(0) + rng.seed(seed) # We'll go at most `max_depth` iterations down, at each depth we maintain a queue of the next set of banned edges to consider for depth in range(max_depth): @@ -185,24 +241,24 @@ cdef class RouteChoice: for banned in queue: # Copying the costs back into the scratch costs buffer. 
We could keep track of the modifications and reverse them as well - memcpy(&scratch_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) + memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) for connector in deref(banned): - scratch_cost[connector] = INFINITY + thread_cost[connector] = INFINITY - RouteChoice.path_find(self, origin_index, dest_index, scratch_cost) + RouteChoice.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn) # Mark this set of banned links as seen removed_links.insert(banned) # If the destination is reachable we must build the path and readd - if self.predecessors_view[dest_index] >= 0: + if thread_predecessors[dest_index] >= 0: vec = new vector[long long]() # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be p = dest_index while p != origin_index: - connector = self.conn_view[p] - p = self.predecessors_view[p] + connector = thread_conn[p] + p = thread_predecessors[p] vec.push_back(connector) for connector in deref(vec): @@ -231,7 +287,7 @@ cdef class RouteChoice: for banned in queue: del banned - printf("Depth: %d\n", depth) - printf("Routes: %zu\n", route_set.size()) + # printf("Depth: %d\n", depth) + # printf("Routes: %zu\n", route_set.size()) return route_set diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index ea7ad7f27..9ad9e3c99 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -9,6 +9,9 @@ from aequilibrae import Graph, Project from aequilibrae.paths.route_choice import RouteChoice + +import time + # from ...data import siouxfalls_project @@ -31,25 +34,80 @@ def setUp(self) -> None: def tearDown(self) -> None: self.project.close() - def test_route_choice(self): + # def test_route_choice(self): + # rc = RouteChoice(self.graph) + + # results = rc.run(220591, 352, max_routes=1000, max_depth=0) + # # print(*results, sep="\n") + # print(len(results), len(set(results))) + # self.assertEqual(len(results), len(set(results))) + + # import shapely + + # links = self.project.network.links.data.set_index("link_id") + # df = [] + # for route in results: + # df.append( + # ( + # route, + # shapely.MultiLineString( + # links.loc[ + # self.graph.graph[self.graph.graph.__compressed_id__.isin(route)].link_id + # ].geometry.to_list() + # ).wkt, + # ) + # ) + + # df = pd.DataFrame(df, columns=["route", "geometry"]) + # df.to_csv("test1.csv") + + # # breakpoint() + + def test_route_choice_batched(self): rc = RouteChoice(self.graph) - results = rc.run(220591, 352, max_routes=5000, max_depth=0) - # print(*results, sep="\n") - print(len(results), len(set(results))) - self.assertEqual(len(results), len(set(results))) + # breakpoint() + # results = + np.random.seed(0) + n = 1000 + cores = 4 + + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(n, 2), replace=False)] + + t = time.time() + results = rc.batched(nodes, max_routes=20, max_depth=0, cores=cores) + end = time.time() - t + print("Time:", end, "per:", end / n) + + # breakpoint() + + for od, route_set in results.items(): + self.assertEqual(len(route_set), len(set(route_set))) + # import geopandas as gpd # import shapely + # links = self.project.network.links.data.set_index("link_id") # df = [] - # for route in results: - # df.append((route, 
shapely.MultiLineString(links.loc[self.graph.graph[self.graph.graph.__compressed_id__.isin(route)].link_id].geometry.to_list()).wkt)) + # for od, route_set in results.items(): + # for route in route_set: + # df.append( + # ( + # *od, + # shapely.MultiLineString( + # links.loc[ + # self.graph.graph[self.graph.graph.__compressed_id__.isin(route)].link_id + # ].geometry.to_list() + # ), + # ) + # ) - # df = pd.DataFrame(df, columns=["route", "geometry"]) - # df.to_csv("test1.csv") + # df = gpd.GeoDataFrame(df, columns=["origin", "destination", "geometry"]) + # df.set_geometry("geometry") + # df.to_file("test1.gpkg", layer='routes', driver="GPKG") # breakpoint() - + assert False if __name__ == "__main__": t = TestRouteChoice() From 82204ba546a04eb2de98f22d545e381b39733a6f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 18 Jan 2024 11:30:05 +1000 Subject: [PATCH 23/44] Remove dead code and fix A* test now that skimming is disabled --- aequilibrae/paths/route_choice.pyx | 3 --- tests/aequilibrae/paths/test_pathResults.py | 7 ++++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 1bc68f268..b41fcde75 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -287,7 +287,4 @@ cdef class RouteChoice: for banned in queue: del banned - # printf("Depth: %d\n", depth) - # printf("Routes: %zu\n", route_set.size()) - return route_set diff --git a/tests/aequilibrae/paths/test_pathResults.py b/tests/aequilibrae/paths/test_pathResults.py index c1e1def5e..27df20bd0 100644 --- a/tests/aequilibrae/paths/test_pathResults.py +++ b/tests/aequilibrae/paths/test_pathResults.py @@ -96,12 +96,13 @@ def test_compute_paths(self): self.assertEqual(list(self.r.milepost), [0, 4, 9], "Path computation failed. 
Wrong milepost results") def test_compute_with_skimming(self): - for early_exit, a_star in product([True, False], repeat=2): - with self.subTest(early_exit=early_exit, a_star=a_star): + # Skimming for A* disabled + for early_exit in (True, False): + with self.subTest(early_exit=early_exit): r = PathResults() self.g.set_skimming("free_flow_time") r.prepare(self.g) - r.compute_path(origin, dest, early_exit=early_exit, a_star=a_star) + r.compute_path(origin, dest, early_exit=early_exit) self.assertEqual(r.milepost[-1], r.skims[dest], "Skims computed wrong when computing path") def test_update_trace(self): From 1313f3c48620766d0cbd65a4f1963596643ec8db Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 18 Jan 2024 11:30:38 +1000 Subject: [PATCH 24/44] Move `RouteChoice.run` to be wrapper around `RouteChoice.batched` No need for both methods --- aequilibrae/paths/route_choice.pyx | 55 ++++++++++++++---------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index b41fcde75..39e197af7 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -110,33 +110,10 @@ cdef class RouteChoice: """ pass - def run(self, long origin, long destination, unsigned int max_routes=0, unsigned int max_depth=0, unsigned int seed=0): - cdef: - long origin_index = self.nodes_to_indices_view[origin] - long dest_index = self.nodes_to_indices_view[destination] - double [:] scratch_cost = np.empty(self.cost_view.shape[0]) # allocation of new memory view required gil - long long [:] scratch_predecessor = np.empty(self.num_nodes + 1, dtype=np.int64) - long long [:] scratch_conn = np.empty(self.num_nodes + 1, dtype=np.int64) - RouteSet_t *results - - if origin_index == -1: - raise ValueError(f"Origin {origin} is not present within the compact graph") - if dest_index == -1: - raise ValueError(f"Destination {destination} is not present within the compact graph") - print(origin_index, dest_index) - assert False - - with nogil: - results = RouteChoice.generate_route_set(self, origin_index, dest_index, max_routes, max_depth, scratch_cost, scratch_predecessor, scratch_conn, seed) - - res = [] - for x in deref(results): - res.append(tuple(deref(x))) - del x + def run(self, origin: int, destination: int, max_routes: int = 0, max_depth: int = 0, seed: int = 0): + return self.batched([(origin, destination)], max_routes=max_routes, max_depth=max_depth, seed=seed)[(origin, destination)] - return res - - def batched(self, ods: List[Tuple[int, int]], unsigned int max_routes=0, unsigned int max_depth=0, unsigned int seed=0, unsigned int cores=1): + def batched(self, ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, seed: int = 0, cores: int = 1): cdef: vector[pair[long long, long long]] c_ods = ods @@ -150,7 +127,25 @@ cdef class RouteChoice: long dest_index long long i - with nogil, parallel(num_threads=cores): + if max_routes == 0 and max_depth == 0: + raise ValueError("Either `max_routes` or `max_depth` must be >= 0") + + if max_routes < 0 or max_depth < 0 or cores < 0: + raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") + + for o, d in ods: + if self.nodes_to_indices_view[o] == -1: + raise ValueError(f"Origin {o} is not present within the compact graph") + if self.nodes_to_indices_view[d] == -1: + raise ValueError(f"Destination {d} is not present within the compact graph") + + cdef: + unsigned int c_max_routes = max_routes + unsigned int c_max_depth = max_depth + 
unsigned int c_seed = seed + unsigned int c_cores = cores + + with nogil, parallel(num_threads=c_cores): for i in prange(c_ods.size()): origin_index = self.nodes_to_indices_view[c_ods[i].first] dest_index = self.nodes_to_indices_view[c_ods[i].second] @@ -158,12 +153,12 @@ cdef class RouteChoice: self, origin_index, dest_index, - max_routes, - max_depth, + c_max_routes, + c_max_depth, cost_matrix[threadid()], predecessors_matrix[threadid()], conn_matrix[threadid()], - seed + c_seed ) # Build results into python objects using Cythons auto-conversion from vector[long long] to list then to tuple (for set operations) From bacc76c49d499e109bed8c875cf7b72ba2637915 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 18 Jan 2024 11:31:17 +1000 Subject: [PATCH 25/44] Fix infinite loop in the case that all possible paths are exhausted --- aequilibrae/paths/route_choice.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 39e197af7..c476d80c5 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -227,7 +227,7 @@ cdef class RouteChoice: # We'll go at most `max_depth` iterations down, at each depth we maintain a queue of the next set of banned edges to consider for depth in range(max_depth): - if route_set.size() >= max_routes: + if route_set.size() >= max_routes or queue.size() == 0: break # If we could potentially fill the route_set after this depth, shuffle the queue From 4d6536ffc3edeab858516e2024281395fbe36911 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 18 Jan 2024 11:31:47 +1000 Subject: [PATCH 26/44] Add comprehensive testing --- tests/aequilibrae/paths/test_route_choice.py | 141 +++++++++---------- 1 file changed, 66 insertions(+), 75 deletions(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 9ad9e3c99..660117e51 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -12,18 +12,18 @@ import time -# from ...data import siouxfalls_project +from ...data import siouxfalls_project +# In these tests `max_depth` should be provided to prevent a runaway test case and just burning CI time class TestRouteChoice(TestCase): def setUp(self) -> None: - # os.environ["PATH"] = os.path.join(gettempdir(), "temp_data") + ";" + os.environ["PATH"] + os.environ["PATH"] = os.path.join(gettempdir(), "temp_data") + ";" + os.environ["PATH"] - # proj_path = os.path.join(gettempdir(), "test_route_choice" + uuid.uuid4().hex) - # os.mkdir(proj_path) - # zipfile.ZipFile(join(dirname(siouxfalls_project), "sioux_falls_single_class.zip")).extractall(proj_path) + proj_path = os.path.join(gettempdir(), "test_route_choice" + uuid.uuid4().hex) + os.mkdir(proj_path) + zipfile.ZipFile(join(dirname(siouxfalls_project), "sioux_falls_single_class.zip")).extractall(proj_path) - proj_path = "/home/jake/Software/aequilibrae_performance_tests/models/Arkansas/" self.project = Project() self.project.open(proj_path) self.project.network.build_graphs(fields=["distance"], modes=["c"]) @@ -34,82 +34,73 @@ def setUp(self) -> None: def tearDown(self) -> None: self.project.close() - # def test_route_choice(self): - # rc = RouteChoice(self.graph) - - # results = rc.run(220591, 352, max_routes=1000, max_depth=0) - # # print(*results, sep="\n") - # print(len(results), len(set(results))) - # self.assertEqual(len(results), len(set(results))) - - # import shapely - - # links = 
self.project.network.links.data.set_index("link_id") - # df = [] - # for route in results: - # df.append( - # ( - # route, - # shapely.MultiLineString( - # links.loc[ - # self.graph.graph[self.graph.graph.__compressed_id__.isin(route)].link_id - # ].geometry.to_list() - # ).wkt, - # ) - # ) - - # df = pd.DataFrame(df, columns=["route", "geometry"]) - # df.to_csv("test1.csv") + def test_route_choice(self): + rc = RouteChoice(self.graph) + a, b = 1, 20 + + results = rc.run(a, b, max_routes=10, max_depth=0) + self.assertEqual(len(results), 10, "Returned more routes than expected") + self.assertEqual(len(results), len(set(results)), "Returned duplicate routes") + + # With a depth 1 only one path will be found + results = rc.run(a, b, max_routes=0, max_depth=1) + self.assertEqual(len(results), 1, "Depth of 1 didn't yield a lone route") + self.assertListEqual(results, [(58, 52, 29, 24, 12, 8, 5, 1)], "Initial route isn't the shortest A* route") + + # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths + results2 = rc.run(a, b, max_routes=0, max_depth=2) + self.assertEqual( + len(results2), 1 + len(results[0]) - 4, "Depth of 2 didn't yield the expected number of routes" + ) + self.assertTrue(results[0] in results2, "Initial route isn't present in a lower depth") + + self.assertListEqual( + rc.run(a, b, max_routes=0, max_depth=2, seed=0), + rc.run(a, b, max_routes=0, max_depth=2, seed=10), + "Seeded and unseeded results differ with unlimited `max_routes` (queue is incorrectly being shuffled)", + ) + + self.assertNotEqual( + rc.run(a, b, max_routes=3, max_depth=2, seed=0), + rc.run(a, b, max_routes=3, max_depth=2, seed=10), + "Seeded and unseeded results don't differ with limited `max_routes` (queue is not being shuffled)", + ) + + def test_route_choice_empty_path(self): + rc = RouteChoice(self.graph) + a = 1 - # # breakpoint() + self.assertEqual( + rc.batched([(a, a)], max_routes=0, max_depth=3), {(a, a): []}, "Route set from self to self should be empty" + ) def test_route_choice_batched(self): - rc = RouteChoice(self.graph) - - # breakpoint() - # results = np.random.seed(0) - n = 1000 - cores = 4 - - nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(n, 2), replace=False)] + rc = RouteChoice(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - t = time.time() - results = rc.batched(nodes, max_routes=20, max_depth=0, cores=cores) - end = time.time() - t - print("Time:", end, "per:", end / n) + max_routes = 20 + results = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) # breakpoint() - for od, route_set in results.items(): - self.assertEqual(len(route_set), len(set(route_set))) - - # import geopandas as gpd - # import shapely - - # links = self.project.network.links.data.set_index("link_id") - # df = [] - # for od, route_set in results.items(): - # for route in route_set: - # df.append( - # ( - # *od, - # shapely.MultiLineString( - # links.loc[ - # self.graph.graph[self.graph.graph.__compressed_id__.isin(route)].link_id - # ].geometry.to_list() - # ), - # ) - # ) - - # df = gpd.GeoDataFrame(df, columns=["origin", "destination", "geometry"]) - # df.set_geometry("geometry") - # df.to_file("test1.gpkg", layer='routes', driver="GPKG") + self.assertEqual(len(results), len(nodes), "Requested number of route sets not returned") - # breakpoint() - assert False + for od, route_set in results.items(): + self.assertEqual(len(route_set), 
len(set(route_set)), f"Duplicate routes returned for {od}") + self.assertEqual(len(route_set), max_routes, f"Requested number of routes not returned for {od}") -if __name__ == "__main__": - t = TestRouteChoice() - t.setUp() - t.test_route_choice() + def test_route_choice_exceptions(self): + rc = RouteChoice(self.graph) + args = [ + (1, 20, 0, 0), + (1, 20, -1, 0), + (1, 20, 0, -1), + (0, 20, 1, 1), + (1, 0, 1, 1), + ] + + for a, b, max_routes, max_depth in args: + with self.subTest(a=a, b=b, max_routes=max_routes, max_depth=max_depth): + with self.assertRaises(ValueError): + rc.run(a, b, max_routes=max_routes, max_depth=max_depth) From e55a4333c79249a4e77df9c8736a0ddaa94a61b9 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 18 Jan 2024 11:39:41 +1000 Subject: [PATCH 27/44] Warning clean up --- aequilibrae/paths/route_choice.pyx | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index c476d80c5..e63d91403 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -89,6 +89,7 @@ cdef class RouteChoice: def __cinit__(self): """C level init. For C memory allocation and initialisation. Called exactly once per object.""" + pass def __init__(self, graph: Graph): """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" @@ -113,6 +114,8 @@ cdef class RouteChoice: def run(self, origin: int, destination: int, max_routes: int = 0, max_depth: int = 0, seed: int = 0): return self.batched([(origin, destination)], max_routes=max_routes, max_depth=max_depth, seed=seed)[(origin, destination)] + # Bounds checking doesn't really need to be disabled here but the warning is annoying + @cython.boundscheck(False) def batched(self, ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, seed: int = 0, cores: int = 1): cdef: vector[pair[long long, long long]] c_ods = ods @@ -123,9 +126,7 @@ cdef class RouteChoice: long long [:, :] conn_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) vector[RouteSet_t *] *results = new vector[RouteSet_t *](len(ods)) - long origin_index - long dest_index - long long i + long long origin_index, dest_index, i if max_routes == 0 and max_depth == 0: raise ValueError("Either `max_routes` or `max_depth` must be >= 0") @@ -133,17 +134,18 @@ cdef class RouteChoice: if max_routes < 0 or max_depth < 0 or cores < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") - for o, d in ods: - if self.nodes_to_indices_view[o] == -1: - raise ValueError(f"Origin {o} is not present within the compact graph") - if self.nodes_to_indices_view[d] == -1: - raise ValueError(f"Destination {d} is not present within the compact graph") - cdef: unsigned int c_max_routes = max_routes unsigned int c_max_depth = max_depth unsigned int c_seed = seed unsigned int c_cores = cores + long long o, d + + for o, d in ods: + if self.nodes_to_indices_view[o] == -1: + raise ValueError(f"Origin {o} is not present within the compact graph") + if self.nodes_to_indices_view[d] == -1: + raise ValueError(f"Destination {d} is not present within the compact graph") with nogil, parallel(num_threads=c_cores): for i in prange(c_ods.size()): From e7647b95f35c55a68c3bb1023dba0f0c6cb5f1dd Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 18 Jan 2024 12:17:49 +1000 Subject: [PATCH 28/44] Update commentary and fix memory leak --- aequilibrae/paths/route_choice.pyx | 110 ++++++++++++++--------------- 1 
file changed, 52 insertions(+), 58 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx
index e63d91403..d006156f1 100644
--- a/aequilibrae/paths/route_choice.pyx
+++ b/aequilibrae/paths/route_choice.pyx
@@ -1,82 +1,72 @@
 # cython: language_level=3str
-# distutils: define_macros=CYTHON_TRACE_NOGIL=1

-"""This module aims to implemented the BFS-LE algorithm as described in Rieser-Schüssler, Balmer, and Axhausen,
-'Route Choice Sets for Very High-Resolution Data'.
+"""This module aims to implement the BFS-LE algorithm as described in Rieser-Schüssler, Balmer, and Axhausen, 'Route
+Choice Sets for Very High-Resolution Data'. https://doi.org/10.1080/18128602.2012.671383

 A rough overview of the algorithm is as follows.
     1. Prepare the initial graph, this is depth 0 with no links removed.
-    2. Find a short path, P. If P is empty stop, else add P to the path set.
+    2. Find a short path, P. If P is not empty add P to the path set.
     3. For all links p in P, remove p from E, compounding with the previously removed links.
     4. De-duplicate the sub-graphs, we only care about unique sub-graphs.
     5. Go to 2.

-Thoughts:
-
- - Path set: One issue is that the path set can't be stored as a simple sub-graph. This is because it is the union of
-   multiple paths. The union of two partially separate paths may create paths that are no in the paths
-   themselves. Instead I believe we can store the paths as (compressed) tries, operating on the common prefix
-   links/nodes. Each branch in the trie would encode a branch in the choice of route. This has the benefit of being
-   very small to store and iterating over all choices is some traversal of the tree. Downsides of this are, insertion
-   and search scale with the length of the paths. Each lookup requires a linear time search of the existing tree. As
-   each link in the path is removed the number of branches scales with the length of the path times the degree of the
-   vertices.
-
-   Another option is hash maps, just throw hash maps at it. Upsides of this is they are much more generic, no special
-   methods required and we'll likely be able to use something off the shelf. Downsides are that their performance is
-   largely dependent on the hash function. We'll need to use the set of removed edges as the key, which the path as
-   the value. This means we can't compress the paths. Choosing a good hash function may be tough, because link/node
-   ids can be arbitrarily large we'll have to consider overflows, though an non-naive function should handle this
-   fine. We'll also want to avoid modulo, Wikipedia says using a multiply-shift scheme with a Mersenne prime like
-   2^61 - 1 should work well. Although we have variable length paths, fixed length vector hashing can be applied and
-   padded to blocks of our paths.
-
- - Removed link set: This set suffers from the similar issues as the path set as order doesn't matter. I think a hash
-   or tree set is about the only way to go about this. Since order doesn't matter a trie doesn't make sense but a
-   tree set using sorted node/link ids could work.
-
-   Another option is a bit map. For a million link network, the bitmap would "only" be 125kB. Membership checks and
-   addition and removal from this set would be incredibly fast, faster than anything else, however comparison may
-   suffer. We could hash this bitmap but if we're hashing it we might as well just hash the removal set.
-
-   We could also nest the hash sets, essentially building up a hash set of previously seen hashes. 
-
- - Hash functions: We're looking for a "incremental (multi)set hash function", because we don't need it to be secure
-   at all some commutative binary operation should work e.g. +, *, ^. Whether or not they make good hash functions
-   remains to be seen.
-
- - Graph modification: Actually removing a link from the graph would require modification of the existing
-   methods. Instead we can just require that the cost of a like is float or double and set it to INFINITY, that way
-   the algorithms will never consider the link as viable.
-
-Current front runner: Suffix trie of (reversed) paths, each node will store a pointer to the parent node allowing
-traversal up the tree to reconstruct the path.
-Removed link set stored as a sorted prefix trie of sorts. Haven't flushed out the full idea for this but I think it
-could work. Each node would store a pointer to a node in the route set tree that represents the path found with that
-set of removed links.
-
-Current implementation: Hash maps, hash sets, and whatever it took to get something working. Implementation is naive
-and inefficient, data is copied all over the place.
+Details: The general idea of the algorithm is pretty simple, as is the implementation. The caveat here is that there is
+a lot of cpp interop and memory management. A description of the purpose of variables is in order:
+
+route_set: See route_choice.pxd for full type signature. It's an unordered set (hash set) of pointers to vectors of link
+IDs. It uses a custom hashing function and comparator. The hashing function is defined in a string that is inlined
+directly into the output cpp. This is done to allow declaring the `()` operator, which is required and AFAIK not
+possible in Cython. The hash is designed to dereference then hash order dependent vectors. One isn't provided by
+stdlib. The comparator simply dereferences the pointer and uses the vector comparator. It's designed to store the
+outputted paths. Heap allocated (needs to be returned).
+
+removed_links: See route_choice.pxd for full type signature. It's an unordered set of pointers to unordered sets of link
+IDs. Similarly to `route_set` it uses a custom hash function and comparator. This hash function is designed to be order
+independent and should only use commutative operations. The comparator is the same. It's designed to store all of the
+removed link sets we've seen before. This allows us to detect duplicated graphs.
+
+rng: A custom imported version of std::linear_congruential_engine. libcpp doesn't provide one so we do. It should be
+significantly faster than the std::mersenne_twister_engine without sacrificing much. We don't need amazing RNG, just
+ok and fast. This is only used to shuffle the queue.
+
+queue, next_queue: These are vectors of pointers to sets of removed links. We never need to push to the front of these so a
+vector is best. We maintain two queues, one that we are currently iterating over, and one that we can add to, building
+up with all the newly removed link sets. These two are swapped at the end of an iterator, next_queue is then
+cleared. These store sets of removed links.
+
+banned, next_banned: `banned` is the iterator variable for `queue`. `banned` is copied into `next_banned` where another
+link can be added without mutating `banned`. If we've already seen this set of removed links `next_banned` is immediately
+deallocated. Otherwise it's placed into `next_queue`.
+
+vec: `vec` is a scratch variable to store pointers to new vectors, or paths while we are building them. 
+is found a new one is allocated, built, and stored in the route_set.
+
+p, connector: Scratch variables for iteration.
+
+
+Optimisations: As described in the paper, both optimisations have been implemented. The path finding operates on the
+compressed graph and the queue is shuffled if it's possible to fill the route set that iteration. The route set may not
+be filled due to duplicate paths but we can't know that in advance so we shuffle anyway.
+
+Any further optimisations should focus on the path finding; from benchmarks it dominates the run time (~98%). Since huge
+routes aren't required, small-ish things like the memcpy and banned link set copy aren't high priority.
 """

-from aequilibrae.paths.results import PathResults
 from aequilibrae import Graph
-import numpy as np
-from typing import List, Tuple

 from libc.math cimport INFINITY
 from libc.string cimport memcpy
-from libc.stdio cimport printf
 from libc.limits cimport UINT_MAX
 from libcpp.vector cimport vector
 from libcpp.unordered_set cimport unordered_set
 from libcpp.unordered_map cimport unordered_map
 from libcpp.utility cimport pair
-from cython.operator cimport dereference as deref, preincrement as inc
+from cython.operator cimport dereference as deref
 from cython.parallel cimport parallel, prange, threadid

 import numpy as np
+from typing import List, Tuple

 # It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation times
 include 'basic_path_finding.pyx'

@@ -121,7 +111,7 @@ cdef class RouteChoice:
             vector[pair[long long, long long]] c_ods = ods

             # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this memory
-            double [:, :] cost_matrix = np.empty((cores, self.cost_view.shape[0]), dtype=float)  # allocation of new memory view required gil
+            double [:, :] cost_matrix = np.empty((cores, self.cost_view.shape[0]), dtype=float)
             long long [:, :] predecessors_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64)
             long long [:, :] conn_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64)

@@ -206,7 +196,7 @@ cdef class RouteChoice:
             long long [:] thread_conn,
             unsigned int seed
     ) noexcept nogil:
-        """Main method for route set generation"""
+        """Main method for route set generation. See top of file for commentary."""
         cdef:
             RouteSet_t *route_set
             LinkSet_t removed_links
@@ -261,7 +251,7 @@ cdef class RouteChoice:
                     for connector in deref(vec):
                         # This is one area for potential improvement. Here we construct a new set from the old one, copying all the elements
                         # then add a single element. An incremental set hash function could be of use. However, the size of this set is
-                        # directly dependent on the current depth. As the route set size grows so incredibly fast the depth will rarely get
+                        # directly dependent on the current depth and as the route set size grows so incredibly fast the depth will rarely get
                        # high enough for this to matter.
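The incremental set hash floated in the comment above can be made concrete: with a commutative hash, adding one link to a banned set could update the parent's hash in O(1) instead of rehashing a freshly copied set. A hedged Python sketch, with hypothetical names (not what the code currently does):

    def child_hash(parent_hash, new_link):
        # XOR in the new element's hash instead of recomputing over the whole copied set.
        return parent_hash ^ hash(new_link)

    # Sanity check against a full recompute over the child set {1, 2} | {3}.
    parent_hash = hash(1) ^ hash(2)
    full_hash = hash(1) ^ hash(2) ^ hash(3)
    assert child_hash(parent_hash, 3) == full_hash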
                        # Copy the previously banned links, then for each vector in the path we add one and push it onto our queue
                        new_banned = new unordered_set[long long](deref(banned))

@@ -284,4 +274,8 @@ cdef class RouteChoice:
         for banned in queue:
             del banned

+        # We should also free all the sets in removed_links, we won't be needing them
+        for banned in removed_links:
+            del banned
+
         return route_set

From 80bd7f7d132c0956ded804dfe746e7565cdafbcd Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Thu, 18 Jan 2024 12:20:39 +1000
Subject: [PATCH 29/44] Typos

---
 aequilibrae/paths/route_choice.pyx | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx
index d006156f1..a0b4d8a6e 100644
--- a/aequilibrae/paths/route_choice.pyx
+++ b/aequilibrae/paths/route_choice.pyx
@@ -31,19 +31,18 @@ ok and fast. This is only used to shuffle the queue.

 queue, next_queue: These are vectors of pointers to sets of removed links. We never need to push to the front of these so a
 vector is best. We maintain two queues, one that we are currently iterating over, and one that we can add to, building
-up with all the newly removed link sets. These two are swapped at the end of an iterator, next_queue is then
+up with all the newly removed link sets. These two are swapped at the end of an iteration, next_queue is then
 cleared. These store sets of removed links.

 banned, next_banned: `banned` is the iterator variable for `queue`. `banned` is copied into `next_banned` where another
 link can be added without mutating `banned`. If we've already seen this set of removed links `next_banned` is immediately
 deallocated. Otherwise it's placed into `next_queue`.

-vec: `vec` is a scratch variable to store pointers to new vectors, or paths while we are building them. Each time a path
+vec: `vec` is a scratch variable to store pointers to new vectors, or rather, paths while we are building them. Each time a path
 is found a new one is allocated, built, and stored in the route_set.

 p, connector: Scratch variables for iteration.

-
 Optimisations: As described in the paper, both optimisations have been implemented. The path finding operates on the
 compressed graph and the queue is shuffled if it's possible to fill the route set that iteration. The route set may not
 be filled due to duplicate paths but we can't know that in advance so we shuffle anyway.
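Putting the docstring's pieces together, the generation loop it describes can be sketched in pure Python (hedged: `shortest_path` is a hypothetical helper returning a tuple of link IDs or None, and `max_depth` is taken literally here whereas the real methods treat 0 as unlimited; the real code runs A* on the compressed graph with banned links costed at INFINITY):

    import random

    def bfs_le(shortest_path, origin, dest, max_routes, max_depth, seed=0):
        rng = random.Random(seed)
        route_set = set()
        seen = {frozenset()}               # removed-link sets already explored
        queue = [frozenset()]              # depth 0: nothing removed
        for _depth in range(max_depth):
            if not queue or len(route_set) >= max_routes:
                break
            # Shuffle when this iteration alone could fill the route set; we
            # cannot know in advance which paths will turn out to be duplicates.
            if len(queue) + len(route_set) >= max_routes:
                rng.shuffle(queue)
            next_queue = []
            for banned in queue:
                path = shortest_path(origin, dest, banned)
                if path is None:
                    continue
                route_set.add(path)
                if len(route_set) >= max_routes:
                    break
                for link in path:
                    new_banned = banned | {link}   # copy the parent set plus one link
                    if new_banned not in seen:     # de-duplicate sub-graphs
                        seen.add(new_banned)
                        next_queue.append(new_banned)
            queue = next_queue                     # swap queues for the next depth
        return route_set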
@@ -165,6 +164,30 @@ cdef class RouteChoice:
         del results
         return dict(zip(ods, res))

+    def _generate_line_strings(self, project, graph, results):
+        """Debug method"""
+        import geopandas as gpd
+        import shapely
+
+        links = project.network.links.data.set_index("link_id")
+        df = []
+        for od, route_set in results.items():
+            for route in route_set:
+                df.append(
+                    (
+                        *od,
+                        shapely.MultiLineString(
+                            links.loc[
+                                graph.graph[graph.graph.__compressed_id__.isin(route)].link_id
+                            ].geometry.to_list()
+                        ),
+                    )
+                )
+
+        df = gpd.GeoDataFrame(df, columns=["origin", "destination", "geometry"])
+        df = df.set_geometry("geometry")
+        df.to_file("test1.gpkg", layer='routes', driver="GPKG")
+
     cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil:
         path_finding_a_star(
             origin_index,

From b45fc9f985530d6209dd77c3ff78bbf8d25f0455 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Thu, 18 Jan 2024 12:38:06 +1000
Subject: [PATCH 30/44] Rename and remove imports

---
 aequilibrae/paths/route_choice.pxd           |  6 +++---
 aequilibrae/paths/route_choice.pyx           | 10 +++++-----
 tests/aequilibrae/paths/test_route_choice.py | 14 +++++---------
 3 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd
index 79598e49e..bf68c87ea 100644
--- a/aequilibrae/paths/route_choice.pxd
+++ b/aequilibrae/paths/route_choice.pxd
@@ -104,7 +104,7 @@ ctypedef unordered_set[unordered_set[long long] *, UnorderedSetPointerHasher, Po
 ctypedef vector[pair[unordered_set[long long] *, vector[long long] *]] RouteMap_t

-cdef class RouteChoice:
+cdef class RouteChoiceSet:
     cdef:
         double [:] cost_view
         long long [:] graph_fs_view
@@ -115,9 +115,9 @@ cdef class RouteChoiceSet:
         long long [:] ids_graph_view
         long long [:] compressed_link_ids
         long long num_nodes
-    cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] scratch_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil
+    cdef void path_find(RouteChoiceSet self, long origin_index, long dest_index, double [:] scratch_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil
     cdef RouteSet_t *generate_route_set(
-        RouteChoice self,
+        RouteChoiceSet self,
         long origin_index,
         long dest_index,
         unsigned int max_routes,
diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx
index a0b4d8a6e..98bd8df36 100644
--- a/aequilibrae/paths/route_choice.pyx
+++ b/aequilibrae/paths/route_choice.pyx
@@ -70,7 +70,7 @@ from typing import List, Tuple

 # It would really be nice if these were modules.
The 'include' syntax is long deprecated and adds a lot to compilation times include 'basic_path_finding.pyx' -cdef class RouteChoice: +cdef class RouteChoiceSet: """ Route choice implemented via breadth first search with link removal (BFS-LE) as described in Rieser-Schüssler, Balmer, and Axhausen, 'Route Choice Sets for Very High-Resolution Data' @@ -140,7 +140,7 @@ cdef class RouteChoice: for i in prange(c_ods.size()): origin_index = self.nodes_to_indices_view[c_ods[i].first] dest_index = self.nodes_to_indices_view[c_ods[i].second] - deref(results)[i] = RouteChoice.generate_route_set( + deref(results)[i] = RouteChoiceSet.generate_route_set( self, origin_index, dest_index, @@ -188,7 +188,7 @@ cdef class RouteChoice: df.set_geometry("geometry") df.to_file("test1.gpkg", layer='routes', driver="GPKG") - cdef void path_find(RouteChoice self, long origin_index, long dest_index, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil: + cdef void path_find(RouteChoiceSet self, long origin_index, long dest_index, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil: path_finding_a_star( origin_index, dest_index, @@ -209,7 +209,7 @@ cdef class RouteChoice: @cython.embedsignature(True) @cython.initializedcheck(False) cdef RouteSet_t *generate_route_set( - RouteChoice self, + RouteChoiceSet self, long origin_index, long dest_index, unsigned int max_routes, @@ -256,7 +256,7 @@ cdef class RouteChoice: for connector in deref(banned): thread_cost[connector] = INFINITY - RouteChoice.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn) + RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn) # Mark this set of banned links as seen removed_links.insert(banned) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 660117e51..e5a42993f 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -8,9 +8,7 @@ import numpy as np from aequilibrae import Graph, Project -from aequilibrae.paths.route_choice import RouteChoice - -import time +from aequilibrae.paths.route_choice import RouteChoiceSet from ...data import siouxfalls_project @@ -35,7 +33,7 @@ def tearDown(self) -> None: self.project.close() def test_route_choice(self): - rc = RouteChoice(self.graph) + rc = RouteChoiceSet(self.graph) a, b = 1, 20 results = rc.run(a, b, max_routes=10, max_depth=0) @@ -67,7 +65,7 @@ def test_route_choice(self): ) def test_route_choice_empty_path(self): - rc = RouteChoice(self.graph) + rc = RouteChoiceSet(self.graph) a = 1 self.assertEqual( @@ -76,14 +74,12 @@ def test_route_choice_empty_path(self): def test_route_choice_batched(self): np.random.seed(0) - rc = RouteChoice(self.graph) + rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] max_routes = 20 results = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) - # breakpoint() - self.assertEqual(len(results), len(nodes), "Requested number of route sets not returned") for od, route_set in results.items(): @@ -91,7 +87,7 @@ def test_route_choice_batched(self): self.assertEqual(len(route_set), max_routes, f"Requested number of routes not returned for {od}") def test_route_choice_exceptions(self): - rc = RouteChoice(self.graph) + rc = RouteChoiceSet(self.graph) args = [ (1, 20, 0, 0), (1, 20, -1, 0), From 
4e442f1e05cc494df39ff8c5cfbd3647c1082902 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 18 Jan 2024 12:38:29 +1000 Subject: [PATCH 31/44] Disable initialisation and bounds checking --- aequilibrae/paths/route_choice.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 98bd8df36..abe0a12ce 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -105,6 +105,9 @@ cdef class RouteChoiceSet: # Bounds checking doesn't really need to be disabled here but the warning is annoying @cython.boundscheck(False) + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.initializedcheck(False) def batched(self, ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, seed: int = 0, cores: int = 1): cdef: vector[pair[long long, long long]] c_ods = ods @@ -188,6 +191,7 @@ cdef class RouteChoiceSet: df.set_geometry("geometry") df.to_file("test1.gpkg", layer='routes', driver="GPKG") + @cython.initializedcheck(False) cdef void path_find(RouteChoiceSet self, long origin_index, long dest_index, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil: path_finding_a_star( origin_index, From 4b65f66b5797a8d9a25b347069a6097cbd2e3633 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 18 Jan 2024 15:51:18 +1000 Subject: [PATCH 32/44] Add docs and support blocking centroid flow with tests --- aequilibrae/paths/route_choice.pxd | 15 +++- aequilibrae/paths/route_choice.pyx | 83 +++++++++++++++++++- tests/aequilibrae/paths/test_route_choice.py | 15 ++++ 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index bf68c87ea..3d69a097a 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -115,7 +115,19 @@ cdef class RouteChoiceSet: long long [:] ids_graph_view long long [:] compressed_link_ids long long num_nodes - cdef void path_find(RouteChoiceSet self, long origin_index, long dest_index, double [:] scratch_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil + long long zones + bint block_flows_through_centroids + + cdef void path_find( + RouteChoiceSet self, + long origin_index, + long dest_index, + double [:] scratch_cost, + long long [:] thread_predecessors, + long long [:] thread_conn, + long long [:] thread_b_nodes + ) noexcept nogil + cdef RouteSet_t *generate_route_set( RouteChoiceSet self, long origin_index, @@ -125,5 +137,6 @@ cdef class RouteChoiceSet: double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn, + long long [:] thread_b_nodes, unsigned int seed ) noexcept nogil diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index abe0a12ce..3aa856b22 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -70,6 +70,7 @@ from typing import List, Tuple # It would really be nice if these were modules. 
The 'include' syntax is long deprecated and adds a lot to compilation times
 include 'basic_path_finding.pyx'

+@cython.embedsignature(True)
 cdef class RouteChoiceSet:
     """
     Route choice implemented via breadth first search with link removal (BFS-LE) as described in Rieser-Schüssler,
@@ -92,6 +93,8 @@ cdef class RouteChoiceSet:
         self.lon_view = tmp.lon.values
         self.ids_graph_view = graph.compact_graph.id.values
         self.num_nodes = graph.compact_num_nodes
+        self.zones = graph.num_zones
+        self.block_flows_through_centroids = graph.block_centroid_flows

     def __dealloc__(self):
         """
@@ -100,7 +103,27 @@ cdef class RouteChoiceSet:
         """
         pass

+    @cython.embedsignature(True)
     def run(self, origin: int, destination: int, max_routes: int = 0, max_depth: int = 0, seed: int = 0):
+        """
+        Compute a route set for a single OD pair.
+
+        Often the returned list's length is ``max_routes``; however, it may be limited by ``max_depth``, or, if all
+        unique possible paths have been found, a smaller set will be returned.
+
+        Thin wrapper around ``RouteChoiceSet.batched``.
+
+        :Arguments:
+            **origin** (:obj:`int`): Origin node ID. Must be present within compact graph. Recommended to choose a centroid.
+            **destination** (:obj:`int`): Destination node ID. Must be present within compact graph. Recommended to choose a centroid.
+            **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited.
+            **max_depth** (:obj:`int`): Maximum depth BFS can explore. Must be non-negative. Default of ``0`` for unlimited.
+            **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``.
+
+        :Returns:
+            **route set** (:obj:`list[tuple[int, ...]]`): Returns a list of unique, variable-length tuples of compact link IDs.
+                                                          Represents paths from ``origin`` to ``destination``.
+        """
         return self.batched([(origin, destination)], max_routes=max_routes, max_depth=max_depth, seed=seed)[(origin, destination)]

     # Bounds checking doesn't really need to be disabled here but the warning is annoying
@@ -109,6 +132,25 @@ cdef class RouteChoiceSet:
     @cython.embedsignature(True)
     @cython.initializedcheck(False)
     def batched(self, ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, seed: int = 0, cores: int = 1):
+        """
+        Compute route sets for a list of OD pairs.
+
+        Often the returned list for each OD pair has length ``max_routes``; however, it may be limited by ``max_depth``, or, if all
+        unique possible paths have been found, a smaller set will be returned.
+
+        :Arguments:
+            **ods** (:obj:`list[tuple[int, int]]`): List of OD pairs ``(origin, destination)``. Origin and destination node ID must be
+                                                    present within compact graph. Recommended to choose centroids.
+            **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited.
+            **max_depth** (:obj:`int`): Maximum depth BFS can explore. Must be non-negative. Default of ``0`` for unlimited.
+            **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``.
+            **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``1``.
+
+        :Returns:
+            **route sets** (:obj:`dict[tuple[int, int], list[tuple[int, ...]]]`): Returns a list of unique tuples of compact link IDs for
+                                                                                  each OD pair provided (as keys). Represents paths from
+                                                                                  ``origin`` to ``destination``.
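+
+        A minimal, hypothetical usage sketch (``graph`` is assumed to be an already-prepared ``Graph``; node IDs are illustrative):
+
+        >>> rc = RouteChoiceSet(graph)
+        >>> route_sets = rc.batched([(1, 20), (4, 39)], max_routes=10, max_depth=10, cores=2)
+        >>> routes_1_20 = route_sets[(1, 20)]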
+        """
         cdef:
             vector[pair[long long, long long]] c_ods = ods
@@ -116,6 +158,7 @@ cdef class RouteChoiceSet:
             double [:, :] cost_matrix = np.empty((cores, self.cost_view.shape[0]), dtype=float)
             long long [:, :] predecessors_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64)
             long long [:, :] conn_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64)
+            long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (cores, self.b_nodes_view.shape[0])).copy()

             vector[RouteSet_t *] *results = new vector[RouteSet_t *](len(ods))
             long long origin_index, dest_index, i
@@ -143,6 +186,17 @@ cdef class RouteChoiceSet:
             for i in prange(c_ods.size()):
                 origin_index = self.nodes_to_indices_view[c_ods[i].first]
                 dest_index = self.nodes_to_indices_view[c_ods[i].second]
+
+                if self.block_flows_through_centroids:
+                    blocking_centroid_flows(
+                        0,  # Always blocking
+                        origin_index,
+                        self.zones,
+                        self.graph_fs_view,
+                        b_nodes_matrix[threadid()],
+                        self.b_nodes_view,
+                    )
+
                 deref(results)[i] = RouteChoiceSet.generate_route_set(
                     self,
                     origin_index,
@@ -152,9 +206,20 @@ cdef class RouteChoiceSet:
                     cost_matrix[threadid()],
                     predecessors_matrix[threadid()],
                     conn_matrix[threadid()],
-                    c_seed
+                    b_nodes_matrix[threadid()],
+                    c_seed,
                 )

+                if self.block_flows_through_centroids:
+                    blocking_centroid_flows(
+                        1,  # Always unblocking
+                        origin_index,
+                        self.zones,
+                        self.graph_fs_view,
+                        b_nodes_matrix[threadid()],
+                        self.b_nodes_view,
+                    )
+
         # Build results into python objects using Cython's auto-conversion from vector[long long] to list then to tuple (for set operations)
         res = []
         for result in deref(results):
@@ -192,12 +257,21 @@ cdef class RouteChoiceSet:
         df.to_file("test1.gpkg", layer='routes', driver="GPKG")

     @cython.initializedcheck(False)
-    cdef void path_find(RouteChoiceSet self, long origin_index, long dest_index, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn) noexcept nogil:
+    cdef void path_find(
+        RouteChoiceSet self,
+        long origin_index,
+        long dest_index,
+        double [:] thread_cost,
+        long long [:] thread_predecessors,
+        long long [:] thread_conn,
+        long long [:] thread_b_nodes
+    ) noexcept nogil:
+        """Small wrapper around path finding, thread locals should be passed as arguments."""
         path_finding_a_star(
             origin_index,
             dest_index,
             thread_cost,
-            self.b_nodes_view,
+            thread_b_nodes,
             self.graph_fs_view,
             self.nodes_to_indices_view,
             self.lat_view,
@@ -221,6 +295,7 @@ cdef class RouteChoiceSet:
             double [:] thread_cost,
             long long [:] thread_predecessors,
             long long [:] thread_conn,
+            long long [:] thread_b_nodes,
             unsigned int seed
     ) noexcept nogil:
         """Main method for route set generation.
See top of file for commentary.""" @@ -260,7 +335,7 @@ cdef class RouteChoiceSet: for connector in deref(banned): thread_cost[connector] = INFINITY - RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn) + RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn, thread_b_nodes) # Mark this set of banned links as seen removed_links.insert(banned) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index e5a42993f..37c81f262 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -72,6 +72,21 @@ def test_route_choice_empty_path(self): rc.batched([(a, a)], max_routes=0, max_depth=3), {(a, a): []}, "Route set from self to self should be empty" ) + def test_route_choice_blocking_centroids(self): + a, b = 1, 20 + + self.graph.set_blocked_centroid_flows(False) + rc = RouteChoiceSet(self.graph) + + results = rc.run(a, b, max_routes=2, max_depth=2) + self.assertNotEqual(results, [], "Unblocked centroid flow found no paths") + + self.graph.set_blocked_centroid_flows(True) + rc = RouteChoiceSet(self.graph) + + results = rc.run(a, b, max_routes=2, max_depth=2) + self.assertListEqual(results, [], "Blocked centroid flow found a path") + def test_route_choice_batched(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) From eb8800fe524cd8556560ab3abd802922b3417153 Mon Sep 17 00:00:00 2001 From: Pedro Camargo Date: Mon, 29 Jan 2024 04:04:40 -0600 Subject: [PATCH 33/44] new matrix API (#496) * new matrix API * suffixes --------- Co-authored-by: pveigadecamargo --- .../distribution/gravity_calibration.py | 1 + aequilibrae/matrix/aequilibrae_matrix.py | 51 ++++++++++++++++--- aequilibrae/paths/results/__init__.py | 1 + aequilibrae/project/network/osm_downloader.py | 1 + aequilibrae/transit/gtfs_loader.py | 6 +-- aequilibrae/transit/transit_graph_builder.py | 8 +-- aequilibrae/utils/worker_thread.py | 1 + 7 files changed, 56 insertions(+), 13 deletions(-) diff --git a/aequilibrae/distribution/gravity_calibration.py b/aequilibrae/distribution/gravity_calibration.py index 691c465d3..f89cfa477 100644 --- a/aequilibrae/distribution/gravity_calibration.py +++ b/aequilibrae/distribution/gravity_calibration.py @@ -4,6 +4,7 @@ The procedures implemented in this code are some of those suggested in Modelling Transport, 4th Edition, Ortuzar and Willumsen, Wiley 2011 """ + from time import perf_counter import numpy as np diff --git a/aequilibrae/matrix/aequilibrae_matrix.py b/aequilibrae/matrix/aequilibrae_matrix.py index 26e8660f0..41db0d9e3 100644 --- a/aequilibrae/matrix/aequilibrae_matrix.py +++ b/aequilibrae/matrix/aequilibrae_matrix.py @@ -5,6 +5,7 @@ import uuid import warnings from functools import reduce +from pathlib import Path from typing import List import numpy as np @@ -90,7 +91,7 @@ def __init__(self): self.omx_file = None # type: omx.File self.__version__ = VERSION # Writes file version - def save(self, names=()) -> None: + def save(self, names=(), file_name=None) -> None: """Saves matrix data back to file. If working with AEM file, it flushes data to disk. If working with OMX, requires new names. @@ -98,6 +99,10 @@ def save(self, names=()) -> None: :Arguments: **names** (:obj:`tuple(str)`, `Optional`): New names for the matrices. 
Required if working with OMX files """ + if file_name is not None: + cores = names if len(names) else self.names + self.__save_as(file_name, cores) + return if not self.__omx: self.__flush(self.matrices) @@ -122,6 +127,38 @@ def save(self, names=()) -> None: self.names = self.omx_file.list_matrices() self.computational_view(names) + def __save_as(self, file_name: str, cores: List[str]): + + if Path(file_name).suffix.lower() == ".aem": + mat = AequilibraeMatrix() + args = { + "zones": self.zones, + "matrix_names": cores, + "index_names": self.index_names, + "memory_only": False, + "file_name": file_name, + } + mat.create_empty(**args) + mat.indices[:, :] = self.indices[:, :] + for core in cores: + mat.matrix[core][:, :] = self.matrix[core][:, :] + mat.name = self.name + mat.description = self.description + mat.close() + del mat + + elif Path(file_name).suffix.lower() == ".omx": + omx_mat = omx.open_file(file_name, "w") + for core in cores: + omx_mat[core] = self.matrix[core] + + for index in self.index_names: + omx_mat.create_mapping(index, self.indices[index]) + + omx_mat.attrs.name = self.name + omx_mat.attrs.description = self.description + omx_mat.close() + def create_empty( self, file_name: str = None, @@ -570,9 +607,9 @@ def __write__(self): np.memmap(self.file_path, dtype="uint8", offset=17, mode="r+", shape=1)[0] = data_size # matrix name - np.memmap(self.file_path, dtype="S" + str(MATRIX_NAME_MAX_LENGTH), offset=18, mode="r+", shape=1)[ - 0 - ] = self.name + np.memmap(self.file_path, dtype="S" + str(MATRIX_NAME_MAX_LENGTH), offset=18, mode="r+", shape=1)[0] = ( + self.name + ) # matrix description offset = 18 + MATRIX_NAME_MAX_LENGTH @@ -1095,9 +1132,9 @@ def setName(self, matrix_name: str): if len(str(matrix_name)) > MATRIX_NAME_MAX_LENGTH: matrix_name = str(matrix_name)[0:MATRIX_NAME_MAX_LENGTH] - np.memmap(self.file_path, dtype="S" + str(MATRIX_NAME_MAX_LENGTH), offset=18, mode="r+", shape=1)[ - 0 - ] = matrix_name + np.memmap(self.file_path, dtype="S" + str(MATRIX_NAME_MAX_LENGTH), offset=18, mode="r+", shape=1)[0] = ( + matrix_name + ) def setDescription(self, matrix_description: str): """ diff --git a/aequilibrae/paths/results/__init__.py b/aequilibrae/paths/results/__init__.py index 03b8d9e2a..9c398aae4 100644 --- a/aequilibrae/paths/results/__init__.py +++ b/aequilibrae/paths/results/__init__.py @@ -6,6 +6,7 @@ STILL NEED TO ADD SOME EXPLANATIONS HERE """ + __author__ = "Pedro Camargo ($Author: Pedro Camargo $)" __version__ = "1.0" __revision__ = "$Revision: 1 $" diff --git a/aequilibrae/project/network/osm_downloader.py b/aequilibrae/project/network/osm_downloader.py index f0794f71a..d614ca44b 100644 --- a/aequilibrae/project/network/osm_downloader.py +++ b/aequilibrae/project/network/osm_downloader.py @@ -9,6 +9,7 @@ For the original work, please see https://github.com/gboeing/osmnx """ + import logging import time import re diff --git a/aequilibrae/transit/gtfs_loader.py b/aequilibrae/transit/gtfs_loader.py index 3405a2c7d..f8f710bd5 100644 --- a/aequilibrae/transit/gtfs_loader.py +++ b/aequilibrae/transit/gtfs_loader.py @@ -180,9 +180,9 @@ def __deconflict_stop_times(self) -> None: ) for _, rec in max_speeds.iterrows(): - df.loc[ - (df.dist >= rec.min_distance) & ((df.dist < rec.max_distance)), "max_speed" - ] = rec.speed + df.loc[(df.dist >= rec.min_distance) & ((df.dist < rec.max_distance)), "max_speed"] = ( + rec.speed + ) to_fix = df[df.max_speed < df.speed].index.values if to_fix.shape[0] > 0: diff --git a/aequilibrae/transit/transit_graph_builder.py 
b/aequilibrae/transit/transit_graph_builder.py index 6add46052..c43778923 100644 --- a/aequilibrae/transit/transit_graph_builder.py +++ b/aequilibrae/transit/transit_graph_builder.py @@ -1428,9 +1428,11 @@ def to_transit_graph(self) -> TransitGraph: g.network["id"] = g.network.link_id g.prepare_graph( self.vertices[ - (self.vertices.node_type == "origin") - if self.blocking_centroid_flows - else (self.vertices.node_type == "od") + ( + (self.vertices.node_type == "origin") + if self.blocking_centroid_flows + else (self.vertices.node_type == "od") + ) ].node_id.values ) g.set_graph("trav_time") diff --git a/aequilibrae/utils/worker_thread.py b/aequilibrae/utils/worker_thread.py index 1494e5b2b..8cc22fb72 100644 --- a/aequilibrae/utils/worker_thread.py +++ b/aequilibrae/utils/worker_thread.py @@ -1,6 +1,7 @@ """ Original Author: UNKNOWN. COPIED FROM STACKOVERFLOW BUT CAN'T REMEMBER EXACTLY WHERE """ + import importlib.util as iutil spec = iutil.find_spec("PyQt5") From 6dc3427c8c2411b9c9657066ee2e94f5ae39e748 Mon Sep 17 00:00:00 2001 From: Pedro Camargo Date: Tue, 30 Jan 2024 17:52:16 -0600 Subject: [PATCH 34/44] linting (#498) * linting * Fixes linting packages * Fixes linting packages * . * . * . --------- Co-authored-by: pveigadecamargo --- .github/workflows/unit_tests.yml | 4 +- .../distribution/synthetic_gravity_model.py | 2 +- aequilibrae/matrix/aequilibrae_data.py | 11 ++-- aequilibrae/matrix/aequilibrae_matrix.py | 9 ++-- aequilibrae/paths/assignment_paths.py | 13 ++--- aequilibrae/paths/graph.py | 16 +++--- aequilibrae/paths/linear_approximation.py | 5 +- .../paths/results/assignment_results.py | 6 +-- aequilibrae/paths/traffic_assignment.py | 26 ++++------ aequilibrae/paths/traffic_class.py | 2 +- aequilibrae/project/about.py | 4 +- aequilibrae/project/data/matrices.py | 6 +-- aequilibrae/project/network/gmns_builder.py | 12 +++-- aequilibrae/project/network/gmns_exporter.py | 2 +- aequilibrae/project/network/link_types.py | 2 +- aequilibrae/project/network/links.py | 2 +- aequilibrae/project/network/network.py | 2 +- aequilibrae/project/network/nodes.py | 2 +- aequilibrae/project/network/osm_downloader.py | 6 ++- aequilibrae/project/network/periods.py | 2 +- aequilibrae/project/zoning.py | 2 +- aequilibrae/transit/constants.py | 20 ++++---- aequilibrae/transit/gtfs_loader.py | 14 +++--- aequilibrae/transit/lib_gtfs.py | 14 +++--- aequilibrae/transit/parse_csv.py | 2 +- aequilibrae/transit/transit.py | 2 +- .../transit/transit_elements/pattern.py | 4 +- aequilibrae/transit/transit_elements/stop.py | 2 +- aequilibrae/utils/create_delaunay_network.py | 2 +- benchmarks/select_link_benchmark.py | 4 +- pyproject.toml | 50 ++++++++++++++++++- setup.py | 4 +- tests/aequilibrae/paths/test_select_link.py | 2 +- tests/aequilibrae/project/test_link_type.py | 2 +- .../project/test_link_type_triggers.py | 4 +- tests/aequilibrae/project/test_link_types.py | 4 +- tests/aequilibrae/project/test_links.py | 4 +- .../project/test_modes_table_triggers.py | 9 ++-- tests/aequilibrae/project/test_nodes.py | 8 ++- tests/aequilibrae/project/test_periods.py | 4 +- tests/aequilibrae/transit/test_gtfs_loader.py | 2 +- tests/aequilibrae/transit/test_gtfs_route.py | 2 +- tests/aequilibrae/transit/test_gtfs_stop.py | 19 +++---- tests/aequilibrae/transit/test_gtfs_trip.py | 7 +-- tests/aequilibrae/transit/test_lib_gtfs.py | 2 +- tests/aequilibrae/transit/test_pattern.py | 2 +- tests/requirements_tests.txt | 4 +- tests/setup_windows_spatialite.py | 2 +- 48 files changed, 180 insertions(+), 151 deletions(-) diff 
--git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index c81090a9d..88764c71c 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -20,8 +20,8 @@ jobs: pip install -r requirements_additional.txt pip install -r tests/requirements_tests.txt - - name: Lint with flake8 - run: flake8 + - name: Lint with ruff + run: ruff . - name: Check code format with Black run: black --check . diff --git a/aequilibrae/distribution/synthetic_gravity_model.py b/aequilibrae/distribution/synthetic_gravity_model.py index c6bc7c880..e64d534f9 100644 --- a/aequilibrae/distribution/synthetic_gravity_model.py +++ b/aequilibrae/distribution/synthetic_gravity_model.py @@ -57,7 +57,7 @@ def load(self, file_name): else: raise ValueError("Model has unknown parameters: " + str(key)) except ValueError as err: - raise ValueError("File provided is not a valid Synthetic Gravity Model - {}".format(err.__str__())) + raise ValueError("File provided is not a valid Synthetic Gravity Model - {}".format(err.__str__())) from err def save(self, file_name): R"""Saves model to disk in yaml format. Extension is \*.mod""" diff --git a/aequilibrae/matrix/aequilibrae_data.py b/aequilibrae/matrix/aequilibrae_data.py index 54405c126..445e07c35 100644 --- a/aequilibrae/matrix/aequilibrae_data.py +++ b/aequilibrae/matrix/aequilibrae_data.py @@ -99,15 +99,10 @@ def create_empty( if not isinstance(self.data_types, list): raise ValueError('Data types, "data_types", needs to be a list') - # The check below is not working properly with the QGIS importer - # else: - # for dt in self.data_types: - # if not isinstance(dt, type): - # raise ValueError('Data types need to be Python or Numpy data types') for field in self.fields: - if not type(field) is str: - raise TypeError(field + " is not a string. You cannot use it as a field name") + if not isinstance(field, str): + raise TypeError(f"{field} is not a string. You cannot use it as a field name") if not field.isidentifier(): raise Exception(field + " is a not a valid identifier name. 
You cannot use it as a field name") if field in object.__dict__: @@ -115,7 +110,7 @@ def create_empty( self.num_fields = len(self.fields) - dtype = [("index", self.aeq_index_type)] + [(f, dt) for f, dt in zip(self.fields, self.data_types)] + dtype = [("index", self.aeq_index_type)] + list(zip(self.fields, self.data_types)) # the file if self.memory_mode: diff --git a/aequilibrae/matrix/aequilibrae_matrix.py b/aequilibrae/matrix/aequilibrae_matrix.py index 41db0d9e3..45177263f 100644 --- a/aequilibrae/matrix/aequilibrae_matrix.py +++ b/aequilibrae/matrix/aequilibrae_matrix.py @@ -4,6 +4,7 @@ import tempfile import uuid import warnings +from copy import copy from functools import reduce from pathlib import Path from typing import List @@ -258,7 +259,7 @@ def create_empty( else: raise Exception("Matrix names need to be provided as a list") - self.names = [x for x in matrix_names] + self.names = copy(matrix_names) self.cores = len(self.names) if self.zones is None: return @@ -381,8 +382,8 @@ def robust_name(input_name: str, max_length: int, forbiden_names: List[str]) -> ) idx_names = functools.reduce(lambda acc, n: acc + [robust_name(n, INDEX_NAME_MAX_LENGTH, acc)], do_idx, []) else: - core_names = [x for x in do_cores] - idx_names = [x for x in do_idx] + core_names = list(do_cores) + idx_names = list(do_idx) self.create_empty( file_name=file_path, @@ -428,7 +429,7 @@ def create_from_trip_list(self, path_to_file: str, from_column: str, to_column: trip_df = pd.read_csv(path_to_file) # Creating zone indices - zones_list = sorted(list(set(list(trip_df[from_column].unique()) + list(trip_df[to_column].unique())))) + zones_list = sorted(set(list(trip_df[from_column].unique()) + list(trip_df[to_column].unique()))) zones_df = pd.DataFrame({"zone": zones_list, "idx": list(np.arange(len(zones_list)))}) trip_df = trip_df.merge( diff --git a/aequilibrae/paths/assignment_paths.py b/aequilibrae/paths/assignment_paths.py index a1a1489cb..f34ea2505 100644 --- a/aequilibrae/paths/assignment_paths.py +++ b/aequilibrae/paths/assignment_paths.py @@ -94,15 +94,12 @@ def _read_compressed_graph_correspondence(self) -> Dict: def read_path_file(self, origin: int, iteration: int, traffic_class_id: str) -> (pd.DataFrame, pd.DataFrame): possible_traffic_classes = list(filter(lambda x: x.id == traffic_class_id, self.classes)) - assert ( - len(possible_traffic_classes) == 1 - ), f"traffic class id not unique, please choose one of {list(map(lambda x: x.id, self.classes))}" + class_ids = [x.id for x in self.classes] + assert len(possible_traffic_classes) == 1, f"traffic class id not unique, please choose one of {class_ids}" traffic_class = possible_traffic_classes[0] - base_dir = os.path.join( - self.path_base_dir, f"iter{iteration}", f"path_c{traffic_class.id}_{traffic_class.name}" - ) - path_o_f = os.path.join(base_dir, f"o{origin}.feather") - path_o_index_f = os.path.join(base_dir, f"o{origin}_indexdata.feather") + b_dir = os.path.join(self.path_base_dir, f"iter{iteration}", f"path_c{traffic_class.id}_{traffic_class.name}") + path_o_f = os.path.join(b_dir, f"o{origin}.feather") + path_o_index_f = os.path.join(b_dir, f"o{origin}_indexdata.feather") path_o = pd.read_feather(path_o_f) path_o_index = pd.read_feather(path_o_index_f) return path_o, path_o_index diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index ea777cf95..ea0193764 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -12,7 +12,7 @@ from aequilibrae.context import get_logger -class GraphBase(ABC): +class 
GraphBase(ABC): # noqa: B024 """ Graph class """ @@ -244,8 +244,8 @@ def exclude_links(self, links: list) -> None: self._id = uuid.uuid4().hex def __build_column_names(self, all_titles: List[str]) -> Tuple[list, list]: - fields = [x for x in self.required_default_fields] - types = [x for x in self.__required_default_types] + fields = list(self.required_default_fields) + types = list(self.__required_default_types) for column in all_titles: if column not in self.required_default_fields and column[0:-3] not in self.required_default_fields: if column[-3:] == "_ab": @@ -462,19 +462,17 @@ def __determine_types__(self, new_type, current_type): new_type = float(new_type) except ValueError as verr: self.logger.warning("Could not convert {} - {}".format(new_type, verr.__str__())) - nt = type(new_type) - def_type = None - if nt == int: + if isinstance(new_type, int): def_type = int if current_type == float: - def_type == float + def_type = float elif current_type == str: def_type = str - elif nt == float: + elif isinstance(new_type, float): def_type = float if current_type == str: def_type = str - elif nt == str: + elif isinstance(new_type, str): def_type = str else: raise ValueError("WRONG TYPE OR NULL VALUE") diff --git a/aequilibrae/paths/linear_approximation.py b/aequilibrae/paths/linear_approximation.py index 2249f87ff..fa0c376a2 100644 --- a/aequilibrae/paths/linear_approximation.py +++ b/aequilibrae/paths/linear_approximation.py @@ -12,7 +12,6 @@ from aequilibrae.paths.all_or_nothing import allOrNothing from aequilibrae.paths.results import AssignmentResults from aequilibrae.paths.traffic_class import TrafficClass -from aequilibrae.context import get_active_project from ..utils import WorkerThread try: @@ -342,7 +341,7 @@ def __maybe_create_path_file_directories(self): def doWork(self): self.execute() - def execute(self): + def execute(self): # noqa: C901 # We build the fixed cost field for c in self.traffic_classes: @@ -371,7 +370,7 @@ def execute(self): self.logger.info(f"{self.algorithm} Assignment STATS") self.logger.info("Iteration, RelativeGap, stepsize") - for self.iter in range(1, self.max_iter + 1): + for self.iter in range(1, self.max_iter + 1): # noqa: B020 self.iteration_issue = [] if pyqt: self.equilibration.emit(["rgap", self.rgap]) diff --git a/aequilibrae/paths/results/assignment_results.py b/aequilibrae/paths/results/assignment_results.py index c0358ef25..f0921853d 100644 --- a/aequilibrae/paths/results/assignment_results.py +++ b/aequilibrae/paths/results/assignment_results.py @@ -138,14 +138,14 @@ def prepare(self, graph: Graph, matrix: AequilibraeMatrix) -> None: self.__integer_type = graph.default_types("int") if matrix.view_names is None: - raise ("Please set the matrix_procedures computational view") + raise ValueError("Please set the matrix_procedures computational view") self.classes["number"] = 1 if len(matrix.matrix_view.shape) > 2: self.classes["number"] = matrix.matrix_view.shape[2] self.classes["names"] = matrix.view_names if graph is None: - raise ("Please provide a graph") + raise ValueError("Please provide a graph") self.compact_nodes = graph.compact_num_nodes self.compact_links = graph.compact_num_links @@ -154,7 +154,7 @@ def prepare(self, graph: Graph, matrix: AequilibraeMatrix) -> None: self.centroids = graph.centroids self.links = graph.num_links self.num_skims = len(graph.skim_fields) - self.skim_names = [x for x in graph.skim_fields] + self.skim_names = list(graph.skim_fields) self.lids = graph.graph.link_id.values self.direcs = 
graph.graph.direction.values self.crosswalk = np.zeros(graph.graph.shape[0], self.__integer_type) diff --git a/aequilibrae/paths/traffic_assignment.py b/aequilibrae/paths/traffic_assignment.py index 0cec0fb73..a83b120d7 100644 --- a/aequilibrae/paths/traffic_assignment.py +++ b/aequilibrae/paths/traffic_assignment.py @@ -1,14 +1,13 @@ -from copy import deepcopy import importlib.util as iutil import logging import socket import sqlite3 +from abc import ABC, abstractmethod from datetime import datetime from os import path from pathlib import Path from typing import List, Dict, Union from uuid import uuid4 -from abc import ABC, abstractmethod import numpy as np import pandas as pd @@ -19,10 +18,10 @@ from aequilibrae.matrix import AequilibraeData from aequilibrae.matrix import AequilibraeMatrix from aequilibrae.paths.linear_approximation import LinearApproximation -from aequilibrae.paths.traffic_class import TrafficClass, TransitClass, TransportClassBase +from aequilibrae.paths.optimal_strategies import OptimalStrategies +from aequilibrae.paths.traffic_class import TrafficClass, TransportClassBase from aequilibrae.paths.vdf import VDF, all_vdf_functions from aequilibrae.project.database_connection import database_connection -from aequilibrae.paths.optimal_strategies import OptimalStrategies spec = iutil.find_spec("openmatrix") has_omx = spec is not None @@ -105,7 +104,7 @@ def set_classes(self, classes: List[TransportClassBase]) -> None: **classes** (:obj:`List[TransportClassBase]`:) List of TransportClass's for assignment """ - ids = set([x._id for x in classes]) + ids = {x._id for x in classes} if len(ids) < len(classes): raise ValueError("Classes need to be unique. Your list of classes has repeated items/IDs") self.classes = classes # type: List[TransportClassBase] @@ -286,15 +285,12 @@ def __check_attributes(self, instance, value): elif instance == "vdf_parameters": if not self.__validate_parameters(value): return False, value, f"Parameter set is not valid: {value} " - elif instance in ["time_field", "capacity_field"]: - if not isinstance(value, str): - return False, value, f"Value for {instance} is not string" - elif instance == "cores": - if not isinstance(value, int): - return False, value, f"Value for {instance} is not integer" - elif instance == "save_path_files": - if not isinstance(value, bool): - return False, value, f"Value for {instance} is not boolean" + elif instance in ["time_field", "capacity_field"] and not isinstance(value, str): + return False, value, f"Value for {instance} is not string" + elif instance == "cores" and not isinstance(value, int): + return False, value, f"Value for {instance} is not integer" + elif instance == "save_path_files" and not isinstance(value, bool): + return False, value, f"Value for {instance} is not boolean" if instance not in self.__dict__: return False, value, f"TrafficAssignment class does not have property {instance}" return True, value, "" @@ -316,7 +312,7 @@ def set_classes(self, classes: List[TrafficClass]) -> None: **classes** (:obj:`List[TrafficClass]`:) List of Traffic classes for assignment """ - ids = set([x._id for x in classes]) + ids = {x._id for x in classes} if len(ids) < len(classes): raise ValueError("Classes need to be unique. 
Your list of classes has repeated items/IDs") self.classes = classes # type: List[TrafficClass] diff --git a/aequilibrae/paths/traffic_class.py b/aequilibrae/paths/traffic_class.py index 7e6552972..b12b41ba7 100644 --- a/aequilibrae/paths/traffic_class.py +++ b/aequilibrae/paths/traffic_class.py @@ -10,7 +10,7 @@ from aequilibrae.paths.results import AssignmentResults, TransitAssignmentResults -class TransportClassBase(ABC): +class TransportClassBase(ABC): # noqa: B024 def __init__(self, name: str, graph: GraphBase, matrix: AequilibraeMatrix) -> None: """ Instantiates the class diff --git a/aequilibrae/project/about.py b/aequilibrae/project/about.py index 64c7280e9..8ffc8484a 100644 --- a/aequilibrae/project/about.py +++ b/aequilibrae/project/about.py @@ -56,7 +56,7 @@ def create(self): def list_fields(self) -> list: """Returns a list of all characteristics the about table holds""" - return [x for x in self.__characteristics] + return list(self.__characteristics) def add_info_field(self, info_field: str) -> None: """Adds new information field to the model @@ -105,7 +105,7 @@ def write_back(self): def __has_about(self, conn): sql = "SELECT name FROM sqlite_master WHERE type='table';" - return any(["about" in x[0] for x in conn.execute(sql).fetchall()]) + return any("about" in x[0] for x in conn.execute(sql).fetchall()) def __load(self, conn): self.__characteristics = [] diff --git a/aequilibrae/project/data/matrices.py b/aequilibrae/project/data/matrices.py index e2ec90e79..9b6923675 100644 --- a/aequilibrae/project/data/matrices.py +++ b/aequilibrae/project/data/matrices.py @@ -23,7 +23,7 @@ def __init__(self, project): tl = TableLoader() with commit_and_close(self.project.connect()) as conn: matrices_list = tl.load_table(conn, "matrices") - self.__fields = [x for x in tl.fields] + self.__fields = list(tl.fields) if matrices_list: self.__properties = list(matrices_list[0].keys()) for lt in matrices_list: @@ -143,7 +143,7 @@ def delete_record(self, matrix_name: str) -> None: mr = self.get_record(matrix_name) mr.delete() - def new_record(self, name: str, file_name: str, matrix=AequilibraeMatrix()) -> MatrixRecord: + def new_record(self, name: str, file_name: str, matrix=None) -> MatrixRecord: """Creates a new record for a matrix in disk, but does not save it If the matrix file is not already on disk, it will fail @@ -155,7 +155,7 @@ def new_record(self, name: str, file_name: str, matrix=AequilibraeMatrix()) -> M Return: *matrix_record* (:obj:`MatrixRecord`): A matrix record that can be manipulated in memory before saving """ - + matrix = matrix or AequilibraeMatrix() if name in self.__items: raise ValueError(f"There is already a matrix of name ({name}). 
It must be unique.") diff --git a/aequilibrae/project/network/gmns_builder.py b/aequilibrae/project/network/gmns_builder.py index 5f97acec6..be0eb93e6 100644 --- a/aequilibrae/project/network/gmns_builder.py +++ b/aequilibrae/project/network/gmns_builder.py @@ -1,5 +1,7 @@ import math import re +from copy import deepcopy + import numpy as np import pandas as pd import string @@ -98,7 +100,7 @@ def doWork(self): l_fields.save() - all_fields = [k for k in gmns_l_fields] + all_fields = list(gmns_l_fields) missing_f = [c for c in list(self.link_df.columns) if c not in all_fields] if missing_f != []: print( @@ -123,7 +125,7 @@ def doWork(self): n_fields.save() - all_fields = [k for k in gmns_n_fields] + all_fields = list(gmns_n_fields) missing_f = [c for c in list(self.node_df.columns) if c not in all_fields] if missing_f != []: print( @@ -248,7 +250,7 @@ def get_aeq_direction(self): sorted_df.drop(to_drop_lst, axis=0, inplace=True) self.link_df = self.link_df[self.link_df.link_id.isin(list(sorted_df.link_id))] self.link_df.reset_index(drop=True, inplace=True) - direction = [x for x in list(self.link_df[gmns_dir])] + direction = list(self.link_df[gmns_dir]) return direction @@ -377,7 +379,7 @@ def save_modes_to_aeq(self): pattern.sub(lambda x: "_" + char_replaces[x.group()], s).replace("+", "").replace("-", "_") for s in modes_list ] - mode_ids_list = [x for x in modes_list] + mode_ids_list = deepcopy(modes_list) saved_modes = list(self.modes.all_modes()) with commit_and_close(connect_spatialite(self.__pth_file)) as conn: @@ -414,7 +416,7 @@ def save_modes_to_aeq(self): new_mode.save() mode_to_add += letters[0] - saved_modes += [x for x in mode_to_add] + saved_modes += list(mode_to_add) else: mode_to_add += modes_df.loc[modes_df.name == m, "id"].item() diff --git a/aequilibrae/project/network/gmns_exporter.py b/aequilibrae/project/network/gmns_exporter.py index 712cff0d3..9319e5406 100644 --- a/aequilibrae/project/network/gmns_exporter.py +++ b/aequilibrae/project/network/gmns_exporter.py @@ -48,7 +48,7 @@ def doWork(self): self.modes_df.to_csv(join(self.output_path, "use_definition.csv"), index=False) def update_direction_field(self): - two_way_cols = list(set([col[:-3] for col in list(self.links_df.columns) if col[-3:] in ["_ab", "_ba"]])) + two_way_cols = list({col[:-3] for col in list(self.links_df.columns) if col[-3:] in ["_ab", "_ba"]}) ab_links = pd.DataFrame(self.links_df[self.links_df.direction > -1], copy=True) ba_links = pd.DataFrame(self.links_df[self.links_df.direction < 1], copy=True) diff --git a/aequilibrae/project/network/link_types.py b/aequilibrae/project/network/link_types.py index 9dfe11a90..05b3524ae 100644 --- a/aequilibrae/project/network/link_types.py +++ b/aequilibrae/project/network/link_types.py @@ -67,7 +67,7 @@ def __init__(self, net): link_types_list = tl.load_table(conn, "link_types") existing_list = [lt["link_type_id"] for lt in link_types_list] - self.__fields = [x for x in tl.fields] + self.__fields = list(tl.fields) for lt in link_types_list: if lt["link_type_id"] not in self.__items: self.__items[lt["link_type_id"]] = LinkType(lt, self.project) diff --git a/aequilibrae/project/network/links.py b/aequilibrae/project/network/links.py index 3cd532367..c46c4e36e 100644 --- a/aequilibrae/project/network/links.py +++ b/aequilibrae/project/network/links.py @@ -163,7 +163,7 @@ def __link_data(self, link_id: int) -> dict: with commit_and_close(connect_spatialite(self.project.path_to_file)) as conn: data = conn.execute(f"{self.sql} where link_id=?", 
[link_id]).fetchone() if data: - return {key: val for key, val in zip(self.__fields, data)} + return dict(zip(self.__fields, data)) raise ValueError("Link_id does not exist on the network") def __new_link_id(self): diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py index 64074c9a9..e9470a62d 100644 --- a/aequilibrae/project/network/network.py +++ b/aequilibrae/project/network/network.py @@ -130,7 +130,7 @@ def create_from_osm( east: float = None, north: float = None, place_name: str = None, - modes=["car", "transit", "bicycle", "walk"], + modes=["car", "transit", "bicycle", "walk"], # noqa: B006 ) -> None: """ Downloads the network from Open-Street Maps diff --git a/aequilibrae/project/network/nodes.py b/aequilibrae/project/network/nodes.py index d09d515ec..9fcae40ec 100644 --- a/aequilibrae/project/network/nodes.py +++ b/aequilibrae/project/network/nodes.py @@ -66,7 +66,7 @@ def get(self, node_id: int) -> Node: with commit_and_close(connect_spatialite(self.project.path_to_file)) as conn: data = conn.execute(f"{self.sql} where node_id=?", [node_id]).fetchone() if data: - data = {key: val for key, val in zip(self.__fields, data)} + data = dict(zip(self.__fields, data)) node = Node(data, self.project) self.__items[node.node_id] = node return node diff --git a/aequilibrae/project/network/osm_downloader.py b/aequilibrae/project/network/osm_downloader.py index d614ca44b..79ad15170 100644 --- a/aequilibrae/project/network/osm_downloader.py +++ b/aequilibrae/project/network/osm_downloader.py @@ -118,7 +118,7 @@ def overpass_request(self, data, pause_duration=None, timeout=180, error_pause_d msg = f'Server remark: "{response_json["remark"]}"' self.report.append(msg) self.logger.info(msg) - except Exception: + except Exception as err: # 429 is 'too many requests' and 504 is 'gateway timeout' from server # overload - handle these errors by recursively calling # overpass_request until we get a valid response @@ -137,7 +137,9 @@ def overpass_request(self, data, pause_duration=None, timeout=180, error_pause_d # else, this was an unhandled status_code, throw an exception else: self.report.append(f"Server at {domain} returned status code {response.status_code} and no JSON data") - raise Exception(f"Server returned no JSON data.\n{response} {response.reason}\n{response.text}") + raise Exception( + f"Server returned no JSON data.\n{response} {response.reason}\n{response.text}" + ) from err return response_json diff --git a/aequilibrae/project/network/periods.py b/aequilibrae/project/network/periods.py index 5183ccdb3..da80aa3ea 100644 --- a/aequilibrae/project/network/periods.py +++ b/aequilibrae/project/network/periods.py @@ -71,7 +71,7 @@ def get(self, period_id: int) -> Period: with commit_and_close(connect_spatialite(self.project.path_to_file)) as conn: data = conn.execute(f"{self.sql} where period_id=?", [period_id]).fetchone() if data: - data = {key: val for key, val in zip(self.__fields, data)} + data = dict(zip(self.__fields, data)) period = Period(data, self.project) self.__items[period.period_id] = period return period diff --git a/aequilibrae/project/zoning.py b/aequilibrae/project/zoning.py index 630cb2077..8ac5c974e 100644 --- a/aequilibrae/project/zoning.py +++ b/aequilibrae/project/zoning.py @@ -134,7 +134,7 @@ def refresh_geo_index(self): def __has_zoning(self): with commit_and_close(connect_spatialite(self.project.path_to_file)) as conn: dt = conn.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall() - return any(["zone" in 
x[0].lower() for x in dt]) + return any("zone" in x[0].lower() for x in dt) def __load(self): tl = TableLoader() diff --git a/aequilibrae/transit/constants.py b/aequilibrae/transit/constants.py index f2056a894..8ceaecc84 100644 --- a/aequilibrae/transit/constants.py +++ b/aequilibrae/transit/constants.py @@ -15,13 +15,13 @@ class Constants: - agencies: Dict[str, Any] = dict() - srid: Dict[int, int] = dict() - routes: Dict[int, int] = dict() - trips: Dict[int, int] = dict() - patterns: Dict[int, int] = dict() - pattern_lookup: Dict[int, int] = dict() - stops: Dict[int, int] = dict() - fares: Dict[int, int] = dict() - links: Dict[int, int] = dict() - transit_links: Dict[int, int] = dict() + agencies: Dict[str, Any] = {} + srid: Dict[int, int] = {} + routes: Dict[int, int] = {} + trips: Dict[int, int] = {} + patterns: Dict[int, int] = {} + pattern_lookup: Dict[int, int] = {} + stops: Dict[int, int] = {} + fares: Dict[int, int] = {} + links: Dict[int, int] = {} + transit_links: Dict[int, int] = {} diff --git a/aequilibrae/transit/gtfs_loader.py b/aequilibrae/transit/gtfs_loader.py index f8f710bd5..ba9f33d8d 100644 --- a/aequilibrae/transit/gtfs_loader.py +++ b/aequilibrae/transit/gtfs_loader.py @@ -49,16 +49,16 @@ def __init__(self): self.feed_date = "" self.agency = Agency() self.services = {} - self.routes: Dict[int, Route] = dict() - self.trips: Dict[int, Dict[Route]] = dict() - self.stops: Dict[int, Stop] = dict() + self.routes: Dict[int, Route] = {} + self.trips: Dict[int, Dict[Route]] = {} + self.stops: Dict[int, Stop] = {} self.stop_times = {} self.shapes = {} self.trip_data = {} self.fare_rules = [] self.fare_attributes = {} self.feed_dates = [] - self.data_arrays = dict() + self.data_arrays = {} self.wgs84 = pyproj.Proj("epsg:4326") self.srid = get_srid() self.transformer = Transformer.from_crs("epsg:4326", f"epsg:{self.srid}", always_xy=False) @@ -280,7 +280,7 @@ def __load_shapes_table(self): self.signal.emit(["update", "secondary", i + 1, msg_txt, self.__mt]) items = shapes[shapes["shape_id"] == shape_id] items = items[np.argsort(items["shape_pt_sequence"])] - shape = LineString([x for x in zip(items["shape_pt_lon"], items["shape_pt_lat"])]) + shape = LineString(list(zip(items["shape_pt_lon"], items["shape_pt_lat"]))) self.shapes[shape_id] = shape def __load_trips_table(self): @@ -422,8 +422,8 @@ def __load_stop_times(self): df = pd.DataFrame(stoptimes) - df.loc[:, "arrival_time"] == df.loc[:, ["arrival_time", "departure_time"]].max(axis=1) - df.loc[:, "departure_time"] == df.loc[:, "arrival_time"] + df.loc[:, "arrival_time"] = df.loc[:, ["arrival_time", "departure_time"]].max(axis=1) + df.loc[:, "departure_time"] = df.loc[:, "arrival_time"] counter = df.shape[0] df = df.assign(other_stop=df.stop_id.shift(-1), other_trip=df.trip_id.shift(-1)) diff --git a/aequilibrae/transit/lib_gtfs.py b/aequilibrae/transit/lib_gtfs.py index 2e8f4aaf0..8a075dacd 100644 --- a/aequilibrae/transit/lib_gtfs.py +++ b/aequilibrae/transit/lib_gtfs.py @@ -37,7 +37,7 @@ class GTFSRouteSystemBuilder(WorkerThread): signal = SignalImpl(object) - def __init__(self, network, agency_identifier, file_path, day="", description="", default_capacities={}): + def __init__(self, network, agency_identifier, file_path, day="", description="", capacities={}): # noqa: B006 """Instantiates a transit class for the network :Arguments: @@ -67,7 +67,7 @@ def __init__(self, network, agency_identifier, file_path, day="", description="" self.sridproj = pyproj.Proj(f"epsg:{self.srid}") self.gtfs_data.agency.agency = 
agency_identifier self.gtfs_data.agency.description = description - self.__default_capacities = default_capacities + self.__default_capacities = capacities self.__do_execute_map_matching = False self.__target_date__ = None self.__outside_zones = 0 @@ -104,7 +104,7 @@ def set_maximum_speeds(self, max_speeds: pd.DataFrame): Modes not covered in the data will not be touched and distance brackets not covered will receive the maximum speed, with a warning """ - dict_speeds = {x: df for x, df in max_speeds.groupby(["mode"])} + dict_speeds = dict(max_speeds.groupby(["mode"])) self.gtfs_data._set_maximum_speeds(dict_speeds) def dates_available(self) -> list: @@ -126,7 +126,7 @@ def set_allow_map_match(self, allow=True): self.__do_execute_map_matching = allow - def map_match(self, route_types=[3]) -> None: + def map_match(self, route_types=[3]) -> None: # noqa: B006 """Performs map-matching for all routes of one or more types. Defaults to map-matching Bus routes (type 3) only. @@ -139,7 +139,7 @@ def map_match(self, route_types=[3]) -> None: if not isinstance(route_types, list) and not isinstance(route_types, tuple): raise TypeError("Route_types must be list or tuple") - if any([not isinstance(item, int) for item in route_types]): + if any(not isinstance(item, int) for item in route_types): raise TypeError("All route types must be integers") mt = f"Map-matching routes for {self.gtfs_data.agency.agency}" @@ -313,7 +313,7 @@ def __build_data(self): all_pats = [trip.pattern_hash for trip in new_trips] cntr = route.route_id + PATTERN_ID_MULTIPLIER - for pat_hash in sorted(list(set(all_pats))): + for pat_hash in sorted(set(all_pats)): c.pattern_lookup[pat_hash] = cntr cntr += ((all_pats.count(pat_hash) // 100) + 1) * PATTERN_ID_MULTIPLIER @@ -447,7 +447,7 @@ def builds_link_graphs_with_broken_stops(self): **mode_id** (:obj:`int`): Mode ID for which we will build the graph for """ - route_types = list(set([r.route_type for r in self.select_routes.values()])) + route_types = list({r.route_type for r in self.select_routes.values()}) route_types = [mode_id for mode_id in route_types if mode_correspondence[mode_id] not in self.graphs] if not route_types: return diff --git a/aequilibrae/transit/parse_csv.py b/aequilibrae/transit/parse_csv.py index e8ab82e98..408febda2 100644 --- a/aequilibrae/transit/parse_csv.py +++ b/aequilibrae/transit/parse_csv.py @@ -5,7 +5,7 @@ from numpy.lib.recfunctions import append_fields -def parse_csv(file_name: str, column_order=[]): +def parse_csv(file_name: str, column_order=[]): # noqa B006 tot = [] if isinstance(file_name, str): csvfile = open(file_name, encoding="utf-8-sig") diff --git a/aequilibrae/transit/transit.py b/aequilibrae/transit/transit.py index 6c3eb6c18..068cbaa0d 100644 --- a/aequilibrae/transit/transit.py +++ b/aequilibrae/transit/transit.py @@ -65,7 +65,7 @@ def new_gtfs_builder(self, agency, file_path, day="", description="") -> GTFSRou file_path=file_path, day=day, description=description, - default_capacities=self.default_capacities, + capacities=self.default_capacities, ) return gtfs diff --git a/aequilibrae/transit/transit_elements/pattern.py b/aequilibrae/transit/transit_elements/pattern.py index 8b5e95b04..f2b780775 100644 --- a/aequilibrae/transit/transit_elements/pattern.py +++ b/aequilibrae/transit/transit_elements/pattern.py @@ -142,7 +142,7 @@ def map_match(self): if not self.__feed.graphs: self.__feed.builds_link_graphs_with_broken_stops() - if not mode_correspondence[self.route_type] in self.__feed.graphs: + if 
mode_correspondence[self.route_type] not in self.__feed.graphs: return self.__feed.path_store.add_graph( @@ -183,7 +183,7 @@ def __map_matching_complete_path_building(self): empty_frame = pd.DataFrame([]) # We search for disconnected stops: - candidate_stops = [stop for stop in self.stops] + candidate_stops = list(self.stops) stop_node_idxs = [stop.___map_matching_id__[self.route_type] for stop in candidate_stops] node0 = graph.network.a_node[~graph.network.a_node.isin(graph.centroids)].min() diff --git a/aequilibrae/transit/transit_elements/stop.py b/aequilibrae/transit/transit_elements/stop.py index 5d97fd924..7efb33571 100644 --- a/aequilibrae/transit/transit_elements/stop.py +++ b/aequilibrae/transit/transit_elements/stop.py @@ -37,7 +37,7 @@ def __init__(self, agency_id: int, record: tuple, headers: list): self.srid = -1 self.geo: Optional[Point] = None self.route_type: Optional[int] = None - self.___map_matching_id__: Dict[Any, Any] = dict() + self.___map_matching_id__: Dict[Any, Any] = {} self.__moved_map_matching__ = 0 for key, value in zip(headers, record): diff --git a/aequilibrae/utils/create_delaunay_network.py b/aequilibrae/utils/create_delaunay_network.py index 2b59d5ada..39a81159f 100644 --- a/aequilibrae/utils/create_delaunay_network.py +++ b/aequilibrae/utils/create_delaunay_network.py @@ -57,7 +57,7 @@ def create_network(self, source="zones", overwrite=False): for triangle in all_edges: links = list(combinations(triangle, 2)) for i in links: - f, t = sorted(list(i)) + f, t = sorted(i) edges.append([points.at[f, "node_id"], points.at[t, "node_id"]]) edges = pd.DataFrame(edges) diff --git a/benchmarks/select_link_benchmark.py b/benchmarks/select_link_benchmark.py index 6200b6dd5..3ed42ea39 100644 --- a/benchmarks/select_link_benchmark.py +++ b/benchmarks/select_link_benchmark.py @@ -11,7 +11,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent)) -from aequilibrae import Project, TrafficAssignment, TrafficClass +from aequilibrae import Project, TrafficAssignment, TrafficClass # noqa: E402 def aequilibrae_init(proj_path: str, cost: str): @@ -166,7 +166,7 @@ def main(): print("BENCHING links: ", link.keys() if link else link) qries = [] if link is None else list(link.values()) - t = timeit.Timer(lambda: assignment.execute()) + t = timeit.Timer(lambda: assignment.execute()) # noqa: B023 times = t.repeat(repeat=3, number=args["iters"]) df = pd.DataFrame( { diff --git a/pyproject.toml b/pyproject.toml index 3bcb1ab1f..ff2ab1ff1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,4 +6,52 @@ extend-exclude = '''docs/*''' [build-system] -requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel"] \ No newline at end of file +requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel"] + + +[tool.ruff] + +select = ["B", "C", "E", "F", "W"] +ignore = ["E501", "F401", "F403", "B028", "B007", "B017"] +exclude = [ + ".idea", + "__pycache__", + "sphinx", + ".ipynb_checkpoints", + "aequilibrae.egg-info", + "docs/*", + "notebooks", + "bin/hpc", + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", + ".venv", + ".venv39", + ".venv310", + ".venv311", + ".venv312", +] +line-length = 120 +dummy-variable-rgx = 
"^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +target-version = "py39" + +[tool.ruff.mccabe] +max-complexity = 20 diff --git a/setup.py b/setup.py index 6d87683b4..46f89c43f 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ include_dirs.append(pa.get_include()) is_win = "WINDOWS" in platform.platform().upper() -is_mac = any([e in platform.platform().upper() for e in ["MACOS", "DARWIN"]]) +is_mac = any(e in platform.platform().upper() for e in ["MACOS", "DARWIN"]) prefix = "/" if is_win else "-f" cpp_std = "/std:c++17" if is_win else "-std=c++17" compile_args = [cpp_std, f"{prefix}openmp"] @@ -59,7 +59,7 @@ with open("requirements.txt", "r") as fl: install_requirements = [x.strip() for x in fl.readlines()] -pkgs = [pkg for pkg in find_packages()] +pkgs = list(find_packages()) pkg_data = { "aequilibrae.reference_files": ["spatialite.sqlite", "nauru.zip", "sioux_falls.zip", "coquimbo.zip"], diff --git a/tests/aequilibrae/paths/test_select_link.py b/tests/aequilibrae/paths/test_select_link.py index a8f89f02d..f499f91a2 100644 --- a/tests/aequilibrae/paths/test_select_link.py +++ b/tests/aequilibrae/paths/test_select_link.py @@ -216,7 +216,7 @@ def g(o, d): for i in range(len(sl)): node_pair = graph.graph.iloc[sl[i]]["a_node"] + 1, graph.graph.iloc[sl[i]]["b_node"] + 1 sl_links.append(node_pair) - mask = dict() + mask = {} for origin, val in enumerate(a): for dest, path in enumerate(val): for k in range(1, len(path)): diff --git a/tests/aequilibrae/project/test_link_type.py b/tests/aequilibrae/project/test_link_type.py index 531f643f6..92424a48a 100644 --- a/tests/aequilibrae/project/test_link_type.py +++ b/tests/aequilibrae/project/test_link_type.py @@ -21,7 +21,7 @@ def tearDown(self) -> None: def test_changing_link_type_id(self): ltypes = self.proj.network.link_types - lt = random.choice([x for x in ltypes.all_types().values()]) + lt = random.choice(list(ltypes.all_types().values())) with self.assertRaises(ValueError): lt.link_type_id = "test my description" diff --git a/tests/aequilibrae/project/test_link_type_triggers.py b/tests/aequilibrae/project/test_link_type_triggers.py index 12b032e0e..5f4ddb4ea 100644 --- a/tests/aequilibrae/project/test_link_type_triggers.py +++ b/tests/aequilibrae/project/test_link_type_triggers.py @@ -22,7 +22,7 @@ def setUp(self) -> None: qry_file = os.path.join(root, "database_specification/network/triggers/link_type_table_triggers.sql") with open(qry_file, "r") as sql_file: self.queries = sql_file.read() - self.queries = [cmd for cmd in self.queries.split("#")] + self.queries = list(self.queries.split("#")) def tearDown(self) -> None: self.proj.close() @@ -106,7 +106,7 @@ def test_link_type_on_links_insert(self): fields = {x[1]: x[0] for x in f} sql = "select * from links where link_id=70" - a = [x for x in self.proj.conn.execute(sql).fetchone()] + a = list(self.proj.conn.execute(sql).fetchone()) a[fields["link_type"]] = "something indeed silly123" a[fields["link_id"]] = 456789 a[fields["a_node"]] = 777 diff --git a/tests/aequilibrae/project/test_link_types.py b/tests/aequilibrae/project/test_link_types.py index 3f0eb4d51..ab7ac42ba 100644 --- a/tests/aequilibrae/project/test_link_types.py +++ b/tests/aequilibrae/project/test_link_types.py @@ -55,11 +55,11 @@ def test_get_and_get_by_name(self): def test_all_types(self): lt = self.proj.network.link_types - all_lts = set([x for x in lt.all_types().keys()]) + all_lts = set(lt.all_types().keys()) c = self.proj.conn.cursor() c.execute("select link_type_id from link_types") - reallts = set([x[0] for x in 
c.fetchall()]) + reallts = {x[0] for x in c.fetchall()} diff = all_lts.symmetric_difference(reallts) self.assertEqual(diff, set(), "Getting all link_types failed") diff --git a/tests/aequilibrae/project/test_links.py b/tests/aequilibrae/project/test_links.py index f4bc67ff6..0719b6e25 100644 --- a/tests/aequilibrae/project/test_links.py +++ b/tests/aequilibrae/project/test_links.py @@ -93,9 +93,7 @@ def test_fields(self): self.curr.execute("pragma table_info(links)") dt = self.curr.fetchall() - actual_fields = set([x[1].replace("_ab", "").replace("_ba", "") for x in dt if x[1] != "ogc_fid"]) - actual_fields = sorted(list(actual_fields)) - + actual_fields = sorted({x[1].replace("_ab", "").replace("_ba", "") for x in dt if x[1] != "ogc_fid"}) self.assertEqual(fields, actual_fields, "Table editor is weird for table links") def test_refresh(self): diff --git a/tests/aequilibrae/project/test_modes_table_triggers.py b/tests/aequilibrae/project/test_modes_table_triggers.py index 25b58bc5a..4feec1517 100644 --- a/tests/aequilibrae/project/test_modes_table_triggers.py +++ b/tests/aequilibrae/project/test_modes_table_triggers.py @@ -32,7 +32,7 @@ def setUp(self) -> None: qry_file = os.path.join(root, "database_specification/network/triggers/modes_table_triggers.sql") with open(qry_file, "r") as sql_file: self.queries = sql_file.read() - self.queries = [cmd for cmd in self.queries.split("#")] + self.queries = list(self.queries.split("#")) def tearDown(self) -> None: self.proj.close() @@ -189,12 +189,11 @@ def test_modes_on_links_insert(self): cmd = self.__get_query("modes_on_links_insert") if self.rtree: self.curr.execute("pragma table_info(links)") - f = self.curr.fetchall() - fields = {x[1]: x[0] for x in f} + fields = {x[1]: x[0] for x in self.curr.fetchall()} sql = "select * from links where link_id=10" self.curr.execute(sql) - a = [x for x in self.curr.fetchone()] + a = list(self.curr.fetchone()) a[fields["modes"]] = "as12" a[fields["link_id"]] = 1234 a[fields["a_node"]] = 999 @@ -220,7 +219,7 @@ def test_modes_length_on_links_insert(self): sql = "select * from links where link_id=70" self.curr.execute(sql) - a = [x for x in self.curr.fetchone()] + a = list(self.curr.fetchone()) a[fields["modes"]] = "" a[fields["link_id"]] = 4321 a[fields["a_node"]] = 888 diff --git a/tests/aequilibrae/project/test_nodes.py b/tests/aequilibrae/project/test_nodes.py index 9e0220795..dbf3ed28f 100644 --- a/tests/aequilibrae/project/test_nodes.py +++ b/tests/aequilibrae/project/test_nodes.py @@ -62,8 +62,8 @@ def test_save(self): node.geometry = Point([x, y]) nodes.save() - for nd, coords in zip(chosen, coords): - x, y = coords + for nd, crd in zip(chosen, coords): + x, y = crd self.curr.execute("Select is_centroid, asBinary(geometry) from nodes where node_id=?;", [nd]) flag, wkb = self.curr.fetchone() self.assertEqual(flag, 0, "Saving of is_centroid failed") @@ -80,9 +80,7 @@ def test_fields(self): self.curr.execute("pragma table_info(nodes)") dt = self.curr.fetchall() - actual_fields = set([x[1] for x in dt if x[1] != "ogc_fid"]) - actual_fields = sorted(list(actual_fields)) - + actual_fields = sorted({x[1] for x in dt if x[1] != "ogc_fid"}) self.assertEqual(fields, actual_fields, "Table editor is weird for table nodes") def test_copy(self): diff --git a/tests/aequilibrae/project/test_periods.py b/tests/aequilibrae/project/test_periods.py index 2021481cf..b73d9ba15 100644 --- a/tests/aequilibrae/project/test_periods.py +++ b/tests/aequilibrae/project/test_periods.py @@ -59,9 +59,7 @@ def test_fields(self): 
self.curr.execute("pragma table_info(periods)") dt = self.curr.fetchall() - actual_fields = set([x[1] for x in dt]) - actual_fields = sorted(list(actual_fields)) - + actual_fields = sorted({x[1] for x in dt}) self.assertEqual(fields, actual_fields, "Table editor is weird for table periods") def test_copy(self): diff --git a/tests/aequilibrae/transit/test_gtfs_loader.py b/tests/aequilibrae/transit/test_gtfs_loader.py index f90edd719..0af0ea558 100644 --- a/tests/aequilibrae/transit/test_gtfs_loader.py +++ b/tests/aequilibrae/transit/test_gtfs_loader.py @@ -33,7 +33,7 @@ def test_load_data(gtfs_loader, gtfs_fldr): df = cap[cap.city == "Coquimbo"] df.loc[df.min_distance < 100, "speed"] = 10 - dict_speeds = {x: df for x, df in df.groupby(["mode"])} + dict_speeds = {x: df for x, df in df.groupby(["mode"])} # noqa: C416 gtfs = gtfs_loader gtfs._set_maximum_speeds(dict_speeds) diff --git a/tests/aequilibrae/transit/test_gtfs_route.py b/tests/aequilibrae/transit/test_gtfs_route.py index 0284da092..da7bc2241 100644 --- a/tests/aequilibrae/transit/test_gtfs_route.py +++ b/tests/aequilibrae/transit/test_gtfs_route.py @@ -53,7 +53,7 @@ def test_save_to_database(self, data_dict, transit_conn): "Select agency_id, shortname, longname, description, route_type from routes where route=?", [data["route_id"]], ) - result = [x for x in curr.fetchone()] + result = list(curr.fetchone()) expected = [ data["agency_id"], data["route_short_name"], diff --git a/tests/aequilibrae/transit/test_gtfs_stop.py b/tests/aequilibrae/transit/test_gtfs_stop.py index 7f1a165a3..bb8d5bdd4 100644 --- a/tests/aequilibrae/transit/test_gtfs_stop.py +++ b/tests/aequilibrae/transit/test_gtfs_stop.py @@ -1,11 +1,10 @@ +from copy import deepcopy from random import randint, choice, uniform -from shapely.geometry import LineString + import pytest -from aequilibrae.project import Project -from aequilibrae.transit import Transit -from aequilibrae.project.database_connection import database_connection -from aequilibrae.transit.functions.get_srid import get_srid +from shapely.geometry import LineString +from aequilibrae.transit.functions.get_srid import get_srid from aequilibrae.transit.transit_elements import Stop from tests.aequilibrae.transit.random_word import randomword @@ -40,10 +39,10 @@ def test__populate(data): if key in data: assert val == data[key], "Stop population with record failed" - new_data = {key: val for key, val in data.items()} + new_data = deepcopy(data) new_data[randomword(randint(1, 15))] = randomword(randint(1, 20)) with pytest.raises(KeyError): - s = Stop(1, tuple(new_data.values()), list(new_data.keys())) + _ = Stop(1, tuple(new_data.values()), list(new_data.keys())) def test_save_to_database(data, transit_conn): @@ -62,9 +61,7 @@ def test_save_to_database(data, transit_conn): VALUES(?, ?, ?, ?, ?, ?, GeomFromWKB(?, 4326));""" transit_conn.execute(sql_tl, [tlink_id, randint(1, 1000000000), randint(1, 10), s.stop_id, s.stop_id + 1, 0, line]) - qry = transit_conn.execute( - "Select agency_id, link, dir, description, street from stops where stop=?", [data["stop_id"]] - ).fetchone() - result = [x for x in qry] + sql = "Select agency_id, link, dir, description, street from stops where stop=?" 
+ result = list(transit_conn.execute(sql, [data["stop_id"]]).fetchone()) expected = [s.agency_id, link, direc, data["stop_desc"], data["stop_street"]] assert result == expected, "Saving Stop to the database failed" diff --git a/tests/aequilibrae/transit/test_gtfs_trip.py b/tests/aequilibrae/transit/test_gtfs_trip.py index 564f6273d..00b1b29cb 100644 --- a/tests/aequilibrae/transit/test_gtfs_trip.py +++ b/tests/aequilibrae/transit/test_gtfs_trip.py @@ -1,3 +1,4 @@ +from copy import copy from random import randint, choice import pytest @@ -39,11 +40,11 @@ def test_populate(self, data): def test_save_to_database(self, data, transit_conn): r = Trip() r._populate(tuple(data.values()), list(data.keys())) - times = [r for r in range(randint(5, 15))] + times = list(range(randint(5, 15))) patid = randint(15, 2500000) - r.arrivals = [r for r in times] - r.departures = [r for r in times] + r.arrivals = copy(times) + r.departures = copy(times) r.pattern_id = patid r.source_time = [0] * len(times) r.save_to_database(transit_conn) diff --git a/tests/aequilibrae/transit/test_lib_gtfs.py b/tests/aequilibrae/transit/test_lib_gtfs.py index 73b4e0f33..b6a49fc9f 100644 --- a/tests/aequilibrae/transit/test_lib_gtfs.py +++ b/tests/aequilibrae/transit/test_lib_gtfs.py @@ -23,7 +23,7 @@ def test_set_capacities(system_builder): def test_dates_available(system_builder): dates = system_builder.dates_available() - assert type(dates) is list + assert isinstance(dates, list) def test_set_allow_map_match(system_builder): diff --git a/tests/aequilibrae/transit/test_pattern.py b/tests/aequilibrae/transit/test_pattern.py index 81f4f9b8b..584cdfc89 100644 --- a/tests/aequilibrae/transit/test_pattern.py +++ b/tests/aequilibrae/transit/test_pattern.py @@ -12,7 +12,7 @@ def pat(create_path, create_gtfs_project): transit.load_date("2016-04-13") patterns = transit.select_patterns - yield [x for x in patterns.values()][0] + yield list(patterns.values())[0] def test_save_to_database(pat, transit_conn): diff --git a/tests/requirements_tests.txt b/tests/requirements_tests.txt index bc7580cc1..6d41ce0fe 100644 --- a/tests/requirements_tests.txt +++ b/tests/requirements_tests.txt @@ -4,6 +4,6 @@ pytest pytest-cov pytest-subtests coveralls -flake8 +ruff==0.1.14 wheel -black +black==24.1.1 diff --git a/tests/setup_windows_spatialite.py b/tests/setup_windows_spatialite.py index 900a93615..a770faff4 100644 --- a/tests/setup_windows_spatialite.py +++ b/tests/setup_windows_spatialite.py @@ -31,7 +31,7 @@ root = "C:/hostedtoolcache/windows/Python/" file = "sqlite3.dll" - for d, subD, f in walk(root): + for d, _, f in walk(root): if file in f: if "x64" in d: zipfile.ZipFile(zip_path64).extractall(d) From 427f223e968d6a2d283f7ec6b95dbd29fd25d6f2 Mon Sep 17 00:00:00 2001 From: Jake Moss Date: Wed, 31 Jan 2024 15:06:25 +1000 Subject: [PATCH 35/44] Update GMNS urls (#499) * Update GMNS urls * Linting --- .../creating_models/plot_import_from_gmns.py | 8 ++++---- .../other_applications/plot_export_to_gmns.py | 2 +- .../project_database/exporting_to_gmns.rst | 2 +- .../project_database/importing_from_gmns.rst | 4 ++-- tests/aequilibrae/project/test_network_gmns.py | 12 +++++++++--- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/docs/source/examples/creating_models/plot_import_from_gmns.py b/docs/source/examples/creating_models/plot_import_from_gmns.py index c31706b9c..a28d0dff1 100644 --- a/docs/source/examples/creating_models/plot_import_from_gmns.py +++ b/docs/source/examples/creating_models/plot_import_from_gmns.py @@ -21,9 +21,9 
@@ # %% # We load the example file from the GMNS GitHub repository -link_file = "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/development/Small_Network_Examples/Arlington_Signals/link.csv" -node_file = "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/development/Small_Network_Examples/Arlington_Signals/node.csv" -use_group_file = "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/development/Small_Network_Examples/Arlington_Signals/use_group.csv" +link_file = "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/develop/examples/Arlington_Signals/link.csv" +node_file = "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/develop/examples/Arlington_Signals/node.csv" +use_group_file = "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/develop/examples/Arlington_Signals/use_group.csv" # %% # We create the example project inside our temp folder @@ -61,7 +61,7 @@ # %% # Now, let's plot a map. This map can be compared with the images of the README.md # file located in this example repository on GitHub: -# https://github.com/zephyr-data-specs/GMNS/blob/development/Small_Network_Examples/Arlington_Signals/README.md +# https://github.com/zephyr-data-specs/GMNS/blob/develop/examples/Arlington_Signals/README.md links = project.network.links.data nodes = project.network.nodes.data diff --git a/docs/source/examples/other_applications/plot_export_to_gmns.py b/docs/source/examples/other_applications/plot_export_to_gmns.py index e9eda5d2f..8ea72d87c 100644 --- a/docs/source/examples/other_applications/plot_export_to_gmns.py +++ b/docs/source/examples/other_applications/plot_export_to_gmns.py @@ -37,7 +37,7 @@ # %% # Now, let's plot a map. This map can be compared with the images of the README.md # file located in this example repository on GitHub: -# https://github.com/zephyr-data-specs/GMNS/blob/development/Small_Network_Examples/Arlington_Signals/README.md +# https://github.com/zephyr-data-specs/GMNS/blob/develop/examples/Arlington_Signals/README.md links = pd.read_csv(os.path.join(output_fldr, "link.csv")) nodes = pd.read_csv(os.path.join(output_fldr, "node.csv")) diff --git a/docs/source/modeling_with_aequilibrae/project_database/exporting_to_gmns.rst b/docs/source/modeling_with_aequilibrae/project_database/exporting_to_gmns.rst index ba3fbdd20..6fccb1ee5 100644 --- a/docs/source/modeling_with_aequilibrae/project_database/exporting_to_gmns.rst +++ b/docs/source/modeling_with_aequilibrae/project_database/exporting_to_gmns.rst @@ -34,4 +34,4 @@ back at the AequilibraE project. **is an optinal field listed in the GMNS node table specification.** You can find the GMNS specification -`here `_. +`here `_. diff --git a/docs/source/modeling_with_aequilibrae/project_database/importing_from_gmns.rst b/docs/source/modeling_with_aequilibrae/project_database/importing_from_gmns.rst index 2af1707cb..62cb962da 100644 --- a/docs/source/modeling_with_aequilibrae/project_database/importing_from_gmns.rst +++ b/docs/source/modeling_with_aequilibrae/project_database/importing_from_gmns.rst @@ -21,7 +21,7 @@ As of July 2022, it is possible to import the following files from a GMNS source * geometry table. You can find the specification for all these tables in the GMNS documentation, -`here `_. +`here `_. By default, the method ``create_from_gmns()`` read all required and optional fields specified in the GMNS link and node tables specification. If you need it to read @@ -38,4 +38,4 @@ specification. **'is_centroid' field has to be set to 1. 
However, this is not part of the GMNS** **specification. Thus, if you want a node to be identified as a centroid during the** **import process, in the GMNS node table you have to set the field 'node_type' equals** - **to 'centroid'.** \ No newline at end of file + **to 'centroid'.** diff --git a/tests/aequilibrae/project/test_network_gmns.py b/tests/aequilibrae/project/test_network_gmns.py index 5731e2052..9257f387e 100644 --- a/tests/aequilibrae/project/test_network_gmns.py +++ b/tests/aequilibrae/project/test_network_gmns.py @@ -17,9 +17,15 @@ def test_create_from_gmns(self): self.project = Project() self.project.new(proj_path) - link_file = "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/development/Small_Network_Examples/Arlington_Signals/link.csv" - node_file = "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/development/Small_Network_Examples/Arlington_Signals/node.csv" - use_group_file = "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/development/Small_Network_Examples/Arlington_Signals/use_group.csv" + link_file = ( + "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/develop/examples/Arlington_Signals/link.csv" + ) + node_file = ( + "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/develop/examples/Arlington_Signals/node.csv" + ) + use_group_file = ( + "https://raw.githubusercontent.com/zephyr-data-specs/GMNS/develop/examples/Arlington_Signals/use_group.csv" + ) new_link_fields = { "bridge": {"description": "bridge flag", "type": "text", "required": False}, From 9552907f8a8f42f6b3ddc289bdfbdbb35e60d595 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 1 Feb 2024 16:21:16 +1000 Subject: [PATCH 36/44] Remove debug method and fix small typos --- aequilibrae/paths/route_choice.pyx | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 3aa856b22..4ee98be4e 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -15,7 +15,7 @@ a lot of cpp interop and memory management. A description of the purpose of vari route_set: See route_choice.pxd for full type signature. It's an unordered set (hash set) of pointers to vectors of link IDs. It uses a custom hashing function and comparator. The hashing function is defined in a string that in inlined -directly into the output ccp. This is done allow declaring an the `()` operator, which is required and AFAIK not +directly into the output cpp. This is done to allow declaring of the `()` operator, which is required and AFAIK not possible in Cython. The hash is designed to dereference then hash order dependent vectors. One isn't provided by stdlib. The comparator simply dereferences the pointer and uses the vector comparator. It's designed to store the outputted paths. Heap allocated (needs to be returned). @@ -86,7 +86,7 @@ cdef class RouteChoiceSet: # self.heuristic = HEURISTIC_MAP[self.res._heuristic] self.cost_view = graph.compact_cost self.graph_fs_view = graph.compact_fs - self.b_nodes_view = graph.compact_graph.b_node.values # FIXME: Why does path_computation copy this?
+ self.b_nodes_view = graph.compact_graph.b_node.values self.nodes_to_indices_view = graph.compact_nodes_to_indices tmp = graph.lonlat_index.loc[graph.compact_all_nodes] self.lat_view = tmp.lat.values @@ -164,7 +164,7 @@ cdef class RouteChoiceSet: long long origin_index, dest_index, i if max_routes == 0 and max_depth == 0: - raise ValueError("Either `max_routes` or `max_depth` must be >= 0") + raise ValueError("Either `max_routes` or `max_depth` must be > 0") if max_routes < 0 or max_depth < 0 or cores < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") @@ -232,30 +232,6 @@ cdef class RouteChoiceSet: del results return dict(zip(ods, res)) - def _generate_line_strins(self, project, graph, results): - """Debug method""" - import geopandas as gpd - import shapely - - links = project.network.links.data.set_index("link_id") - df = [] - for od, route_set in results.items(): - for route in route_set: - df.append( - ( - *od, - shapely.MultiLineString( - links.loc[ - graph.graph[graph.graph.__compressed_id__.isin(route)].link_id - ].geometry.to_list() - ), - ) - ) - - df = gpd.GeoDataFrame(df, columns=["origin", "destination", "geometry"]) - df.set_geometry("geometry") - df.to_file("test1.gpkg", layer='routes', driver="GPKG") - @cython.initializedcheck(False) cdef void path_find( RouteChoiceSet self, From a25841a767349749f326824d821887fb8b161df9 Mon Sep 17 00:00:00 2001 From: Pedro Camargo Date: Thu, 1 Feb 2024 18:20:59 -0600 Subject: [PATCH 37/44] Changes OSM downloader to accept a polygon instead of a bounding box (#495) * . * . * . * black * . * Cleans non-needed nodes * . * Updates checkout action to V4 as per deprecation of V3 * Remove indentation and if block --------- Co-authored-by: pveigadecamargo Co-authored-by: Jake-Moss --- .github/build_artifacts_qgis.yml | 2 +- .github/workflows/build_linux.yml | 2 +- .github/workflows/build_mac.yml | 2 +- .github/workflows/build_windows.yml | 2 +- .github/workflows/debug_tests.yml | 2 +- .github/workflows/documentation.yml | 2 +- .../workflows/test_linux_with_coverage.yml | 2 +- .github/workflows/unit_tests.yml | 4 +- aequilibrae/project/network/__init__.py | 1 - aequilibrae/project/network/gmns_exporter.py | 2 +- aequilibrae/project/network/network.py | 52 ++++++++--------- .../network/{osm_utils => osm}/__init__.py | 0 .../project/network/{ => osm}/osm_builder.py | 35 ++++++----- .../network/{ => osm}/osm_downloader.py | 12 ++-- .../network/{osm_utils => osm}/osm_params.py | 0 .../{osm_utils => osm}/place_getter.py | 4 +- .../examples/creating_models/from_osm.py | 4 ++ tests/aequilibrae/project/test_network.py | 58 +++++++++---------- .../project/test_osm_downloader.py | 13 +++-- .../aequilibrae/project/test_place_getter.py | 7 ++- 20 files changed, 112 insertions(+), 94 deletions(-) rename aequilibrae/project/network/{osm_utils => osm}/__init__.py (100%) rename aequilibrae/project/network/{ => osm}/osm_builder.py (93%) rename aequilibrae/project/network/{ => osm}/osm_downloader.py (96%) rename aequilibrae/project/network/{osm_utils => osm}/osm_params.py (100%) rename aequilibrae/project/network/{osm_utils => osm}/place_getter.py (99%) diff --git a/.github/build_artifacts_qgis.yml b/.github/build_artifacts_qgis.yml index 2f5bbc338..eec17d60e 100644 --- a/.github/build_artifacts_qgis.yml +++ b/.github/build_artifacts_qgis.yml @@ -19,7 +19,7 @@ jobs: architecture: ['x64'] os: [windows-latest, macos-latest] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set Python environment uses: 
actions/setup-python@v4 with: diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml index b50de47c9..e0ac2b110 100644 --- a/.github/workflows/build_linux.yml +++ b/.github/workflows/build_linux.yml @@ -9,7 +9,7 @@ jobs: HAS_SECRETS: ${{ secrets.AWS_SECRET_ACCESS_KEY != '' }} continue-on-error: true steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.9 uses: actions/setup-python@v4 with: diff --git a/.github/workflows/build_mac.yml b/.github/workflows/build_mac.yml index 9f5fedaf1..401204b28 100644 --- a/.github/workflows/build_mac.yml +++ b/.github/workflows/build_mac.yml @@ -16,7 +16,7 @@ jobs: matrix: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set Python environment uses: actions/setup-python@v4 with: diff --git a/.github/workflows/build_windows.yml b/.github/workflows/build_windows.yml index 477a6e4a8..3d4677e74 100644 --- a/.github/workflows/build_windows.yml +++ b/.github/workflows/build_windows.yml @@ -13,7 +13,7 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] architecture: ['x64'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set Python environment uses: actions/setup-python@v4 with: diff --git a/.github/workflows/debug_tests.yml b/.github/workflows/debug_tests.yml index 692716022..15cae28c1 100644 --- a/.github/workflows/debug_tests.yml +++ b/.github/workflows/debug_tests.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-20.04 container: python:3.9 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 7aab96c23..065c9ffd4 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -15,7 +15,7 @@ jobs: env: HAS_SECRETS: ${{ secrets.AWS_SECRET_ACCESS_KEY != '' }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.9 uses: actions/setup-python@v4 with: diff --git a/.github/workflows/test_linux_with_coverage.yml b/.github/workflows/test_linux_with_coverage.yml index 70d8fe524..38b5ae19f 100644 --- a/.github/workflows/test_linux_with_coverage.yml +++ b/.github/workflows/test_linux_with_coverage.yml @@ -11,7 +11,7 @@ jobs: matrix: python-version: [3.10] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install dependencies run: | sudo apt update diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 88764c71c..5b6af38d9 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -6,7 +6,7 @@ jobs: linting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set Python environment uses: actions/setup-python@v4 with: @@ -36,7 +36,7 @@ jobs: max-parallel: 20 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set Python environment uses: actions/setup-python@v4 with: diff --git a/aequilibrae/project/network/__init__.py b/aequilibrae/project/network/__init__.py index b6e50f426..11d161266 100644 --- a/aequilibrae/project/network/__init__.py +++ b/aequilibrae/project/network/__init__.py @@ -1,4 +1,3 @@ -from .osm_downloader import OSMDownloader from .network import Network from .mode import Mode from .modes import Modes diff --git a/aequilibrae/project/network/gmns_exporter.py 
b/aequilibrae/project/network/gmns_exporter.py index 9319e5406..e4a72620c 100644 --- a/aequilibrae/project/network/gmns_exporter.py +++ b/aequilibrae/project/network/gmns_exporter.py @@ -12,7 +12,7 @@ def __init__(self, net, path) -> None: self.p = Parameters() self.links_df = net.links.data self.nodes_df = net.nodes.data - self.source = net.source + self.source = net.project.path_to_file self.output_path = path self.gmns_parameters = self.p.parameters["network"]["gmns"] diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py index e9470a62d..b9d5f8115 100644 --- a/aequilibrae/project/network/network.py +++ b/aequilibrae/project/network/network.py @@ -1,18 +1,16 @@ import importlib.util as iutil import math -from sqlite3 import Connection as sqlc -from typing import Dict +from typing import Dict, Optional import numpy as np import pandas as pd import shapely.wkb import shapely.wkt -from shapely.geometry import Polygon +from shapely.geometry import Polygon, box from shapely.ops import unary_union from aequilibrae.context import get_logger from aequilibrae.parameters import Parameters -from aequilibrae.project.network import OSMDownloader from aequilibrae.project.network.gmns_builder import GMNSBuilder from aequilibrae.project.network.gmns_exporter import GMNSExporter from aequilibrae.project.network.haversine import haversine @@ -20,9 +18,10 @@ from aequilibrae.project.network.links import Links from aequilibrae.project.network.modes import Modes from aequilibrae.project.network.nodes import Nodes +from aequilibrae.project.network.osm.osm_builder import OSMBuilder +from aequilibrae.project.network.osm.osm_downloader import OSMDownloader +from aequilibrae.project.network.osm.place_getter import placegetter from aequilibrae.project.network.periods import Periods -from aequilibrae.project.network.osm_builder import OSMBuilder -from aequilibrae.project.network.osm_utils.place_getter import placegetter from aequilibrae.project.project_creation import req_link_flds, req_node_flds, protected_fields from aequilibrae.utils import WorkerThread from aequilibrae.utils.db_utils import commit_and_close @@ -51,7 +50,6 @@ def __init__(self, project) -> None: from aequilibrae.paths import Graph WorkerThread.__init__(self, None) - self.source = project.source # type: sqlc self.graphs = {} # type: Dict[Graph] self.project = project self.logger = project.logger @@ -125,27 +123,22 @@ def list_modes(self): def create_from_osm( self, - west: float = None, - south: float = None, - east: float = None, - north: float = None, - place_name: str = None, - modes=["car", "transit", "bicycle", "walk"], # noqa: B006 + model_area: Optional[Polygon] = None, + place_name: Optional[str] = None, + modes=("car", "transit", "bicycle", "walk"), ) -> None: """ Downloads the network from Open-Street Maps :Arguments: - **west** (:obj:`float`, Optional): West most coordinate of the download bounding box - **south** (:obj:`float`, Optional): South most coordinate of the download bounding box - - **east** (:obj:`float`, Optional): East most coordinate of the download bounding box + **model_area** (:obj:`Polygon`, Optional): Polygon for which the network will be downloaded. If not provided, + a place name is required **place_name** (:obj:`str`, Optional): If not downloading with East-West-North-South boundingbox, this is required - **modes** (:obj:`list`, Optional): List of all modes to be downloaded.
Defaults to the modes in the parameter + **modes** (:obj:`tuple`, Optional): List of all modes to be downloaded. Defaults to the modes in the parameter file .. code-block:: python @@ -189,12 +182,16 @@ def create_from_osm( raise ValueError("'modes' needs to be string or list/tuple of string") if place_name is None: - if min(east, west) < -180 or max(east, west) > 180 or min(north, south) < -90 or max(north, south) > 90: - raise ValueError("Coordinates out of bounds") - bbox = [west, south, east, north] + if ( + model_area.bounds[0] < -180 + or model_area.bounds[2] > 180 + or model_area.bounds[1] < -90 + or model_area.bounds[3] > 90 + ): + raise ValueError("Coordinates out of bounds. Polygon must be in WGS84") + west, south, east, north = model_area.bounds else: bbox, report = placegetter(place_name) - west, south, east, north = bbox if bbox is None: msg = f'We could not find a reference for place name "{place_name}"' self.logger.warning(msg) @@ -202,6 +199,8 @@ def create_from_osm( for i in report: if "PLACE FOUND" in i: self.logger.info(i) + model_area = box(*bbox) + west, south, east, north = bbox # Need to compute the size of the bounding box to not exceed it too much height = haversine((east + west) / 2, south, (east + west) / 2, north) @@ -212,7 +211,7 @@ def create_from_osm( max_query_area_size = par["max_query_area_size"] if area < max_query_area_size: - polygons = [bbox] + polygons = [model_area] else: polygons = [] parts = math.ceil(area / max_query_area_size) @@ -226,8 +225,9 @@ def create_from_osm( for j in range(vertical): ymin = max(-90, south + j * dy) ymax = min(90, south + (j + 1) * dy) - box = [xmin, ymin, xmax, ymax] - polygons.append(box) + subarea = box(xmin, ymin, xmax, ymax) + if subarea.intersects(model_area): + polygons.append(subarea) self.logger.info("Downloading data") self.downloader = OSMDownloader(polygons, modes, logger=self.logger) if pyqt: @@ -236,7 +236,7 @@ def create_from_osm( self.downloader.doWork() self.logger.info("Building Network") - self.builder = OSMBuilder(self.downloader.json, self.source, project=self.project) + self.builder = OSMBuilder(self.downloader.json, project=self.project, model_area=model_area) if pyqt: self.builder.building.connect(self.signal_handler) diff --git a/aequilibrae/project/network/osm_utils/__init__.py b/aequilibrae/project/network/osm/__init__.py similarity index 100% rename from aequilibrae/project/network/osm_utils/__init__.py rename to aequilibrae/project/network/osm/__init__.py diff --git a/aequilibrae/project/network/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py similarity index 93% rename from aequilibrae/project/network/osm_builder.py rename to aequilibrae/project/network/osm/osm_builder.py index de37f1fe5..ecc10c222 100644 --- a/aequilibrae/project/network/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -1,43 +1,41 @@ import gc import importlib.util as iutil -import sqlite3 import string from typing import List import numpy as np import pandas as pd +from shapely import Point +from shapely.geometry import Polygon from aequilibrae.context import get_active_project from aequilibrae.parameters import Parameters +from aequilibrae.project.network.haversine import haversine from aequilibrae.project.network.link_types import LinkTypes -from aequilibrae.utils.spatialite_utils import connect_spatialite -from .haversine import haversine -from ...utils import WorkerThread - +from aequilibrae.utils import WorkerThread from aequilibrae.utils.db_utils import commit_and_close +from 
aequilibrae.utils.spatialite_utils import connect_spatialite -spec = iutil.find_spec("PyQt5") -pyqt = spec is not None +pyqt = iutil.find_spec("PyQt5") is not None if pyqt: from PyQt5.QtCore import pyqtSignal -spec = iutil.find_spec("qgis") -isqgis = spec is not None -if isqgis: - import qgis +if iutil.find_spec("qgis") is not None: + pass class OSMBuilder(WorkerThread): if pyqt: building = pyqtSignal(object) - def __init__(self, osm_items: List, path: str, node_start=10000, project=None) -> None: + def __init__(self, osm_items: List, project, model_area: Polygon) -> None: WorkerThread.__init__(self, None) self.project = project or get_active_project() self.logger = self.project.logger self.osm_items = osm_items - self.path = path - self.node_start = node_start + self.model_area = model_area + self.path = self.project.path_to_file + self.node_start = 10000 self.__link_types = None # type: LinkTypes self.report = [] self.__model_link_types = [] @@ -57,6 +55,9 @@ def doWork(self): self.__worksetup() node_count = self.data_structures() self.importing_links(node_count, conn) + conn.execute( + "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)" + ) self.__emit_all(["finished_threaded_procedure", 0]) def data_structures(self): @@ -87,6 +88,7 @@ def data_structures(self): nid = node.pop("id") _ = node.pop("type") node["node_id"] = i + self.node_start + node["inside_model"] = self.model_area.contains(Point(node["lon"], node["lat"])) self.nodes[nid] = node self.node_df.append([node["node_id"], nid, node["lon"], node["lat"]]) self.__emit_all(["Value", i]) @@ -190,6 +192,8 @@ def importing_links(self, node_count, conn): if len(vars["modes"]) > 0: for i in range(segments): attributes = self.__build_link_data(vars, intersections, i, linknodes, node_ids, fields) + if attributes is None: + continue all_attrs.append(attributes) vars["link_id"] += 1 @@ -244,6 +248,9 @@ def __build_link_data(self, vars, intersections, i, linknodes, node_ids, fields) ) geometry = ["{} {}".format(self.nodes[x]["lon"], self.nodes[x]["lat"]) for x in all_nodes] + inside_area = sum([self.nodes[x]["inside_model"] for x in all_nodes]) + if inside_area == 0: + return None geometry = "LINESTRING ({})".format(", ".join(geometry)) attributes = [vars.get(x) for x in fields] diff --git a/aequilibrae/project/network/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py similarity index 96% rename from aequilibrae/project/network/osm_downloader.py rename to aequilibrae/project/network/osm/osm_downloader.py index 79ad15170..76c810b1e 100644 --- a/aequilibrae/project/network/osm_downloader.py +++ b/aequilibrae/project/network/osm/osm_downloader.py @@ -13,13 +13,17 @@ import logging import time import re +from typing import List + import requests -from .osm_utils.osm_params import http_headers, memory +from shapely import Polygon + +from .osm_params import http_headers, memory from aequilibrae.parameters import Parameters from aequilibrae.context import get_logger +from aequilibrae.utils import WorkerThread import gc import importlib.util as iutil -from ...utils import WorkerThread spec = iutil.find_spec("PyQt5") pyqt = spec is not None @@ -35,7 +39,7 @@ def __emit_all(self, *args): if pyqt: self.downloading.emit(*args) - def __init__(self, polygons, modes, logger: logging.Logger = None): + def __init__(self, polygons: List[Polygon], modes, logger: logging.Logger = None): WorkerThread.__init__(self, None) self.logger = logger or get_logger() self.polygons = polygons @@ -63,7 
+67,7 @@ def doWork(self): self.logger.debug(msg) self.__emit_all(["Value", counter]) self.__emit_all(["text", msg]) - west, south, east, north = poly + west, south, east, north = poly.bounds query_str = query_template.format( north=north, south=south, diff --git a/aequilibrae/project/network/osm_utils/osm_params.py b/aequilibrae/project/network/osm/osm_params.py similarity index 100% rename from aequilibrae/project/network/osm_utils/osm_params.py rename to aequilibrae/project/network/osm/osm_params.py diff --git a/aequilibrae/project/network/osm_utils/place_getter.py b/aequilibrae/project/network/osm/place_getter.py similarity index 99% rename from aequilibrae/project/network/osm_utils/place_getter.py rename to aequilibrae/project/network/osm/place_getter.py index 25fe4db1b..9bdafc634 100644 --- a/aequilibrae/project/network/osm_utils/place_getter.py +++ b/aequilibrae/project/network/osm/place_getter.py @@ -1,7 +1,9 @@ -import time import re +import time from typing import List, Union, Tuple + import requests + from aequilibrae.parameters import Parameters from .osm_params import http_headers diff --git a/docs/source/examples/creating_models/from_osm.py b/docs/source/examples/creating_models/from_osm.py index 93d634349..12244fee7 100644 --- a/docs/source/examples/creating_models/from_osm.py +++ b/docs/source/examples/creating_models/from_osm.py @@ -31,6 +31,10 @@ # For the sake of this example, we will choose the small nation of Nauru. project.network.create_from_osm(place_name="Nauru") +# We can also choose to create a model from a polygon (which must be in EPSG:4326) +# Or from a Polygon defined by a bounding box, for example +# project.network.create_from_osm(model_area=box(-112.185, 36.59, -112.179, 36.60)) + # %% # We grab all the links data as a Pandas DataFrame so we can process it easier links = project.network.links.data diff --git a/tests/aequilibrae/project/test_network.py b/tests/aequilibrae/project/test_network.py index d70e3daa7..0a073aeb9 100644 --- a/tests/aequilibrae/project/test_network.py +++ b/tests/aequilibrae/project/test_network.py @@ -1,11 +1,13 @@ -from unittest import TestCase -from tempfile import gettempdir import os import uuid from shutil import copytree -from aequilibrae.project import Project +from tempfile import gettempdir +from unittest import TestCase from warnings import warn -from random import random + +from shapely.geometry import box + +from aequilibrae.project import Project from ...data import siouxfalls_project @@ -22,40 +24,36 @@ def tearDown(self) -> None: self.siouxfalls.close() def test_create_from_osm(self): - thresh = 0.05 if os.environ.get("GITHUB_WORKFLOW", "ERROR") == "Code coverage": - thresh = 1.01 + print("Skipped check to not load OSM servers") + return - if random() < thresh: - self.siouxfalls.close() - self.project = Project() - self.project.new(self.proj_path2) - # self.network.create_from_osm(west=153.1136245, south=-27.5095487, east=153.115, north=-27.5085, modes=["car"]) - self.project.network.create_from_osm(west=-112.185, south=36.59, east=-112.179, north=36.60) - curr = self.project.conn.cursor() + self.siouxfalls.close() + self.project = Project() + self.project.new(self.proj_path2) + self.project.network.create_from_osm(model_area=box(-112.185, 36.59, -112.179, 36.60)) + curr = self.project.conn.cursor() - curr.execute("""select count(*) from links""") - lks = curr.fetchone()[0] + curr.execute("""select count(*) from links""") + lks = curr.fetchone()[0] - curr.execute("""select count(distinct osm_id) from links""") - 
osmids = curr.fetchone()[0] + curr.execute("""select count(distinct osm_id) from links""") + osmids = curr.fetchone()[0] - if osmids == 0: - warn("COULD NOT RETRIEVE DATA FROM OSM") - return + if osmids == 0: + warn("COULD NOT RETRIEVE DATA FROM OSM") + return - if osmids >= lks: - self.fail("OSM links not broken down properly") + if osmids >= lks: + self.fail("OSM links not broken down properly") - curr.execute("""select count(*) from nodes""") - nds = curr.fetchone()[0] + curr.execute("""select count(*) from nodes""") + nds = curr.fetchone()[0] - if lks > nds: - self.fail("We imported more links than nodes. Something wrong here") - self.project.close() - self.siouxfalls.open(self.proj_path) - else: - print("Skipped check to not load OSM servers") + if lks > nds: + self.fail("We imported more links than nodes. Something wrong here") + self.project.close() + self.siouxfalls.open(self.proj_path) def test_count_centroids(self): items = self.siouxfalls.network.count_centroids() diff --git a/tests/aequilibrae/project/test_osm_downloader.py b/tests/aequilibrae/project/test_osm_downloader.py index ed9cf0da4..706c04989 100644 --- a/tests/aequilibrae/project/test_osm_downloader.py +++ b/tests/aequilibrae/project/test_osm_downloader.py @@ -1,9 +1,12 @@ import importlib.util as iutil -from tempfile import gettempdir -from unittest import TestCase import os -from aequilibrae.project.network.osm_downloader import OSMDownloader from random import random +from tempfile import gettempdir +from unittest import TestCase + +from shapely.geometry import box + +from aequilibrae.project.network.osm.osm_downloader import OSMDownloader spec = iutil.find_spec("PyQt5") pyqt = spec is not None @@ -16,7 +19,7 @@ def setUp(self) -> None: def test_do_work(self): if not self.should_do_work(): return - o = OSMDownloader([[0.0, 0.0, 0.1, 0.1]], ["car"]) + o = OSMDownloader([box(0.0, 0.0, 0.1, 0.1)], ["car"]) o.doWork() if o.json: self.fail("It found links in the middle of the ocean") @@ -26,7 +29,7 @@ def test_do_work2(self): return # LITTLE PLACE IN THE MIDDLE OF THE Grand Canyon North Rim - o = OSMDownloader([[-112.185, 36.59, -112.179, 36.60]], ["car"]) + o = OSMDownloader([box(-112.185, 36.59, -112.179, 36.60)], ["car"]) o.doWork() if "elements" not in o.json[0]: diff --git a/tests/aequilibrae/project/test_place_getter.py b/tests/aequilibrae/project/test_place_getter.py index 8c02a6b9d..0df6b1799 100644 --- a/tests/aequilibrae/project/test_place_getter.py +++ b/tests/aequilibrae/project/test_place_getter.py @@ -1,7 +1,8 @@ -from unittest import TestCase -from aequilibrae.project.network.osm_utils.place_getter import placegetter -from random import random import os +from random import random +from unittest import TestCase + +from aequilibrae.project.network.osm.place_getter import placegetter class Test(TestCase): From f89c19f90af585eb1caec1b81192c1c5defd092c Mon Sep 17 00:00:00 2001 From: Jake Moss Date: Fri, 2 Feb 2024 10:34:22 +1000 Subject: [PATCH 38/44] Select link cfw bfw bug (#500) * Stable until iter 4 * Add multi-iteration test over all algorithms * Fix mismatched results Overall the root of the issue was that the select link wasn't taking into account the previous step directions in its calculations. In principle, the patch that modified the select link logic to do just that should have worked. However, the matrix that stored the previous step directions was allocated on the wrong object and, while the object lived the entire duration of the assignment, the matrix was being zeroed at each iteration.
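To illustrate the failure mode, a minimal sketch (the class and attribute names here are hypothetical, not the actual AequilibraE API): state that must survive across iterations is stored on an object whose prepare() is re-run every iteration, so the "persistent" matrix is silently re-allocated to zeros:

    import numpy as np

    class Results:
        def prepare(self, n):
            # re-allocating on every call discards whatever was accumulated before
            self.step_direction = np.zeros(n)

    res = Results()  # lives for the whole assignment
    for it in range(5):
        res.prepare(10)          # called once per iteration
        res.step_direction += 1  # "persistent" state, zeroed again next iteration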
I'd overlooked this as it did improve the stability up to 4 iterations, but Jan pointed out to me that the betas being used were 1.0 (and 0.0 respectively) up until the 5th iteration where the step direction truly started to matter. Another two bugs I found are patched in this commit. 1. The aon object was being leaked outside of the traffic class for loop, which meant that the last traffic class was being used for all select link analyses. The aon objects are now stored and reused; this may increase memory usage during the assignment as GC can no longer clean up the memory allocated by these objects at the end of every AON assignment; instead, it will do so at the end of all assignments. 2. As noted in the comment, `pre_previous_step_direction` was being used in place of `previous_step_direction`. * Silence some warnings * Make black and ruff agree * typo * Rename pre_previous_step_direction to temp_step_direction_for_copy --------- Co-authored-by: pveigadecamargo --- aequilibrae/matrix/aequilibrae_matrix.py | 14 +- aequilibrae/paths/all_or_nothing.py | 5 +- aequilibrae/paths/linear_approximation.py | 186 ++++++++++++++---- .../paths/results/assignment_results.py | 3 +- aequilibrae/transit/gtfs_loader.py | 6 +- tests/aequilibrae/paths/test_select_link.py | 43 +++++- 6 files changed, 198 insertions(+), 59 deletions(-) diff --git a/aequilibrae/matrix/aequilibrae_matrix.py b/aequilibrae/matrix/aequilibrae_matrix.py index 45177263f..13fc3c492 100644 --- a/aequilibrae/matrix/aequilibrae_matrix.py +++ b/aequilibrae/matrix/aequilibrae_matrix.py @@ -129,7 +129,6 @@ def save(self, names=(), file_name=None) -> None: self.computational_view(names) def __save_as(self, file_name: str, cores: List[str]): - if Path(file_name).suffix.lower() == ".aem": mat = AequilibraeMatrix() args = { @@ -250,10 +249,7 @@ def create_empty( if mat_name in object.__dict__: raise ValueError(mat_name + " is a reserved name") if len(mat_name) > CORE_NAME_MAX_LENGTH: - raise ValueError( - "Matrix names need to be be shorter " - "than {}: {}".format(CORE_NAME_MAX_LENGTH, mat_name) - ) + raise ValueError(f"Matrix names need to be shorter than {CORE_NAME_MAX_LENGTH}: {mat_name}") else: raise ValueError("Matrix core names need to be strings: " + str(mat_name)) else: @@ -608,9 +604,7 @@ def __write__(self): np.memmap(self.file_path, dtype="uint8", offset=17, mode="r+", shape=1)[0] = data_size # matrix name - np.memmap(self.file_path, dtype="S" + str(MATRIX_NAME_MAX_LENGTH), offset=18, mode="r+", shape=1)[0] = ( - self.name - ) + np.memmap(self.file_path, dtype=f"S{MATRIX_NAME_MAX_LENGTH}", offset=18, mode="r+", shape=1)[0] = self.name # matrix description offset = 18 + MATRIX_NAME_MAX_LENGTH @@ -1133,9 +1127,7 @@ def setName(self, matrix_name: str): if len(str(matrix_name)) > MATRIX_NAME_MAX_LENGTH: matrix_name = str(matrix_name)[0:MATRIX_NAME_MAX_LENGTH] - np.memmap(self.file_path, dtype="S" + str(MATRIX_NAME_MAX_LENGTH), offset=18, mode="r+", shape=1)[0] = ( - matrix_name - ) + np.memmap(self.file_path, dtype=f"S{MATRIX_NAME_MAX_LENGTH}", offset=18, shape=1)[0] = matrix_name def setDescription(self, matrix_description: str): """ diff --git a/aequilibrae/paths/all_or_nothing.py b/aequilibrae/paths/all_or_nothing.py index 6cd1becb0..6400683b0 100644 --- a/aequilibrae/paths/all_or_nothing.py +++ b/aequilibrae/paths/all_or_nothing.py @@ -36,8 +36,6 @@ def __init__(self, matrix, graph, results): self.graph = graph self.results = results self.aux_res = MultiThreadedAoN() - self.report = [] - self.cumulative = 0 if results._graph_id !=
graph._id: raise ValueError("Results object not prepared. Use --> results.prepare(graph)") @@ -55,6 +53,9 @@ def doWork(self): self.execute() def execute(self): + self.report = [] + self.cumulative = 0 + if pyqt: self.assignment.emit(["zones finalized", 0]) diff --git a/aequilibrae/paths/linear_approximation.py b/aequilibrae/paths/linear_approximation.py index fa0c376a2..c1cb86c29 100644 --- a/aequilibrae/paths/linear_approximation.py +++ b/aequilibrae/paths/linear_approximation.py @@ -122,7 +122,9 @@ def __init__(self, assig_spec, algorithm, project=None) -> None: self.step_direction = {} # type: Dict[AssignmentResults] self.previous_step_direction = {} # type: Dict[AssignmentResults] - self.pre_previous_step_direction = {} # type: Dict[AssignmentResults] + self.temp_step_direction_for_copy = {} # type: Dict[AssignmentResults] + + self.aons = {} for c in self.traffic_classes: r = AssignmentResults() @@ -131,23 +133,12 @@ def __init__(self, assig_spec, algorithm, project=None) -> None: if self.algorithm in ["cfw", "bfw"]: for c in self.traffic_classes: - r = AssignmentResults() - r.prepare(c.graph, c.matrix) - r.compact_link_loads = np.zeros([]) - r.compact_total_link_loads = np.zeros([]) - self.previous_step_direction[c._id] = r - - r = AssignmentResults() - r.prepare(c.graph, c.matrix) - r.compact_link_loads = np.zeros([]) - r.compact_total_link_loads = np.zeros([]) - self.step_direction[c._id] = r - - r = AssignmentResults() - r.prepare(c.graph, c.matrix) - r.compact_link_loads = np.zeros([]) - r.compact_total_link_loads = np.zeros([]) - self.pre_previous_step_direction[c._id] = r + for d in [self.step_direction, self.previous_step_direction, self.temp_step_direction_for_copy]: + r = AssignmentResults() + r.prepare(c.graph, c.matrix) + r.compact_link_loads = np.zeros([]) + r.compact_total_link_loads = np.zeros([]) + d[c._id] = r def calculate_conjugate_stepsize(self): self.vdf.apply_derivative( @@ -257,17 +248,32 @@ def __calculate_step_direction(self): copy_three_dimensions(stp_dir_res.skims.matrix_view, aon_res.skims.matrix_view, self.cores) sd_flows.append(aon_res.total_link_loads) + if c._selected_links: + aux_res = self.aons[c._id].aux_res + for name, idx in c._aon_results._selected_links.items(): + copy_two_dimensions( + self.sl_step_dir_ll[c._id][name]["sdr"], + np.sum(aux_res.temp_sl_link_loading, axis=0)[idx, :, :], + self.cores, + ) + copy_three_dimensions( + self.sl_step_dir_od[c._id][name]["sdr"], + np.sum(aux_res.temp_sl_od_matrix, axis=0)[idx, :, :, :], + self.cores, + ) + # 3rd iteration is cfw. 
also, if we had to reset direction search we need a cfw step before bfw elif (self.iter == 3) or (self.do_conjugate_step) or (self.algorithm == "cfw"): self.do_conjugate_step = False self.calculate_conjugate_stepsize() for c in self.traffic_classes: sdr = self.step_direction[c._id] - pre_previous = self.pre_previous_step_direction[c._id] - copy_two_dimensions(pre_previous.link_loads, sdr.link_loads, self.cores) - pre_previous.total_flows() + previous = self.previous_step_direction[c._id] + + copy_two_dimensions(previous.link_loads, sdr.link_loads, self.cores) + previous.total_flows() if c.results.num_skims > 0: - copy_three_dimensions(pre_previous.skims.matrix_view, sdr.skims.matrix_view, self.cores) + copy_three_dimensions(previous.skims.matrix_view, sdr.skims.matrix_view, self.cores) linear_combination( sdr.link_loads, sdr.link_loads, c._aon_results.link_loads, self.conjugate_stepsize, self.cores @@ -282,6 +288,39 @@ def __calculate_step_direction(self): self.cores, ) + if c._selected_links: + aux_res = self.aons[c._id].aux_res + for name, idx in c._aon_results._selected_links.items(): + sl_step_dir_ll = self.sl_step_dir_ll[c._id][name] + sl_step_dir_od = self.sl_step_dir_od[c._id][name] + + copy_two_dimensions( + sl_step_dir_ll["prev_sdr"], + sl_step_dir_ll["sdr"], + self.cores, + ) + copy_three_dimensions( + sl_step_dir_od["prev_sdr"], + sl_step_dir_od["sdr"], + self.cores, + ) + + linear_combination( + sl_step_dir_ll["sdr"], + sl_step_dir_ll["sdr"], + np.sum(aux_res.temp_sl_link_loading, axis=0)[idx, :, :], + self.conjugate_stepsize, + self.cores, + ) + + linear_combination_skims( + sl_step_dir_od["sdr"], + sl_step_dir_od["sdr"], + np.sum(aux_res.temp_sl_od_matrix, axis=0)[idx, :, :, :], + self.conjugate_stepsize, + self.cores, + ) + sdr.total_flows() sd_flows.append(sdr.total_link_loads) # biconjugate @@ -289,7 +328,7 @@ def __calculate_step_direction(self): self.calculate_biconjugate_direction() # deep copy because we overwrite step_direction but need it on next iteration for c in self.traffic_classes: - ppst = self.pre_previous_step_direction[c._id] # type: AssignmentResults + ppst = self.temp_step_direction_for_copy[c._id] # type: AssignmentResults prev_stp_dir = self.previous_step_direction[c._id] # type: AssignmentResults stp_dir = self.step_direction[c._id] # type: AssignmentResults @@ -318,6 +357,51 @@ def __calculate_step_direction(self): self.cores, ) + if c._selected_links: + aux_res = self.aons[c._id].aux_res + for name, idx in c._aon_results._selected_links.items(): + sl_step_dir_ll = self.sl_step_dir_ll[c._id][name] + sl_step_dir_od = self.sl_step_dir_od[c._id][name] + copy_two_dimensions( + sl_step_dir_ll["temp_prev_sdr"], + sl_step_dir_ll["sdr"], + self.cores, + ) + copy_three_dimensions( + sl_step_dir_od["temp_prev_sdr"], + sl_step_dir_od["sdr"], + self.cores, + ) + + triple_linear_combination( + sl_step_dir_ll["sdr"], + np.sum(aux_res.temp_sl_link_loading, axis=0)[idx, :, :], + sl_step_dir_ll["sdr"], + sl_step_dir_ll["temp_prev_sdr"], + self.betas, + self.cores, + ) + + triple_linear_combination_skims( + sl_step_dir_od["sdr"], + np.sum(aux_res.temp_sl_od_matrix, axis=0)[idx, :, :, :], + sl_step_dir_od["sdr"], + sl_step_dir_od["prev_sdr"], + self.betas, + self.cores, + ) + + copy_two_dimensions( + sl_step_dir_ll["prev_sdr"], + sl_step_dir_ll["temp_prev_sdr"], + self.cores, + ) + copy_three_dimensions( + sl_step_dir_od["prev_sdr"], + sl_step_dir_od["temp_prev_sdr"], + self.cores, + ) + sd_flows.append(np.sum(stp_dir.link_loads, axis=1)) 
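# A sketch of the update above in plain numpy terms, assuming triple_linear_combination(out, a, b, c, betas, cores)
# computes out = betas[0] * a + betas[1] * b + betas[2] * c (an assumption; only the call sites appear in this patch):
#
#   step_direction = betas[0] * aon_flows + betas[1] * step_direction + betas[2] * previous_step_direction
#
# The pre-update step direction is first stashed in temp_step_direction_for_copy so that, once the new direction
# has been computed, it can become previous_step_direction for the next iteration (the copy just below).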
copy_two_dimensions(prev_stp_dir.link_loads, ppst.link_loads, self.cores) @@ -344,11 +428,40 @@ def doWork(self): def execute(self): # noqa: C901 # We build the fixed cost field + self.sl_step_dir_ll = {} + self.sl_step_dir_od = {} + for c in self.traffic_classes: # Copying select link dictionary that maps name to its relevant matrices into the class' results c._aon_results._selected_links = c._selected_links c.results._selected_links = c._selected_links + link_loads_step_dir_shape = ( + c.graph.compact_num_links, + c.results.classes["number"], + ) + + od_step_dir_shape = ( + c.graph.num_zones, + c.graph.num_zones, + c.results.classes["number"], + ) + + self.sl_step_dir_ll[c._id] = {} + self.sl_step_dir_od[c._id] = {} + for name in c._selected_links.keys(): + self.sl_step_dir_ll[c._id][name] = { + "sdr": np.zeros(link_loads_step_dir_shape, dtype=c.graph.default_types("float")), + "prev_sdr": np.zeros(link_loads_step_dir_shape, dtype=c.graph.default_types("float")), + "temp_prev_sdr": np.zeros(link_loads_step_dir_shape, dtype=c.graph.default_types("float")), + } + + self.sl_step_dir_od[c._id][name] = { + "sdr": np.zeros(od_step_dir_shape, dtype=c.graph.default_types("float")), + "prev_sdr": np.zeros(od_step_dir_shape, dtype=c.graph.default_types("float")), + "temp_prev_sdr": np.zeros(od_step_dir_shape, dtype=c.graph.default_types("float")), + } + # Sizes the temporary objects used for the results c.results.prepare(c.graph, c.matrix) c._aon_results.prepare(c.graph, c.matrix) @@ -363,11 +476,12 @@ def execute(self): # noqa: C901 c.fixed_cost[c.graph.graph.__supernet_id__] = v * c.fc_multiplier / c.vot c.fixed_cost[np.isnan(c.fixed_cost)] = 0 - # TODO: Review how to eliminate this. It looks unnecessary - # Just need to create some arrays for cost - for c in self.traffic_classes: + # TODO: Review how to eliminate this. 
It looks unnecessary + # Just need to create some arrays for cost c.graph.set_graph(self.time_field) + self.aons[c._id] = allOrNothing(c.matrix, c.graph, c._aon_results) + self.logger.info(f"{self.algorithm} Assignment STATS") self.logger.info("Iteration, RelativeGap, stepsize") for self.iter in range(1, self.max_iter + 1): # noqa: B020 @@ -385,7 +499,7 @@ def execute(self): # noqa: C901 cost = c.fixed_cost + self.congested_time aggregate_link_costs(cost, c.graph.compact_cost, c.results.crosswalk) - aon = allOrNothing(c.matrix, c.graph, c._aon_results) + aon = self.aons[c._id] # Reuse the stored aon object; its aux_res must survive the iteration for the step direction calculation if pyqt: aon.assignment.connect(self.signal_handler) aon.execute() @@ -409,14 +523,14 @@ def execute(self): # noqa: C901 # The temp has an index associated with the link_set name copy_three_dimensions( c.results.select_link_od.matrix[name], # matrix being written into - np.sum(aon.aux_res.temp_sl_od_matrix, axis=0)[ + np.sum(self.aons[c._id].aux_res.temp_sl_od_matrix, axis=0)[ idx, :, :, : ], # results after the iteration self.cores, # core count ) copy_two_dimensions( c.results.select_link_loading[name], # output matrix - np.sum(aon.aux_res.temp_sl_link_loading, axis=0)[idx, :, :], # matrix 1 + np.sum(self.aons[c._id].aux_res.temp_sl_link_loading, axis=0)[idx, :, :], # matrix 1 self.cores, # core count ) flows.append(c.results.total_link_loads) @@ -426,7 +540,9 @@ def execute(self): # noqa: C901 self.calculate_stepsize() for c in self.traffic_classes: stp_dir = self.step_direction[c._id] + cls_res = c.results + linear_combination( cls_res.link_loads, stp_dir.link_loads, cls_res.link_loads, self.stepsize, self.cores ) @@ -445,17 +561,17 @@ def execute(self): # noqa: C901 # Copy the temporary results into the final od matrix, referenced by link_set name # The temp flows have an index associated with the link_set name linear_combination_skims( - c.results.select_link_od.matrix[name], # output matrix - np.sum(aon.aux_res.temp_sl_od_matrix, axis=0)[idx, :, :, :], # matrix 1 - c.results.select_link_od.matrix[name], # matrix 2 (previous iteration) + cls_res.select_link_od.matrix[name], # output matrix + self.sl_step_dir_od[c._id][name]["sdr"], + cls_res.select_link_od.matrix[name], # matrix 2 (previous iteration) self.stepsize, # stepsize self.cores, # core count ) linear_combination( - c.results.select_link_loading[name], # output matrix - np.sum(aon.aux_res.temp_sl_link_loading, axis=0)[idx, :, :], # matrix 1 - c.results.select_link_loading[name], # matrix 2 (previous iteration) + cls_res.select_link_loading[name], # output matrix + self.sl_step_dir_ll[c._id][name]["sdr"], + cls_res.select_link_loading[name], # matrix 2 (previous iteration) self.stepsize, # stepsize self.cores, # core count ) diff --git a/aequilibrae/paths/results/assignment_results.py b/aequilibrae/paths/results/assignment_results.py index f0921853d..220b7033a 100644 --- a/aequilibrae/paths/results/assignment_results.py +++ b/aequilibrae/paths/results/assignment_results.py @@ -184,8 +184,7 @@ def prepare(self, graph: Graph, matrix: AequilibraeMatrix) -> None: ) sl_idx = {} - for i, val in enumerate(self._selected_links.items()): - name, arr = val + for i, (name, arr) in enumerate(self._selected_links.items()): sl_idx[name] = i # Filling select_links array with linksets.
Note the default value is -1, which is used as a placeholder # It also denotes when the given row has no more selected links, since Cython cannot handle diff --git a/aequilibrae/transit/gtfs_loader.py b/aequilibrae/transit/gtfs_loader.py index ba9f33d8d..691c355b4 100644 --- a/aequilibrae/transit/gtfs_loader.py +++ b/aequilibrae/transit/gtfs_loader.py @@ -179,10 +179,8 @@ def __deconflict_stop_times(self) -> None: } ) - for _, rec in max_speeds.iterrows(): - df.loc[(df.dist >= rec.min_distance) & ((df.dist < rec.max_distance)), "max_speed"] = ( - rec.speed - ) + for _, r in max_speeds.iterrows(): + df.loc[(df.dist >= r.min_distance) & (df.dist < r.max_distance), "max_speed"] = r.speed to_fix = df[df.max_speed < df.speed].index.values if to_fix.shape[0] > 0: diff --git a/tests/aequilibrae/paths/test_select_link.py b/tests/aequilibrae/paths/test_select_link.py index f499f91a2..d9f87c249 100644 --- a/tests/aequilibrae/paths/test_select_link.py +++ b/tests/aequilibrae/paths/test_select_link.py @@ -49,7 +49,7 @@ def test_multiple_link_sets(self): Uses two examples: 2 links in one select link, and a single Selected Link Checks both the OD Matrix and Link Loading """ - self.assignclass.set_select_links({"sl 9 or 6": [(9, 1), (6, 1)], "just 3": [(3, 1)], "sl 5 for fun": [(5, 1)]}) + self.assignclass.set_select_links({"sl_9_or_6": [(9, 1), (6, 1)], "just_3": [(3, 1)], "sl_5_for_fun": [(5, 1)]}) self.assignment.execute() for key in self.assignclass._selected_links.keys(): od_mask, link_loading = create_od_mask( @@ -72,7 +72,7 @@ def test_equals_demand_one_origin(self): Tests to make sure the OD matrix works when all links surrounding one origin are selected Confirms the Link Loading is done correctly in this case """ - self.assignclass.set_select_links({"sl 1, 4, 3, and 2": [(1, 1), (4, 1), (3, 1), (2, 1)]}) + self.assignclass.set_select_links({"sl_1_4_3_and_2": [(1, 1), (4, 1), (3, 1), (2, 1)]}) self.assignment.execute() @@ -101,7 +101,7 @@ def test_single_demand(self): self.matrix.matrix_view = custom_demand self.assignclass.matrix = self.matrix - self.assignclass.set_select_links({"sl 39, 66, or 73": [(39, 1), (66, 1), (73, 1)]}) + self.assignclass.set_select_links({"sl_39_66_or_73": [(39, 1), (66, 1), (73, 1)]}) self.assignment.execute() for key in self.assignclass._selected_links.keys(): @@ -126,7 +126,7 @@ def test_select_link_network_loading(self): self.assignment.execute() non_sl_loads = self.assignclass.results.get_load_results() self.setUp() - self.assignclass.set_select_links({"sl 39, 66, or 73": [(39, 1), (66, 1), (73, 1)]}) + self.assignclass.set_select_links({"sl_39_66_or_73": [(39, 1), (66, 1), (73, 1)]}) self.assignment.execute() sl_loads = self.assignclass.results.get_load_results() np.testing.assert_allclose(non_sl_loads.matrix_tot, sl_loads.matrix_tot) @@ -200,6 +200,39 @@ def test_kaitang(self): ), ) + def test_multi_iteration(self): + for algorithm in ["all-or-nothing", "msa", "fw", "cfw", "bfw"]: + with self.subTest(algorithm=algorithm): + assignment = TrafficAssignment() + assignclass = TrafficClass("car", self.car_graph, self.matrix) + assignment.set_classes([assignclass]) + assignment.set_vdf("BPR") + assignment.set_vdf_parameters({"alpha": 0.15, "beta": 4.0}) + assignment.set_vdf_parameters({"alpha": "b", "beta": "power"}) + assignment.set_capacity_field("capacity") + assignment.set_time_field("free_flow_time") + assignment.max_iter = 10 + assignment.set_algorithm(algorithm) + + assignclass.set_select_links({"sl_1_1": [(1, 1)], "sl_5_1": [(5, 1)]}) + assignment.execute() + 
+ assignment_results = pd.DataFrame(assignclass.results.get_load_results().data).set_index("index") + sl_results = pd.DataFrame(assignclass.results.get_sl_results().data).set_index("index") + + self.assertAlmostEqual( + assignment_results["matrix_ab"].loc[1], + sl_results["sl_1_1_matrix_ab"].loc[1], + msg=f"Select link results differ from those of the assignment ({algorithm})", + delta=1e-6, + ) + self.assertAlmostEqual( + assignment_results["matrix_ab"].loc[5], + sl_results["sl_5_1_matrix_ab"].loc[5], + msg=f"Select link results differ from those of the assignment ({algorithm})", + delta=1e-6, + ) + def create_od_mask(demand: np.array, graph: Graph, sl): res = PathResults() @@ -228,7 +261,7 @@ def g(o, d): for origin in range(24): for dest in range(24): if mask.get((origin, dest)): - sl_od[origin, dest] = demand[origin, dest] + sl_od[origin, dest] = demand[origin, dest][0] # make link loading loading = np.zeros((76, 1)) From 0648ce363301e649a463e3081cfb3ae157c694fe Mon Sep 17 00:00:00 2001 From: Jake Moss Date: Fri, 2 Feb 2024 10:56:26 +1000 Subject: [PATCH 39/44] Dead end removal (#494) * Move graph_building to its own module. Improve compile times * Add dead end removal * Store dead end links for inspection * Add decorators * Fix * Add compressed graph tests and new data * Small graph compression docs * ruff format * black format as well --------- Co-authored-by: pveigadecamargo --- aequilibrae/paths/AoN.pyx | 1 - aequilibrae/paths/graph.py | 24 ++- aequilibrae/paths/graph_building.pyx | 218 ++++++++++++++++++++++---- setup.py | 16 +- tests/aequilibrae/paths/test_graph.py | 51 +++++- tests/data/KaiTang.zip | Bin 2395 -> 3771 bytes 6 files changed, 273 insertions(+), 37 deletions(-) diff --git a/aequilibrae/paths/AoN.pyx b/aequilibrae/paths/AoN.pyx index 31f0dd844..7e11f614f 100644 --- a/aequilibrae/paths/AoN.pyx +++ b/aequilibrae/paths/AoN.pyx @@ -11,7 +11,6 @@ include 'conical.pyx' include 'inrets.pyx' include 'parallel_numpy.pyx' include 'path_file_saving.pyx' -include 'graph_building.pyx' def one_to_all(origin, matrix, graph, result, aux_result, curr_thread): # type: (int, AequilibraeMatrix, Graph, AssignmentResults, MultiThreadedAoN, int) -> int diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index ea0193764..bf307d87a 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -7,14 +7,30 @@ import numpy as np import pandas as pd -from aequilibrae.paths.AoN import build_compressed_graph +from aequilibrae.paths.graph_building import build_compressed_graph from aequilibrae.context import get_logger class GraphBase(ABC): # noqa: B024 """ - Graph class + Graph class. + + AequilibraE graphs implement two forms of compression: + - link contraction, and + - dead end removal. + + Link contraction creates a topologically equivalent graph by contracting sequences of links between nodes + of degree two. This compresses long streams of links, such as along highways or curved roads, into single links. + + Dead end removal attempts to remove dead ends and fish spines from the network. It does this based on the observation + that in a graph with non-negative weights a dead end will only ever appear in the results of a short(est) path if the + origin or destination is present within that dead end. + + Dead end removal is applied before link contraction and does not create a strictly topologically equivalent graph; + however, all centroids are preserved. + + The compressed graph is used internally.
""" def __init__(self, logger=None): @@ -77,6 +93,8 @@ def __init__(self, logger=None): self.g_link_crosswalk = np.array([]) # 4 a link ID in the BIG graph, a corresponding link in the compressed 1 + self.dead_end_links = np.array([]) + # Randomly generate a unique Graph ID randomly self._id = uuid.uuid4().hex @@ -170,6 +188,8 @@ def _build_directed_graph(self, network: pd.DataFrame, centroids: np.ndarray): neg_names.append(name + "_ba") not_pos = pd.DataFrame(not_pos, copy=True)[neg_names] not_pos.columns = names + + # Swap the a and b nodes of these edges. Direction is used for mapping the graph.graph back to the network. It does not indicate the direction of the link. not_pos.loc[:, "direction"] = -1 aux = np.array(not_pos.a_node.values, copy=True) not_pos.loc[:, "a_node"] = not_pos.loc[:, "b_node"] diff --git a/aequilibrae/paths/graph_building.pyx b/aequilibrae/paths/graph_building.pyx index c51548c6a..1bb816b1e 100644 --- a/aequilibrae/paths/graph_building.pyx +++ b/aequilibrae/paths/graph_building.pyx @@ -1,7 +1,127 @@ import pandas as pd import numpy as np +cimport numpy as np cimport cython +from libcpp.queue cimport queue + +@cython.wraparound(False) +@cython.embedsignature(True) +@cython.boundscheck(False) +@cython.initializedcheck(False) +cdef void _remove_dead_ends( + long long [:] graph_fs, + long long [:] all_nodes, + long long [:] nodes_to_indices, + long long [:] a_nodes, + long long [:] b_nodes, + signed char [:] directions, + long long [:] in_degree, + long long [:] out_degree, + np.uint8_t [:] burnt_links, +) noexcept nogil: + cdef: + long long b_node + Py_ssize_t node_idx, incoming, outgoing + + queue[long long] Q + + # Discovery: we are looking for potential dead ends, this includes: + # - nodes with all incoming edges, + # - nodes with all outgoing edges, + # - nodes for which all out going links point to the same node and has the same number of back links. + # This is a generalisation of dead-ends formed by a node with a single bidirectional link to another + # + # Removal: At nodes of interest we begin a BFS search for links we can remove. It's uncommon to the BFS to extend far as the stopping + # criteria and conditions for expansion are very similar, often this just searches in along a line. + # For removal the node must have "no choices", that is, all outgoing edges point to the same node *and* all incoming edges can be account for as + # from that same node. + # The criteria for expansion is that there are no incoming edges *and* all outgoing edges point to the same node. + for starting_node_idx in range(all_nodes.shape[0]): + node = all_nodes[starting_node_idx] + + Q.push(starting_node_idx) + while not Q.empty(): + node_idx = Q.front() + Q.pop() + + # Centroids are marked with negative degrees, the actual degree does not matter + if in_degree[node_idx] < 0 or out_degree[node_idx] < 0: + continue + elif in_degree[node_idx] == 0 and out_degree[node_idx] == 0: + continue + elif in_degree[node_idx] > 0 and out_degree[node_idx] == 0: + # All incoming or all outgoing edges, since there's no way to either leave, or get to this node all the attached + # edges are of no use. However we have no (current) means to remove these edges, a transpose of the graph would be required to + # avoid individual lookups. 
+ continue + + ### Expansion + if in_degree[node_idx] == 0 and out_degree[node_idx] > 0: + for link_idx in range(graph_fs[node_idx], graph_fs[node_idx + 1]): + if not burnt_links[link_idx]: + # Only need to burn links leaving this node + burnt_links[link_idx] = True + in_degree[b_nodes[link_idx]] -= 1 + out_degree[node_idx] -= 1 + Q.push(b_nodes[link_idx]) + continue + + ### Propagation + # We now know that the node we are looking at has a mix of incoming and outgoing edges, i.e. in_degree[node] > 0 and out_degree[node] > 0. + # That implies that this node is reachable from some other node. We now need to assess if this node would ever be considered in pathfinding. + # To be considered there needs to be some form of real choice, i.e. there need to be multiple ways to leave a new node. If the only way + # to leave this node is back the way we came, then the cost of the path + # ... -> pre -> node -> pre -> ... + # is greater than that of + # ... -> pre -> ... + # so we can remove this link to node. This is because negative cost cycles are disallowed in path finding. + # + # We don't remove the case where there is only one new node that could be used to leave, as it is handled in the graph compression. + # It would, however, be possible to handle that here as well. + + # If all the outgoing edges are to the same node *and* all incoming edges come from that node, then there is no real choice and the + # link should be removed. This is primarily to catch nodes that are only connected to a single other node via a bidirectional link, but it can + # handle the case where multiple links point between the same pair of nodes with perhaps different, but still non-negative, costs. + + # Let's first find a link that isn't burnt, then check that every link after this one points to the same node. + b_node = -1 + for link_idx in range(graph_fs[node_idx], graph_fs[node_idx + 1]): + if burnt_links[link_idx]: + continue + + if b_node == -1: + b_node = b_nodes[link_idx] + elif b_node != b_nodes[link_idx]: + # We've found a link to a different node; that means we have some other way to leave, so we can't remove this node. + break + else: + # We now know all outgoing edges point to the same node. Let's now check that all incoming edges are accounted for. + incoming = in_degree[node_idx] + for link_idx in range(graph_fs[b_node], graph_fs[b_node + 1]): + # The incoming degree has already been decremented when a link was burnt + if not burnt_links[link_idx] and b_nodes[link_idx] == node_idx: + incoming -= 1 + + if incoming != 0: + # Not all incoming edges are accounted for; there is some other node that links to this one. From where, we don't know. + continue + + # All incoming edges are accounted for, this node is of interest.
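# As a small hypothetical example: if node n has a single link n -> u, and u has two parallel links u -> n,
# then every way of leaving n returns to u and every incoming link comes from u, so all of the n <-> u links
# are burnt in the loops below. A single extra link n -> w would have been a real choice, and the loop above
# would have hit the break instead.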
+ for link_idx in range(graph_fs[node_idx], graph_fs[node_idx + 1]): + if not burnt_links[link_idx]: + burnt_links[link_idx] = True + in_degree[b_node] -= 1 + out_degree[node_idx] -= 1 + + for link_idx in range(graph_fs[b_node], graph_fs[b_node + 1]): + if not burnt_links[link_idx] and b_nodes[link_idx] == node_idx: + burnt_links[link_idx] = True + in_degree[node_idx] -= 1 + out_degree[b_node] -= 1 + + Q.push(b_node) + @cython.wraparound(False) @cython.embedsignature(True) @@ -23,31 +143,31 @@ cdef long long _build_compressed_graph(long long[:] link_idx, cdef: long long slink = 0 long long pre_link, n, first_node, lnk, lidx, a_node, b_node - long ab_dir = 1 # These could definitely be smaller since we only ever use them in conditionals - long ba_dir = 1 + bint ab_dir, ba_dir long drc Py_ssize_t i, k - for i in range(link_edge.shape[0]): - pre_link = link_edge[i] - + # For each link we have marked for examining + for pre_link in link_edge: if simplified_links[pre_link] >= 0: continue - ab_dir = 1 - ba_dir = 1 + + # We grab the initial information for that link lidx = link_idx[pre_link] a_node = a_nodes[lidx] b_node = b_nodes[lidx] drc = directions[lidx] + n = a_node if counts[a_node] == 2 else b_node first_node = b_node if counts[a_node] == 2 else a_node - ab_dir = 0 if (first_node == a_node and drc < 0) or (first_node == b_node and drc > 0) else ab_dir - ba_dir = 0 if (first_node == a_node and drc > 0) or (first_node == b_node and drc < 0) else ba_dir + # True if there exists a link in the direction ab (or ba), could be two links, or a single bidirectional link + ab_dir = False if (first_node == a_node and drc < 0) or (first_node == b_node and drc > 0) else True + ba_dir = False if (first_node == a_node and drc > 0) or (first_node == b_node and drc < 0) else True + # While the node we are looking at, n, has degree two, we can continue to compress while counts[n] == 2: - # assert (simplified_links[pre_link] >= 0), "How the heck did this happen?" - simplified_links[pre_link] = slink + simplified_links[pre_link] = slink # Mark link for removal simplified_directions[pre_link] = -1 if a_node == n else 1 # Gets the link from the list that is not the link we are coming from @@ -60,8 +180,8 @@ cdef long long _build_compressed_graph(long long[:] link_idx, a_node = a_nodes[lidx] b_node = b_nodes[lidx] drc = directions[lidx] - ab_dir = 0 if (n == a_node and drc < 0) or (n == b_node and drc > 0) else ab_dir - ba_dir = 0 if (n == a_node and drc > 0) or (n == b_node and drc < 0) else ba_dir + ab_dir = False if (n == a_node and drc < 0) or (n == b_node and drc > 0) else ab_dir + ba_dir = False if (n == a_node and drc > 0) or (n == b_node and drc < 0) else ba_dir n = ( a_nodes[lidx] if n == b_nodes[lidx] @@ -76,12 +196,12 @@ cdef long long _build_compressed_graph(long long[:] link_idx, # Available directions are NOT indexed like the other arrays compressed_a_node[slink] = first_node compressed_b_node[slink] = last_node - if ab_dir > 0: - if ba_dir > 0: + if ab_dir: + if ba_dir: compressed_dir[slink] = 0 else: compressed_dir[slink] = 1 - elif ba_dir > 0: + elif ba_dir: compressed_dir[slink] = -1 else: compressed_dir[slink] = -999 @@ -101,16 +221,47 @@ cdef void _back_fill(long long[:] links_index, long long max_node) noexcept: def build_compressed_graph(graph): + # General notes: + # Anything that uses graph.network is operating on the **mixed** graph. This graph has both directed and undirected edges + # Anything that uses graph.graph is operating on the **directed** graph. 
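# For example, a link stored once in graph.network with direction 0 (bidirectional) appears as two opposing
# directed edges in graph.graph, while a direction 1 or -1 link appears only once.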
This graph has only directed edges, they may be backwards but they are directed + + burnt_links = np.full(len(graph.graph), False, dtype=bool) + + directed_node_max = max(graph.graph.a_node.values.max(), graph.graph.b_node.values.max()) + in_degree = np.bincount(graph.graph.b_node.values, minlength=directed_node_max + 1) + out_degree = np.bincount(graph.graph.a_node.values, minlength=directed_node_max + 1) + + centroid_idx = graph.nodes_to_indices[graph.centroids] + in_degree[centroid_idx] = -1 + out_degree[centroid_idx] = -1 + del centroid_idx + + _remove_dead_ends( + graph.fs, + graph.all_nodes, + graph.nodes_to_indices, + graph.graph.a_node.values, + graph.graph.b_node.values, + graph.graph.direction.values, + in_degree, + out_degree, + burnt_links, + ) + + graph.dead_end_links = graph.graph.link_id.values[burnt_links] # Perhaps filter to unique link_ids? There'll be duplicates in here + df = pd.DataFrame(graph.network, copy=True) + if graph.dead_end_links.shape[0]: + df = df[~df.link_id.isin(graph.dead_end_links)] + # Build link index - link_id_max = graph.network.link_id.max() + link_id_max = df.link_id.max() link_idx = np.empty(link_id_max + 1, dtype=np.int64) - link_idx[graph.network.link_id] = np.arange(graph.network.shape[0]) + link_idx[df.link_id] = np.arange(df.shape[0]) - nodes = np.hstack([graph.network.a_node.values, graph.network.b_node.values]) - links = np.hstack([graph.network.link_id.values, graph.network.link_id.values]) - counts = np.bincount(nodes) # index (node) i has frequency counts[i]. This is just the number of - # edges that connection to a given node + nodes = np.hstack([df.a_node.values, df.b_node.values]) + links = np.hstack([df.link_id.values, df.link_id.values]) + counts = np.bincount(nodes) # index (node) i has frequency counts[i]. This is just the number of edges that connect to a given node idx = np.argsort(nodes) all_nodes = nodes[idx] @@ -129,9 +280,13 @@ def build_compressed_graph(graph): # We keep all centroids for sure counts[graph.centroids] = 999 - truth = (counts == 2).astype(np.uint8) - link_edge = truth[graph.network.a_node.values] + truth[graph.network.b_node.values] - link_edge = graph.network.link_id.values[link_edge == 1].astype(np.int64) + degree_two = (counts == 2).astype(np.uint8) + # Reorder and sum the degree two nodes by how they appear in the network, finds how a particular node is connected, resulting values are + # 0: This node is not of degree one. We're not interested in this case. + # 1: This node is of degree one AND, has either incoming or outgoing flow. We can remove these these. 
+ link_edge = df.link_id.values[ + degree_two[df.a_node.values] + degree_two[df.b_node.values] == 1 + ].astype(np.int64) simplified_links = np.full(link_id_max + 1, -1, dtype=np.int64) simplified_directions = np.zeros(link_id_max + 1, dtype=np.int8) @@ -144,9 +299,9 @@ def build_compressed_graph(graph): link_idx[:], links_index[:], link_edge[:], - graph.network.a_node.values[:], - graph.network.b_node.values[:], - graph.network.direction.values[:], + df.a_node.values[:], + df.b_node.values[:], + df.direction.values[:], link_id_max, simplified_links[:], simplified_directions[:], @@ -157,10 +312,9 @@ def build_compressed_graph(graph): compressed_b_node[:] ) - links_to_remove = np.argwhere(simplified_links >= 0) - df = pd.DataFrame(graph.network, copy=True) + links_to_remove = (simplified_links >= 0).nonzero()[0] if links_to_remove.shape[0]: - df = df[~df.link_id.isin(links_to_remove[:, 0])] + df = df[~df.link_id.isin(links_to_remove)] df = df[df.a_node != df.b_node] comp_lnk = pd.DataFrame( @@ -222,4 +376,4 @@ def build_compressed_graph(graph): # If will refer all the links that have no correlation to an element beyond the last link # This element will always be zero during assignment - graph.graph.loc[graph.graph.__compressed_id__.isna(), "__compressed_id__"] = graph.compact_graph.id.max() + 1 + graph.graph.__compressed_id__ = graph.graph.__compressed_id__.fillna(graph.compact_graph.id.max() + 1).astype(np.int64) diff --git a/setup.py b/setup.py index 46f89c43f..fac221980 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,7 @@ import numpy as np from Cython.Distutils import build_ext +from Cython.Build import cythonize from setuptools import Extension from setuptools import setup, find_packages @@ -55,6 +56,16 @@ language="c++", ) +ext_mod_graph_building = Extension( + "aequilibrae.paths.graph_building", + [join("aequilibrae", "paths", "graph_building.pyx")], + extra_compile_args=compile_args, + extra_link_args=link_args, + define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], + include_dirs=include_dirs, + language="c++", +) + with open("requirements.txt", "r") as fl: install_requirements = [x.strip() for x in fl.readlines()] @@ -104,5 +115,8 @@ "Programming Language :: Python :: 3.12", ], cmdclass={"build_ext": build_ext}, - ext_modules=[ext_mod_aon, ext_mod_ipf, ext_mod_put], + ext_modules=cythonize( + [ext_mod_aon, ext_mod_ipf, ext_mod_put, ext_mod_graph_building], + compiler_directives={"language_level": "3str"}, + ), ) diff --git a/tests/aequilibrae/paths/test_graph.py b/tests/aequilibrae/paths/test_graph.py index 2698cd742..e8ae4665d 100644 --- a/tests/aequilibrae/paths/test_graph.py +++ b/tests/aequilibrae/paths/test_graph.py @@ -1,10 +1,11 @@ from unittest import TestCase import os import tempfile +import zipfile import numpy as np import pandas as pd from aequilibrae.paths import Graph -from os.path import join +from os.path import join, dirname from uuid import uuid4 from .parameters_test import centroids from aequilibrae.project import Project @@ -120,3 +121,51 @@ def test_transit_graph_config(self): def test_transit_graph_od_node_mapping(self): pd.testing.assert_frame_equal(self.graph.od_node_mapping, self.transit_graph.od_node_mapping) + + +class TestGraphCompression(TestCase): + def setUp(self) -> None: + proj_path = os.path.join(tempfile.gettempdir(), "test_graph_compression" + uuid4().hex) + os.mkdir(proj_path) + zipfile.ZipFile(join(dirname(siouxfalls_project), "KaiTang.zip")).extractall(proj_path) + + # proj_path = 
"/home/jake/Software/aequilibrae_performance_tests/models/kaitang" + self.link_df = pd.read_csv(os.path.join(proj_path, "links_modified.csv")) + self.node_df = pd.read_csv(os.path.join(proj_path, "nodes_modified.csv")) + centroids_array = np.array([7, 8, 11]) + + self.graph = Graph() + self.graph.network = self.link_df + self.graph.mode = "a" + self.graph.prepare_graph(centroids_array) + self.graph.set_blocked_centroid_flows(False) + self.graph.set_graph("fft") + + def test_compressed_graph(self): + # Check the compressed links, links 4 and 5 should be collapsed into 2 links from 3 - 10 and 10 - 3. + compressed_links = self.graph.graph[ + self.graph.graph.__compressed_id__.duplicated(keep=False) + & (self.graph.graph.__compressed_id__ != self.graph.compact_graph.id.max() + 1) + ] + + self.assertListEqual(compressed_links.link_id.unique().tolist(), [4, 5]) + + # Confirm these compacted links map back up to a contraction between the correct nodes + self.assertListEqual( + self.graph.compact_all_nodes[ + self.graph.compact_graph[self.graph.compact_graph.id.isin(compressed_links.__compressed_id__.unique())][ + ["a_node", "b_node"] + ].values + ].tolist(), + [[3, 10], [10, 3]], + ) + + def test_dead_end_removal(self): + # The dead end remove should be able to remove links [30, 38]. In it's current state it is not able to remove + # link 40 as it's a single direction link with no outgoing edges so its not possible to find the incoming edges + # (in general) without a transposed graph representation. + self.assertSetEqual( + set(self.graph.dead_end_links), + set(self.graph.graph[self.graph.graph.dead_end == 1].link_id) - {40}, + "Dead end removal removed incorrect links", + ) diff --git a/tests/data/KaiTang.zip b/tests/data/KaiTang.zip index 322ce0980d6980a06976ad28b8d6637aecf8e02d..cba074fc69fca852c59b704341888e652c107350 100644 GIT binary patch delta 1366 zcmcaDv|Dz=T=sfq77+#p1`dXx0^0~pUtL3QCI$vaP6h@c1{sE&%)IR4_}u)I%(TqZ z6uso)vd|Dt2Id&$?WxH?Tw1}+z{v7~nSlXJY@K#K@3sNYvG=uHiw^ctl@=i?0x9i&UeSbYP3(TJI&sx$dcw6Dg{rdDz%1;ku?fhpP{q2JO{Pv?e z?yULxF7?f|ZpK|Vt1j~!=udBNFT8g4bLzc=i>hCVo&I>$>GIxV@})1iOI9zxFMFna zWAKR`k!y2qe)AJQyZlB?*H`E3=i5s8_qUyWY+$$lp2dN;jt#w2idOxNds#dU zm;OBvIdjzls|y~X(VLhqPRrQ2Cv&Hom*w2Zy;A=h*=h@mm7VopPdcMrzlEcNokx%( z-|$IIR{JeW?yjf!xOI6 zA0_x--qP5!Sufl={c89YzlUthnUC8a7;L$d)vNqNt?HWD_mdMR$Z))QEf-pP?Mvy* zd=-Ugj|DrQJm#73<{Rf*{zoyU;sNz7QA@6E)x9l~nRaJ~i%+LPWbui*2Rt{8qIMSK zT*~S`t(+EWVVG>K@Q;5>R7%oYZ=+vzm8Xo}Us0=)S*fyO=>sMM$GP^RT-IB8Wu8Z_ zeLaEQ`S#?~eEAkT+1%TLpZ}ebyTc|rzQ9V;=;0$%3-4(qN=^qgSML>g&NKbE&+fU( z^$)5=`@bmPez#}uyyd%ne#Q&tTJKmj(^B|9Q-C)tZKeRz=HKgQ55EJZRCQqb1f|Wq z{FGEIX|o2HF6%JUX6S^yeTNJLT7KKOdQG^d5%*ukhUu*A(XSWlU*^mWn&9EUn18;H zcT)Jo;4|s(&+x5{KJ|H0#?G_#m3!m2tKNO#=oBe0gMIz{ zeRaR)zJ0rI^N~dbtBp(4)Vs1v+3!x&|K3}b&{Dy5^q>a!1c$&SzxE!y!2etCQpfL8 zM?|JiR1mzE#uW#CTso)TrvKDK zzapV2NwtfiTS0BwT6-O(J_c#6vIQ1>_=Gp9P7$>jh;IhRmXC#2GVqjnd;w6o<*%`2w zI;@aVXYwo#XxmPiYu(B}#0V@!$Vq{>L%?{!L0H+HE_5c6? 
From 4057cd7b5af10c776e1c8476f43db67722f65ad9 Mon Sep 17 00:00:00 2001 From: Jake Moss Date: Fri, 2 Feb 2024 11:15:04 +1000 Subject: [PATCH 40/44] Fix typo, used wrong matrix (#503) --- aequilibrae/paths/linear_approximation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/paths/linear_approximation.py b/aequilibrae/paths/linear_approximation.py index c1cb86c29..8833e4cbd 100644 --- a/aequilibrae/paths/linear_approximation.py +++ b/aequilibrae/paths/linear_approximation.py @@ -377,7 +377,7 @@ def __calculate_step_direction(self): sl_step_dir_ll["sdr"], np.sum(aux_res.temp_sl_link_loading, axis=0)[idx, :, :], sl_step_dir_ll["sdr"], - sl_step_dir_ll["temp_prev_sdr"], + sl_step_dir_ll["prev_sdr"], self.betas, self.cores, ) From 9089fea7824b2977e80a036d5ebc37a7d7559fa3 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Fri, 2 Feb 2024 11:38:34 +1000 Subject: [PATCH 41/44] typo --- setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 4df8ba954..852b82dff 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,6 @@ language="c++", ) - ext_mod_ipf = Extension( "aequilibrae.distribution.ipf_core", [join("aequilibrae", "distribution", "ipf_core.pyx")], @@ -75,7 +74,6 @@ language="c++", ) - with open("requirements.txt", "r") as fl: install_requirements = [x.strip() for x in fl.readlines()] @@ -124,7 +122,8 @@ "Programming Language :: Python :: 3.12", ], cmdclass={"build_ext": build_ext}, - ext_modules=cythonize([ext_mod_aon, ext_mod_ipf, ext_mod_put, ext_mod_bfs_le, ext_mod_graph_building], + ext_modules=cythonize( + [ext_mod_aon, ext_mod_ipf, ext_mod_put, ext_mod_bfs_le, ext_mod_graph_building], compiler_directives={"language_level": "3str"}, ), ) From 92edd722ae3e769433d64361bdb2ca2af4b9b2c2 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 2 Feb 2024 13:32:29 +1000 Subject: [PATCH 42/44] Allow switching path finding method --- aequilibrae/paths/route_choice.pxd | 5 ++- aequilibrae/paths/route_choice.pyx | 66 +++++++++++++++++++++--------- 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index 3d69a097a..951a82b2b 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -117,6 +117,7 @@ cdef class RouteChoiceSet: long long num_nodes long long zones bint block_flows_through_centroids + bint a_star cdef void path_find( RouteChoiceSet self, @@ -125,7 +126,8 @@ cdef class RouteChoiceSet: double [:] scratch_cost, long long [:] thread_predecessors, long long [:] thread_conn, - long long [:] thread_b_nodes + long long [:] thread_b_nodes, + long long [:] thread_reached_first ) noexcept nogil cdef RouteSet_t *generate_route_set( @@ -138,5 +140,6 @@ cdef class RouteChoiceSet: long long [:] thread_predecessors, long long [:] thread_conn, long long [:] thread_b_nodes, + long long [:] _thread_reached_first, unsigned int seed ) noexcept nogil diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 4ee98be4e..dfcb09ec6 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -95,6 +95,7 @@ cdef class RouteChoiceSet: self.num_nodes = graph.compact_num_nodes self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows + self.a_star = True def __dealloc__(self): """ @@ -104,7 +105,7 @@ cdef class RouteChoiceSet: pass @cython.embedsignature(True) - def run(self, origin: int, destination: int, max_routes: int = 0, 
max_depth: int = 0, seed: int = 0): + def run(self, origin: int, destination: int, max_routes: int = 0, max_depth: int = 0, seed: int = 0, a_star: bool = True): """ Compute a route set for a single OD pair. @@ -124,14 +125,14 @@ cdef class RouteChoiceSet: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of compact link IDs. Represents paths from ``origin`` to ``destination``. """ - return self.batched([(origin, destination)], max_routes=max_routes, max_depth=max_depth, seed=seed)[(origin, destination)] + return self.batched([(origin, destination)], max_routes=max_routes, max_depth=max_depth, seed=seed, a_star=a_star)[(origin, destination)] # Bounds checking doesn't really need to be disabled here but the warning is annoying @cython.boundscheck(False) @cython.wraparound(False) @cython.embedsignature(True) @cython.initializedcheck(False) - def batched(self, ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, seed: int = 0, cores: int = 1): + def batched(self, ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, seed: int = 0, cores: int = 1, a_star: bool = True): """ Compute a route set for a list of OD pairs. @@ -160,6 +161,10 @@ cdef class RouteChoiceSet: long long [:, :] conn_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (cores, self.b_nodes_view.shape[0])).copy() + # This matrix is never read from; it exists to allow using Dijkstra's method without changing the + # interface. + long long [:, :] _reached_first_matrix + vector[RouteSet_t *] *results = new vector[RouteSet_t *](len(ods)) long long origin_index, dest_index, i @@ -182,6 +187,13 @@ cdef class RouteChoiceSet: if self.nodes_to_indices_view[d] == -1: raise ValueError(f"Destination {d} is not present within the compact graph") + self.a_star = a_star + + if self.a_star: + _reached_first_matrix = np.zeros((cores, 1), dtype=np.int64) # Dummy array to allow slicing + else: + _reached_first_matrix = np.zeros((cores, self.num_nodes), dtype=np.int64) + with nogil, parallel(num_threads=c_cores): for i in prange(c_ods.size()): origin_index = self.nodes_to_indices_view[c_ods[i].first] @@ -207,6 +219,7 @@ cdef class RouteChoiceSet: predecessors_matrix[threadid()], conn_matrix[threadid()], b_nodes_matrix[threadid()], + _reached_first_matrix[threadid()], c_seed, ) @@ -240,23 +253,37 @@ cdef class RouteChoiceSet: double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn, - long long [:] thread_b_nodes + long long [:] thread_b_nodes, + long long [:] _thread_reached_first ) noexcept nogil: """Small wrapper around path finding, thread locals should be passed as arguments.""" - path_finding_a_star( - origin_index, - dest_index, - thread_cost, - thread_b_nodes, - self.graph_fs_view, - self.nodes_to_indices_view, - self.lat_view, - self.lon_view, - thread_predecessors, - self.ids_graph_view, - thread_conn, - EQUIRECTANGULAR # FIXME: enum import failing due to redefinition - ) + if self.a_star: + path_finding_a_star( + origin_index, + dest_index, + thread_cost, + thread_b_nodes, + self.graph_fs_view, + self.nodes_to_indices_view, + self.lat_view, + self.lon_view, + thread_predecessors, + self.ids_graph_view, + thread_conn, + EQUIRECTANGULAR # FIXME: enum import failing due to redefinition + ) + else: + path_finding( + origin_index, + dest_index, + thread_cost, + thread_b_nodes, + self.graph_fs_view, + thread_predecessors, + self.ids_graph_view, +
thread_conn, + _thread_reached_first + ) @cython.boundscheck(False) @cython.wraparound(False) @@ -272,6 +299,7 @@ cdef class RouteChoiceSet: long long [:] thread_predecessors, long long [:] thread_conn, long long [:] thread_b_nodes, + long long [:] _thread_reached_first, unsigned int seed ) noexcept nogil: """Main method for route set generation. See top of file for commentary.""" @@ -311,7 +339,7 @@ cdef class RouteChoiceSet: for connector in deref(banned): thread_cost[connector] = INFINITY - RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn, thread_b_nodes) + RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn, thread_b_nodes, _thread_reached_first) # Mark this set of banned links as seen removed_links.insert(banned) From 007e93417435c2ec9f7b4acced2908f820fc083f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 2 Feb 2024 14:26:16 +1000 Subject: [PATCH 43/44] Add debug method for display in tests --- tests/aequilibrae/paths/test_route_choice.py | 23 ++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 37c81f262..8f1213944 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -115,3 +115,26 @@ def test_route_choice_exceptions(self): with self.subTest(a=a, b=b, max_routes=max_routes, max_depth=max_depth): with self.assertRaises(ValueError): rc.run(a, b, max_routes=max_routes, max_depth=max_depth) +def generate_line_strings(project, graph, results): + """Debug method""" + import geopandas as gpd + import shapely + + links = project.network.links.data.set_index("link_id") + df = [] + for od, route_set in results.items(): + for route in route_set: + df.append( + ( + *od, + shapely.MultiLineString( + links.loc[ + graph.graph[graph.graph.__compressed_id__.isin(route)].link_id + ].geometry.to_list() + ), + ) + ) + + df = gpd.GeoDataFrame(df, columns=["origin", "destination", "geometry"]) + df.set_geometry("geometry") + return df From 488006f68e4e47f59fd151339516c78a05a29e00 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 2 Feb 2024 14:32:42 +1000 Subject: [PATCH 44/44] Linting --- tests/aequilibrae/paths/test_route_choice.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 8f1213944..0c2a9447c 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -115,6 +115,8 @@ def test_route_choice_exceptions(self): with self.subTest(a=a, b=b, max_routes=max_routes, max_depth=max_depth): with self.assertRaises(ValueError): rc.run(a, b, max_routes=max_routes, max_depth=max_depth) + + def generate_line_strings(project, graph, results): """Debug method""" import geopandas as gpd @@ -128,9 +130,7 @@ def generate_line_strings(project, graph, results): ( *od, shapely.MultiLineString( - links.loc[ - graph.graph[graph.graph.__compressed_id__.isin(route)].link_id - ].geometry.to_list() + links.loc[graph.graph[graph.graph.__compressed_id__.isin(route)].link_id].geometry.to_list() ), ) )
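As a closing illustration of the path finding switch added in the "Allow switching path finding method" patch above, a hypothetical usage sketch follows. Only the `run` and `batched` signatures are taken from the patch; the import path and the constructor call are assumptions about the surrounding API, so treat this as a sketch rather than a definitive example:

from aequilibrae.paths.route_choice import RouteChoiceSet

rc = RouteChoiceSet(graph)  # assumption: `graph` is a prepared, compacted AequilibraE graph

# Heuristic-guided A* (the default) versus plain early-exit Dijkstra's for the same OD pair
routes_a_star = rc.run(1, 24, max_routes=5, a_star=True)
routes_dijkstra = rc.run(1, 24, max_routes=5, a_star=False)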