Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validation zhuzh-up: link attribute value checks #141

Merged
merged 14 commits into from
Sep 23, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion example_data/api_requests_send.json

Large diffs are not rendered by default.

108 changes: 72 additions & 36 deletions genet/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import genet.utils.plot as plot
import genet.utils.simplification as simplification
import genet.utils.spatial as spatial
import genet.validate.network_validation as network_validation
import genet.validate.network as network_validation
import geopandas as gpd
import networkx as nx
import numpy as np
Expand Down Expand Up @@ -1999,55 +1999,56 @@ def invalid_network_routes(self):
return [route.id for route in self.schedule.routes() if
not route.has_network_route() or not self.is_valid_network_route(route)]

def generate_validation_report(self, link_length_threshold=1000):
def generate_validation_report(self, modes=None, link_metre_length_threshold=1000):
KasiaKoz marked this conversation as resolved.
Show resolved Hide resolved
"""
Generates a dictionary with keys: 'graph', 'schedule' and 'routing' describing validity of the Network's
underlying graph, the schedule services and then the intersection of the two which is the routing of schedule
services onto the graph.
:param link_length_threshold: in meters defaults to 1000, i.e. 1km
:param modes: list of modes in the network that need to be checked for strong connectivity. Defaults to
'car', 'walk' and 'bike'
:param link_metre_length_threshold: in meters defaults to 1000, i.e. 1km
:return:
"""
logging.info('Checking validity of the Network')
logging.info('Checking validity of the Network graph')
report = {}

# describe network connectivity
modes = ['car', 'walk', 'bike']
report['graph'] = {'graph_connectivity': {}}
if modes is None:
modes = ['car', 'walk', 'bike']
KasiaKoz marked this conversation as resolved.
Show resolved Hide resolved
logging.info(f'Defaulting to checking graph connectivity for modes: {modes}. You can change this by '
'passing a `modes` param')
graph_connectivity = {}
for mode in modes:
logging.info(f'Checking network connectivity for mode: {mode}')
# subgraph for the mode to be tested
G_mode = self.modal_subgraph(mode)
# calculate how many connected subgraphs there are
report['graph']['graph_connectivity'][mode] = network_validation.describe_graph_connectivity(G_mode)

def links_over_threshold_length(value):
return value >= link_length_threshold

links_over_1km_length = self.extract_links_on_edge_attributes(
conditions={'length': links_over_threshold_length})
graph_connectivity[mode] = self.check_connectivity_for_mode(mode)
report['graph'] = {'graph_connectivity': graph_connectivity}

# attribute checks
report['graph']['link_attributes'] = {
'links_over_1km_length': {
'number_of': len(links_over_1km_length),
'percentage': len(links_over_1km_length) / self.graph.number_of_edges(),
'link_ids': links_over_1km_length
}
}
f'{k}_attributes': {} for k in network_validation.LINK_ATTRIBUTE_VALIDATION_TOOLBOX}

def zero_value(value):
return (value == 0) or (value == '0') or (value == '0.0')

report['graph']['link_attributes']['zero_attributes'] = {}
for attrib in [d.name for d in graph_operations.get_attribute_schema(self.links()).descendants]:
links_with_zero_attrib = self.extract_links_on_edge_attributes(
conditions={attrib: zero_value}, mixed_dtypes=False)
if links_with_zero_attrib:
logging.warning(f'{len(links_with_zero_attrib)} of links have values of 0 for `{attrib}`')
report['graph']['link_attributes']['zero_attributes'][attrib] = {
'number_of': len(links_with_zero_attrib),
'percentage': len(links_with_zero_attrib) / self.graph.number_of_edges(),
'link_ids': links_with_zero_attrib
}
# checks on length attribute specifically
def links_over_threshold_length(value):
return value >= link_metre_length_threshold

report['graph']['link_attributes']['links_over_1000_length'] = self.report_on_link_attribute_condition(
'length', links_over_threshold_length)

# more general attribute value checks
non_testable = ['id', 'from', 'to', 's2_to', 's2_from', 'geometry']
link_attributes = [graph_operations.parse_leaf(leaf) for leaf in
graph_operations.get_attribute_schema(self.links()).leaves]
link_attributes = [attrib for attrib in link_attributes if attrib not in non_testable]
for attrib in link_attributes:
logging.info(f'Checking link values for `{attrib}`')
for value, condition in network_validation.LINK_ATTRIBUTE_VALIDATION_TOOLBOX.items():
links_satifying_condition = self.report_on_link_attribute_condition(attrib, condition)
if links_satifying_condition['number_of']:
logging.warning(
f'{links_satifying_condition["number_of"]} of links have {value} values for `{attrib}`')
if isinstance(attrib, dict):
attrib = dict_support.dict_to_string(attrib)
report['graph']['link_attributes'][f'{value}_attributes'][attrib] = links_satifying_condition

if self.schedule:
report['schedule'] = self.schedule.generate_validation_report()
Expand All @@ -2066,6 +2067,41 @@ def zero_value(value):
}
return report

def report_on_link_attribute_condition(self, attribute, condition):
    """
    Reports on the links whose value stored under `attribute` satisfies `condition`.
    :param attribute: one of the link attributes, e.g. 'length'. A nested attribute is given as a nested
        dictionary describing the path to the value
    :param condition: callable, condition for link[attribute] to satisfy
    :return: dictionary with keys: 'number_of' (count of matching links), 'percentage' (matching links as a
        fraction of all graph edges, 0.0 when the graph has no edges) and 'link_ids' (the matching links)
    """
    if isinstance(attribute, dict):
        # NOTE(review): the deepcopy presumably protects the caller's nested key from being modified by
        # `nest_at_leaf` - confirm against dict_support
        conditions = dict_support.nest_at_leaf(deepcopy(attribute), condition)
    else:
        conditions = {attribute: condition}

    links_satisfying_condition = self.extract_links_on_edge_attributes(conditions=conditions)
    number_of_links = len(links_satisfying_condition)
    number_of_edges = self.graph.number_of_edges()
    return {
        'number_of': number_of_links,
        # a graph without edges has nothing to report on; avoid dividing by zero
        'percentage': number_of_links / number_of_edges if number_of_edges else 0.0,
        'link_ids': links_satisfying_condition
    }

def check_connectivity_for_mode(self, mode):
    """
    Describes the connectivity of the subgraph for a single mode and logs a summary of it.
    :param mode: mode to build the modal subgraph for, e.g. 'car'
    :return: connectivity description produced by `network_validation.describe_graph_connectivity` for the
        modal subgraph
    """
    logging.info(f'Checking network connectivity for mode: {mode}')
    # restrict the graph to the mode under test and describe how connected it is
    con_desc = network_validation.describe_graph_connectivity(self.modal_subgraph(mode))

    no_of_components = con_desc["number_of_connected_subgraphs"]
    problem_nodes = con_desc["problem_nodes"]
    no_of_dead_ends = len(problem_nodes["dead_ends"])
    no_of_unreachable_nodes = len(problem_nodes["unreachable_node"])
    logging.info(f'The graph for mode: {mode} has: '
                 f'{no_of_components} connected components, '
                 f'{no_of_dead_ends} sinks/dead_ends and '
                 f'{no_of_unreachable_nodes} sources/unreachable nodes.')

    # more than one component means the modal graph is not fully connected
    if no_of_components > 1:
        logging.warning(f'The graph has more than one connected component for mode {mode}! '
                        'If this is not expected, consider using the `connect_components` method to connect the '
                        'components, or `retain_n_connected_subgraphs` with `n=1` to extract the largest component')
    return con_desc

def generate_standard_outputs(self, output_dir, gtfs_day='19700101', include_shp_files=False):
"""
Generates geojsons that can be used for generating standard kepler visualisations.
Expand Down
2 changes: 1 addition & 1 deletion genet/output/matsim_xml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from copy import deepcopy
from pandas import DataFrame
from genet.output import sanitiser
from genet.validate.network_validation import validate_attribute_data
from genet.validate.network import validate_attribute_data
from genet.utils.spatial import encode_shapely_linestring_to_polyline
from genet.exceptions import MalformedAdditionalAttributeError
import genet.variables as variables
Expand Down
2 changes: 1 addition & 1 deletion genet/schedule_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import genet.utils.persistence as persistence
import genet.utils.plot as plot
import genet.utils.spatial as spatial
import genet.validate.schedule_validation as schedule_validation
import genet.validate.schedule as schedule_validation
import networkx as nx
import numpy as np
import pandas as pd
Expand Down
5 changes: 5 additions & 0 deletions genet/utils/dict_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ def combine_edge_data_lists(l1, l2):
return [(u, v, dat) for (u, v), dat in edges.items()]


def dict_to_string(d):
    """
    Turns a (potentially) nested dictionary into a single string.
    Braces and single quotes are dropped from the dictionary's string form and every space becomes ':',
    e.g. {'a': {'b': 'c'}} gives 'a::b::c'.
    :param d: dictionary (potentially nested)
    :return: str
    """
    # single pass over the string form: delete '{', '}' and "'", map ' ' to ':'
    translation = str.maketrans({'{': None, '}': None, "'": None, ' ': ':'})
    return str(d).translate(translation)


def dataframe_to_dict(df):
    """
    Converts a DataFrame to a nested dictionary, leaving out missing values.
    :param df: pandas.DataFrame
    :return: dictionary keyed by the keys of `df.to_dict()`, each holding a dictionary of the entries for which
        `notna` is true
    """
    result = {}
    for _id, mapping in df.to_dict().items():
        # keep only the entries that actually hold a value
        result[_id] = {key: value for key, value in mapping.items() if notna(value)}
    return result

Expand Down
19 changes: 17 additions & 2 deletions genet/utils/graph_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from anytree import Node, RenderTree

from genet.utils import pandas_helpers as pd_helpers
import genet.utils.dict_support as dict_support


class Filter:
Expand Down Expand Up @@ -209,6 +210,21 @@ def render_tree(root, data=False):
print("%s%s" % (pre, node.name))


def parse_leaf(leaf):
    """
    Builds the key describing where a leaf of an attribute schema tree sits.
    :param leaf: anytree.node.node.Node
    :return: the leaf's name (str) when the leaf is at depth 1 or less, otherwise a (nested) dictionary built
        from the names of the nodes on the leaf's path below the root, for use as keys to extraction methods
    """
    if leaf.depth <= 1:
        return leaf.name

    # path[0] is the root; the first two levels below it seed the dictionary
    _, outer, inner, *deeper = leaf.path
    nested_key = {outer.name: inner.name}
    # every further level is nested at the current leaf of the dictionary
    for node in deeper:
        nested_key = dict_support.nest_at_leaf(nested_key, node.name)
    return nested_key


def get_attribute_data_under_key(iterator: Iterable, key: Union[str, dict]):
"""
Returns all data stored under key in attribute dictionaries for iterators yielding (index, attribute_dictionary),
Expand Down Expand Up @@ -256,8 +272,7 @@ def build_attribute_dataframe(iterator, keys: Union[list, str], index_name: str
for key in keys:
if isinstance(key, dict):
# consolidate nestedness to get a name for the column
name = str(key)
name = name.replace('{', '').replace('}', '').replace("'", '').replace(' ', ':')
name = dict_support.dict_to_string(key)
else:
name = key

Expand Down
31 changes: 31 additions & 0 deletions genet/validate/network_validation.py → genet/validate/network.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import networkx as nx
import math


def validate_attribute_data(attributes, necessary_attributes):
Expand Down Expand Up @@ -35,3 +36,33 @@ def describe_graph_connectivity(G):
# find number of connected subgraphs
dict_to_return['number_of_connected_subgraphs'] = len(find_connected_subgraphs(G))
return dict_to_return


def zero_value(value):
    """
    Checks whether a value represents zero.
    :param value: value to check, a number or its string form
    :return: True if value equals 0 or is one of the strings '0', '0.0', False otherwise
    """
    # a tuple rather than a set: membership is then decided by equality alone, so unhashable values
    # (lists, dictionaries, sets) are reported as non-zero instead of raising TypeError
    return value in (0, '0', '0.0')
KasiaKoz marked this conversation as resolved.
Show resolved Hide resolved


def negative_value(value):
    """
    Checks whether a value is negative.
    :param value: value to check, a number or a string
    :return: True if value is a number below zero, or a string that parses to a number below zero.
        Strings that are not numbers are never negative
    """
    if isinstance(value, str):
        # parse the number instead of looking for a '-' character, which would also match hyphenated
        # text ('motorway-link') and exponents ('1e-05')
        try:
            return float(value) < 0
        except ValueError:
            return False
    return value < 0


def infinity_value(value):
    """
    Checks whether a value is infinite.
    :param value: value to check, a number or a string
    :return: for strings, True if value is 'inf' or '-inf'; otherwise the result of `math.isinf(value)`
    """
    if not isinstance(value, str):
        return math.isinf(value)
    return value == 'inf' or value == '-inf'


def fractional_value(value):
    """
    Checks whether a value lies strictly between 0 and 1.
    :param value: value to check, a number or a string
    :return: True if value is a number strictly between 0 and 1, or a string that parses to such a number.
        Strings that are not numbers are never fractional
    """
    if isinstance(value, str):
        # parse the number instead of searching for the '0.' substring, which would also match
        # '10.5', '100.0' and '-0.5'
        try:
            value = float(value)
        except ValueError:
            return False
    return 1 > value > 0


# Maps the name of a link attribute value check to the predicate implementing it. Each predicate takes a
# single attribute value and returns whether the value satisfies the check.
LINK_ATTRIBUTE_VALIDATION_TOOLBOX = {
    'zero': zero_value,
    'negative': negative_value,
    'infinite': infinity_value,
    'fractional': fractional_value,
}
File renamed without changes.
Loading