Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validation zhuzh-up: link attribute value checks #141

Merged
merged 14 commits into from
Sep 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion example_data/api_requests_send.json

Large diffs are not rendered by default.

116 changes: 77 additions & 39 deletions genet/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import genet.utils.plot as plot
import genet.utils.simplification as simplification
import genet.utils.spatial as spatial
import genet.validate.network_validation as network_validation
import genet.validate.network as network_validation
import geopandas as gpd
import networkx as nx
import numpy as np
Expand Down Expand Up @@ -942,7 +942,7 @@ def subgraph_on_link_conditions(self, conditions, how=any, mixed_dtypes=True):

def modes(self):
"""
Scans network for 'modes' attribute and returns list of all modes present int he network
Scans network for 'modes' attribute and returns list of all modes present in the network
:return:
"""
modes = set()
Expand Down Expand Up @@ -1999,55 +1999,60 @@ def invalid_network_routes(self):
return [route.id for route in self.schedule.routes() if
not route.has_network_route() or not self.is_valid_network_route(route)]

def generate_validation_report(self, link_length_threshold=1000):
def generate_validation_report(self, modes_for_strong_connectivity=None, link_metre_length_threshold=1000):
"""
Generates a dictionary with keys: 'graph', 'schedule' and 'routing' describing validity of the Network's
underlying graph, the schedule services and then the intersection of the two which is the routing of schedule
services onto the graph.
:param link_length_threshold: in meters defaults to 1000, i.e. 1km
:param modes_for_strong_connectivity: list of modes in the network that need to be checked for strong
connectivity. Defaults to 'car', 'walk' and 'bike'
:param link_metre_length_threshold: in meters defaults to 1000, i.e. 1km
:return:
"""
logging.info('Checking validity of the Network')
logging.info('Checking validity of the Network graph')
report = {}
# describe network connectivity
modes = ['car', 'walk', 'bike']
report['graph'] = {'graph_connectivity': {}}
for mode in modes:
logging.info(f'Checking network connectivity for mode: {mode}')
# subgraph for the mode to be tested
G_mode = self.modal_subgraph(mode)
# calculate how many connected subgraphs there are
report['graph']['graph_connectivity'][mode] = network_validation.describe_graph_connectivity(G_mode)

def links_over_threshold_length(value):
return value >= link_length_threshold

links_over_1km_length = self.extract_links_on_edge_attributes(
conditions={'length': links_over_threshold_length})

# describe network connectivity
if modes_for_strong_connectivity is None:
modes_for_strong_connectivity = ['car', 'walk', 'bike']
logging.info(f'Defaulting to checking graph connectivity for modes: {modes_for_strong_connectivity}. '
'You can change this by passing a `modes_for_strong_connectivity` param')
graph_connectivity = {}
for mode in modes_for_strong_connectivity:
graph_connectivity[mode] = self.check_connectivity_for_mode(mode)
report['graph'] = {'graph_connectivity': graph_connectivity}

# attribute checks
conditions_toolbox = network_validation.ConditionsToolbox()
report['graph']['link_attributes'] = {
'links_over_1km_length': {
'number_of': len(links_over_1km_length),
'percentage': len(links_over_1km_length) / self.graph.number_of_edges(),
'link_ids': links_over_1km_length
}
}
f'{k}_attributes': {} for k in conditions_toolbox.condition_names()}

def zero_value(value):
return (value == 0) or (value == '0') or (value == '0.0')

report['graph']['link_attributes']['zero_attributes'] = {}
for attrib in [d.name for d in graph_operations.get_attribute_schema(self.links()).descendants]:
links_with_zero_attrib = self.extract_links_on_edge_attributes(
conditions={attrib: zero_value}, mixed_dtypes=False)
if links_with_zero_attrib:
logging.warning(f'{len(links_with_zero_attrib)} of links have values of 0 for `{attrib}`')
report['graph']['link_attributes']['zero_attributes'][attrib] = {
'number_of': len(links_with_zero_attrib),
'percentage': len(links_with_zero_attrib) / self.graph.number_of_edges(),
'link_ids': links_with_zero_attrib
}
# checks on length attribute specifically
def links_over_threshold_length(value):
return value >= link_metre_length_threshold

report['graph']['link_attributes']['links_over_1000_length'] = self.report_on_link_attribute_condition(
'length', links_over_threshold_length)

# more general attribute value checks
non_testable = ['id', 'from', 'to', 's2_to', 's2_from', 'geometry']
link_attributes = [graph_operations.parse_leaf(leaf) for leaf in
graph_operations.get_attribute_schema(self.links()).leaves]
link_attributes = [attrib for attrib in link_attributes if attrib not in non_testable]
for attrib in link_attributes:
logging.info(f'Checking link values for `{attrib}`')
for condition_name in conditions_toolbox.condition_names():
links_satifying_condition = self.report_on_link_attribute_condition(
attrib, conditions_toolbox.get_condition_evaluator(condition_name))
if links_satifying_condition['number_of']:
logging.warning(
f'{links_satifying_condition["number_of"]} of links have '
f'{condition_name} values for `{attrib}`')
if isinstance(attrib, dict):
attrib = dict_support.dict_to_string(attrib)
report['graph']['link_attributes'][f'{condition_name}_attributes'][
attrib] = links_satifying_condition

if self.schedule:
report['schedule'] = self.schedule.generate_validation_report()
Expand All @@ -2066,6 +2071,39 @@ def zero_value(value):
}
return report

def report_on_link_attribute_condition(self, attribute, condition):
    """
    Reports on the links of the network for which the value stored under `attribute` satisfies `condition`
    :param attribute: one of the link attributes, e.g. 'length'. A dictionary is accepted for nested attributes,
    in which case `condition` is nested at its leaf
    :param condition: callable, condition for link[attribute] to satisfy
    :return: dictionary with keys: 'number_of' (number of links satisfying the condition), 'percentage'
    ('number_of' divided by the number of edges in the graph, 0.0 if the graph has no edges) and 'link_ids'
    (the links satisfying the condition, as returned by `extract_links_on_edge_attributes`)
    """
    if isinstance(attribute, dict):
        # the attribute dictionary is copied before the condition is nested into it, so the dictionary passed
        # by the caller is left untouched
        conditions = dict_support.nest_at_leaf(deepcopy(attribute), condition)
    else:
        conditions = {attribute: condition}

    links_satisfying_condition = self.extract_links_on_edge_attributes(conditions=conditions)
    number_of_links = len(links_satisfying_condition)
    number_of_edges = self.graph.number_of_edges()
    return {
        'number_of': number_of_links,
        # a graph without edges has no links to report on; avoid dividing by zero
        'percentage': number_of_links / number_of_edges if number_of_edges else 0.0,
        'link_ids': links_satisfying_condition
    }

def check_connectivity_for_mode(self, mode):
    """
    Describes the connectivity of the modal subgraph of the network for `mode`. Logs a summary of the result and
    a warning if the modal subgraph has more than one connected component
    :param mode: mode passed to `modal_subgraph` to extract the graph to check, e.g. 'car'
    :return: output of `network_validation.describe_graph_connectivity` for the modal subgraph
    """
    logging.info(f'Checking network connectivity for mode: {mode}')
    connectivity = network_validation.describe_graph_connectivity(self.modal_subgraph(mode))

    component_count = connectivity["number_of_connected_subgraphs"]
    dead_end_count = len(connectivity["problem_nodes"]["dead_ends"])
    unreachable_count = len(connectivity["problem_nodes"]["unreachable_node"])
    logging.info(f'The graph for mode: {mode} has: '
                 f'{component_count} connected components, '
                 f'{dead_end_count} sinks/dead_ends and '
                 f'{unreachable_count} sources/unreachable nodes.')

    if component_count > 1:
        logging.warning(f'The graph has more than one connected component for mode {mode}! '
                        'If this is not expected, consider using the `connect_components` method to connect the '
                        'components, or `retain_n_connected_subgraphs` with `n=1` to extract the largest component')
    return connectivity

def generate_standard_outputs(self, output_dir, gtfs_day='19700101', include_shp_files=False):
"""
Generates geojsons that can be used for generating standard kepler visualisations.
Expand Down
2 changes: 1 addition & 1 deletion genet/output/matsim_xml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from copy import deepcopy
from pandas import DataFrame
from genet.output import sanitiser
from genet.validate.network_validation import validate_attribute_data
from genet.validate.network import validate_attribute_data
from genet.utils.spatial import encode_shapely_linestring_to_polyline
from genet.exceptions import MalformedAdditionalAttributeError
import genet.variables as variables
Expand Down
2 changes: 1 addition & 1 deletion genet/schedule_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import genet.utils.persistence as persistence
import genet.utils.plot as plot
import genet.utils.spatial as spatial
import genet.validate.schedule_validation as schedule_validation
import genet.validate.schedule as schedule_validation
import networkx as nx
import numpy as np
import pandas as pd
Expand Down
4 changes: 4 additions & 0 deletions genet/utils/dict_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ def combine_edge_data_lists(l1, l2):
return [(u, v, dat) for (u, v), dat in edges.items()]


def dict_to_string(d):
    """
    Consolidates the string representation of a (nested) dictionary into a single string: curly brackets and
    single quotes are removed and every space is replaced by a colon,
    e.g. {'attributes': {'osm:way:highway': 'primary'}} becomes 'attributes::osm:way:highway::primary'
    :param d: dictionary (any object with a string representation is accepted)
    :return: str
    """
    # none of the substitutions produces a character that another substitution targets, so a single
    # translation pass gives the same result as applying them one after another
    substitutions = str.maketrans({'{': None, '}': None, "'": None, ' ': ':'})
    return str(d).translate(substitutions)


def dataframe_to_dict(df):
return {_id: {k: v for k, v in m.items() if notna(v)} for _id, m in df.to_dict().items()}

Expand Down
19 changes: 17 additions & 2 deletions genet/utils/graph_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from anytree import Node, RenderTree

from genet.utils import pandas_helpers as pd_helpers
import genet.utils.dict_support as dict_support


class Filter:
Expand Down Expand Up @@ -209,6 +210,21 @@ def render_tree(root, data=False):
print("%s%s" % (pre, node.name))


def parse_leaf(leaf):
    """
    Turns a leaf of an attribute schema tree into a key that describes where the leaf sits in the tree
    :param leaf: anytree.node.node.Node
    :return: the name of the leaf if its depth is 1 or less, otherwise a (nested) dictionary built from the names
    of the nodes on the path to the leaf (the root is left out), for use as keys to extraction methods
    """
    if leaf.depth <= 1:
        return leaf.name

    path_to_leaf = leaf.path
    nested_key = {path_to_leaf[1].name: path_to_leaf[2].name}
    if leaf.depth > 2:
        # each remaining node on the path is nested one level deeper
        for deeper_node in path_to_leaf[3:]:
            nested_key = dict_support.nest_at_leaf(nested_key, deeper_node.name)
    return nested_key


def get_attribute_data_under_key(iterator: Iterable, key: Union[str, dict]):
"""
Returns all data stored under key in attribute dictionaries for iterators yielding (index, attribute_dictionary),
Expand Down Expand Up @@ -256,8 +272,7 @@ def build_attribute_dataframe(iterator, keys: Union[list, str], index_name: str
for key in keys:
if isinstance(key, dict):
# consolidate nestedness to get a name for the column
name = str(key)
name = name.replace('{', '').replace('}', '').replace("'", '').replace(' ', ':')
name = dict_support.dict_to_string(key)
else:
name = key

Expand Down
101 changes: 101 additions & 0 deletions genet/validate/network.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import math
from dataclasses import dataclass, field, fields

import networkx as nx


def validate_attribute_data(attributes, necessary_attributes):
    """
    Checks that all of the necessary attributes are present in the attribute data
    :param attributes: iterable of attribute names that are present, e.g. a dictionary keyed by attribute name
    :param necessary_attributes: iterable of attribute names that are required
    :return: None
    :raises AttributeError: if any of the necessary attributes are not present in `attributes`
    """
    required = set(necessary_attributes)
    present = set(attributes)
    absent = required.difference(present)
    if not absent:
        return
    raise AttributeError(f'Attributes: {absent} missing from data: {attributes}')


def find_problem_nodes(G):
    """
    Finds nodes of G which have no outgoing edges (dead ends) or no incoming edges (unreachable nodes)
    :param G: directed graph exposing `nodes`, `in_degree(node)` and `out_degree(node)`
    :return: dictionary with keys: 'dead_ends' (list of nodes with out degree of zero) and 'unreachable_node'
    (list of nodes with in degree of zero). A node with no edges at all is listed under both
    """
    nodes = list(G.nodes)
    return {
        'dead_ends': [node for node in nodes if G.out_degree(node) == 0],
        'unreachable_node': [node for node in nodes if G.in_degree(node) == 0],
    }


def find_connected_subgraphs(G):
    """
    Finds strongly connected components of G, ordered from the largest to the smallest
    :param G: directed graph, passed to networkx.strongly_connected_components
    :return: list of tuples (list of nodes in the component, number of nodes in the component)
    """
    components_by_size = sorted(nx.strongly_connected_components(G), key=len, reverse=True)
    subgraphs = []
    for component in components_by_size:
        subgraphs.append((list(component), len(component)))
    return subgraphs


def describe_graph_connectivity(G):
    """
    Computes dead ends and unreachable nodes in G. Computes the number of strongly connected components of G
    :param G: directed graph
    :return: dictionary with keys: 'problem_nodes' (output of `find_problem_nodes`) and
    'number_of_connected_subgraphs' (number of components found by `find_connected_subgraphs`)
    """
    # dead ends and unreachable nodes
    problem_nodes = find_problem_nodes(G)
    # strongly connected components; only their count is reported
    connected_subgraphs = find_connected_subgraphs(G)
    return {
        'problem_nodes': problem_nodes,
        'number_of_connected_subgraphs': len(connected_subgraphs),
    }


def evaluate_condition_for_floatable(value, condition):
    """
    Evaluates `condition` on the float representation of `value`
    :param value: value to check, cast to float before the condition is evaluated
    :param condition: callable taking a float
    :return: output of `condition`, or False if a ValueError or TypeError is raised when casting `value` to float
    or when evaluating the condition
    """
    try:
        outcome = condition(float(value))
    except (TypeError, ValueError):
        # values without a float representation cannot satisfy a condition on floats
        outcome = False
    return outcome


def zero_value(value):
    """
    :param value: value to check
    :return: result of comparing `value` for equality with zero
    """
    is_zero = (value == 0.0)
    return is_zero


def negative_value(value):
    """
    :param value: number to check
    :return: True if `value` is strictly less than zero
    """
    return 0.0 > value


def infinity_value(value):
    """
    :param value: number to check
    :return: True if `value` is positive or negative infinity
    """
    is_infinite = math.isinf(value)
    return is_infinite


def fractional_value(value):
    """
    :param value: number to check
    :return: True if `value` lies strictly between zero and one
    """
    return 0.0 < value < 1.0


def none_condition(value):
    """
    :param value: value to check
    :return: True if `value` is None or the string 'None'
    """
    none_like = (None, 'None')
    return value in none_like


@dataclass
class Condition:
    """
    Wraps a callable that checks a single value
    """
    condition: callable  # callable taking the value to check and returning the outcome of the check

    def evaluate(self, value):
        """
        :param value: value to check
        :return: output of the stored condition for `value`
        """
        check = self.condition
        return check(value)


@dataclass
class FloatCondition(Condition):
    """
    Condition which is evaluated on the float representation of a value, through
    `evaluate_condition_for_floatable`
    """
    condition: callable  # callable taking a float and returning the outcome of the check

    def evaluate(self, value):
        """
        :param value: value to check
        :return: output of `evaluate_condition_for_floatable` for `value` and the stored condition
        """
        return evaluate_condition_for_floatable(value=value, condition=self.condition)


@dataclass()
class ConditionsToolbox:
    """
    Collection of named conditions that attribute values can be checked against. The name of each field is the
    name of the condition; the conditions on numbers are evaluated through FloatCondition
    """
    # Each default is built by a default_factory: Condition instances are unhashable (dataclasses with eq and
    # without frozen), and dataclasses refuse unhashable objects as field defaults from Python 3.11. It also
    # gives every toolbox its own Condition instances rather than ones shared through the class.
    zero: Condition = field(default_factory=lambda: FloatCondition(zero_value))
    negative: Condition = field(default_factory=lambda: FloatCondition(negative_value))
    infinite: Condition = field(default_factory=lambda: FloatCondition(infinity_value))
    fractional: Condition = field(default_factory=lambda: FloatCondition(fractional_value))
    none: Condition = field(default_factory=lambda: Condition(none_condition))

    def condition_names(self) -> list:
        """
        :return: list of names of the conditions in the toolbox, in the order they are declared
        """
        return [condition_field.name for condition_field in fields(self)]

    def get_condition_evaluator(self, condition: str) -> callable:
        """
        :param condition: name of the condition, one of the names given by `condition_names`
        :return: the `evaluate` method of the named condition; takes the value to check
        :raises NotImplementedError: if there is no condition stored under that name
        """
        if condition not in self.__dict__:
            raise NotImplementedError(f'Condition {condition} is not defined.')
        return self.__dict__[condition].evaluate
37 changes: 0 additions & 37 deletions genet/validate/network_validation.py

This file was deleted.

File renamed without changes.
Loading