Skip to content

Commit

Permalink
Validation zhuzh-up: headway value checks (#146)
Browse files Browse the repository at this point in the history
* refactor link length over 1km

* tidy up graph connectivity validation

* refactor length and zero value checks

* add checks for negative, infinite and fractional link attribute values

* fix to work with dictionary / nested attributes

* module rename, tidy up

* update logging to match the rest

* fix imports in tests

* leave first-trip departure diffs as NaNs so that they are ignored in stats

* address PR comments: Part 1: readability and conditions toolbox dataclass

* address PR comments: Part 2: chop up existing tests for validation report

* add condition for none values

* expose trips to dataframe and headway stats methods to Route and Service objects

* make vehicle definitions a stronger fail/pass condition, report as invalid stage on schedule level

* shuffle vehicle checks

* add headway stats to route level validation and check for zero values

* add tests for headway in validation report

* rename test file
  • Loading branch information
KasiaKoz authored Oct 7, 2022
1 parent 6ecec80 commit c611e80
Show file tree
Hide file tree
Showing 5 changed files with 364 additions and 177 deletions.
115 changes: 66 additions & 49 deletions genet/schedule_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,70 @@ def find_epsg(self):
return epsg
return None

@abstractmethod
def trips_to_dataframe(self, gtfs_day='19700101'):
    """
    Abstract hook: concrete schedule elements provide their own implementation.
    `trips_headways` calls it and sorts the result by `route_id` and `trip_departure_time`, so
    implementations are expected to return a DataFrame exposing at least those two columns.
    :param gtfs_day: day used for GTFS when creating the network in YYYYMMDD format defaults to 19700101
    :return: nothing at this level; the body is intentionally empty
    """

def trips_headways(self, from_time=None, to_time=None, gtfs_day='19700101'):
    """
    Generates a DataFrame of trips (as returned by `trips_to_dataframe`), ordered by route ID and
    trip departure time, with two extra columns:
      - `headway`: time difference to the previous departure on the same route,
      - `headway_mins`: the same difference expressed in minutes.
    The first trip of every route has no predecessor, so its `headway` is NaT and its `headway_mins` is NaN.
    Headways are computed on all trips BEFORE the optional time subsetting, so the first trip inside the
    requested window keeps its headway relative to the preceding trip outside of the window.
    This can also be done for a specific time frame by specifying from_time and to_time (or just one of them).
    :param from_time: "HH:MM:SS" format, used as (inclusive) lower time bound for subsetting
    :param to_time: "HH:MM:SS" format, used as (inclusive) upper time bound for subsetting
    :param gtfs_day: day used for GTFS when creating the network in YYYYMMDD format defaults to 19700101
    :return: DataFrame of trips with `headway` and `headway_mins` columns
    """
    df = self.trips_to_dataframe(gtfs_day=gtfs_day).sort_values(
        ['route_id', 'trip_departure_time']).reset_index(drop=True)

    year = int(gtfs_day[:4])
    month = int(gtfs_day[4:6])
    day = int(gtfs_day[6:8])

    def _on_gtfs_day(time_of_day):
        # turns "HH:MM:SS" into a datetime on the GTFS day, comparable with `trip_departure_time`
        hour, minute, second = map(int, time_of_day.split(':'))
        return datetime(year, month, day, hour, minute, second)

    # A per-route diff keeps the frame's flat index regardless of the pandas `group_keys` default, unlike
    # `groupby(...).apply(...)`, which prepends the group key to the index in newer pandas versions.
    df['headway'] = df.groupby('route_id')['trip_departure_time'].diff()
    df['headway_mins'] = pd.to_timedelta(df['headway']).dt.total_seconds() / 60

    if from_time is not None:
        df = df[df['trip_departure_time'] >= _on_gtfs_day(from_time)]
    if to_time is not None:
        df = df[df['trip_departure_time'] <= _on_gtfs_day(to_time)]

    return df

def headway_stats(self, from_time=None, to_time=None, gtfs_day='19700101'):
    """
    Generates a DataFrame with mean, standard deviation, maximum and minimum headway in minutes, and the
    number of trips, for each route. Rows are keyed by route ID and mode, and additionally by service ID
    when the trips data carries a `service_id` column.
    This can also be done for a specific time frame by specifying from_time and to_time (or just one of them).
    When there are no trips to summarise, an empty DataFrame with the same columns is returned.
    :param from_time: "HH:MM:SS" format, used as lower time bound for subsetting
    :param to_time: "HH:MM:SS" format, used as upper time bound for subsetting
    :param gtfs_day: day used for GTFS when creating the network in YYYYMMDD format defaults to 19700101
    :return: DataFrame with columns: [service_id,] route_id, mode, mean_headway_mins, std_headway_mins,
        max_headway_mins, min_headway_mins, trip_count
    """
    df = self.trips_headways(from_time=from_time, to_time=to_time, gtfs_day=gtfs_day)

    groupby_cols = ['route_id', 'mode']
    if 'service_id' in df.columns:
        groupby_cols.insert(0, 'service_id')

    stat_columns = {
        'mean': 'mean_headway_mins',
        'std': 'std_headway_mins',
        'max': 'max_headway_mins',
        'min': 'min_headway_mins',
    }

    if df.empty:
        # keep the output schema stable so that callers can still select the stats columns
        return pd.DataFrame(columns=groupby_cols + list(stat_columns.values()) + ['trip_count'])

    route_groups = df.groupby(groupby_cols)
    # first trips don't have a headway, they are kept as NaT and NaN and are skipped by `describe`
    stats = route_groups['headway_mins'].describe()[list(stat_columns)]
    # trip_count counts all trips in the group, including those without a headway
    stats = stats.assign(trip_count=route_groups.size())
    return stats.reset_index().rename(columns=stat_columns)

def to_geodataframe(self):
"""
Generates GeoDataFrames of the Schedule graph in Schedule's crs
Expand Down Expand Up @@ -684,6 +748,7 @@ def trips_to_dataframe(self, gtfs_day='19700101'):
:return:
"""
df = pd.DataFrame(self.trips)

df['route_id'] = self.id
df['trip_departure_time'] = df['trip_departure_time'].apply(lambda x: use_schedule.sanitise_time(x, gtfs_day))
df['mode'] = self.mode
Expand Down Expand Up @@ -1449,35 +1514,6 @@ def trips_to_dataframe(self, gtfs_day='19700101'):
df['trip_departure_time'] = df['trip_departure_time'].apply(lambda x: use_schedule.sanitise_time(x, gtfs_day))
return df

def trips_headways(self, from_time=None, to_time=None, gtfs_day='19700101'):
    """
    Builds a DataFrame of trips (as given by `trips_to_dataframe`) ordered by route ID and departure time,
    optionally restricted to a time window, and adds `headway` and `headway_mins` columns.
    The time window is applied first; headways are then derived per route by `get_headway` from the trips
    that remain. Missing `headway_mins` values are filled with 0.
    :param from_time: "HH:MM:SS" format, used as lower time bound for subsetting
    :param to_time: "HH:MM:SS" format, used as upper time bound for subsetting
    :param gtfs_day: day used for GTFS when creating the network in YYYYMMDD format defaults to 19700101
    :return: DataFrame of trips with `headway` and `headway_mins` columns
    """
    trips = self.trips_to_dataframe(gtfs_day=gtfs_day)
    trips = trips.sort_values(['route_id', 'trip_departure_time']).reset_index(drop=True)

    year, month, day = int(gtfs_day[:4]), int(gtfs_day[4:6]), int(gtfs_day[6:8])

    if from_time is not None:
        hh, mm, ss = [int(part) for part in from_time.split(':')]
        earliest = datetime(year, month, day, hh, mm, ss)
        trips = trips[trips['trip_departure_time'] >= earliest]

    if to_time is not None:
        hh, mm, ss = [int(part) for part in to_time.split(':')]
        latest = datetime(year, month, day, hh, mm, ss)
        trips = trips[trips['trip_departure_time'] <= latest]

    trips = trips.groupby('route_id').apply(get_headway)
    headway_seconds = pd.to_timedelta(trips['headway']).dt.total_seconds()
    trips['headway_mins'] = (headway_seconds / 60).fillna(0)
    return trips

def generate_trips_dataframe_from_headway(self, route_id, headway_spec: dict):
"""
Generates new trips and vehicles for the specified route.
Expand Down Expand Up @@ -1523,25 +1559,6 @@ def generate_trips_from_headway(self, route_id, headway_spec: dict):
self.vehicles = {**{veh_id: veh_type for veh_id in new_trips['vehicle_id']}, **self.vehicles}
list(map(self.vehicles.pop, old_vehicles - set(new_trips['vehicle_id'])))

def headway_stats(self, from_time=None, to_time=None, gtfs_day='19700101'):
    """
    Summarises headways in minutes for every (service ID, route ID, mode) combination: mean, standard
    deviation, maximum, minimum and the count of `headway_mins` values (reported as `trip_count`).
    A time window can be requested through from_time and to_time (either one, or both).
    :param from_time: "HH:MM:SS" format, used as lower time bound for subsetting
    :param to_time: "HH:MM:SS" format, used as upper time bound for subsetting
    :param gtfs_day: day used for GTFS when creating the network in YYYYMMDD format defaults to 19700101
    :return: DataFrame with one row per (service_id, route_id, mode)
    """
    trips = self.trips_headways(from_time=from_time, to_time=to_time, gtfs_day=gtfs_day)

    described = trips.groupby(['service_id', 'route_id', 'mode']).describe()
    wanted = ['mean', 'std', 'max', 'min', 'count']
    stats = described['headway_mins'][wanted]
    stats = stats.reset_index()

    new_names = {
        'mean': 'mean_headway_mins',
        'std': 'std_headway_mins',
        'max': 'max_headway_mins',
        'min': 'min_headway_mins',
        'count': 'trip_count',
    }
    return stats.rename(columns=new_names)

def unused_vehicles(self):
"""
A scenario change to the network may result in changes to vehicle assignments, with some vehicles not
Expand Down Expand Up @@ -3073,7 +3090,7 @@ def read_vehicle_types(yml):


def get_headway(group):
    """
    Adds a `headway` column to a group of trips: the difference between each trip's departure time and
    the departure time of the row before it. The first row has no predecessor and is left as NaT.
    The group is expected to be ordered by `trip_departure_time` already; no sorting happens here.
    :param group: DataFrame with a `trip_departure_time` column; it is modified in place
    :return: the same DataFrame, with the `headway` column added
    """
    group['headway'] = group['trip_departure_time'].diff()
    return group


Expand Down
66 changes: 47 additions & 19 deletions genet/validate/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,28 @@ def generate_validation_report(schedule):
'route_level': {},
'vehicle_level': {}
}

logging.info('Computing headway stats')
df_headway = schedule.headway_stats().set_index('route_id')

route_validity = {}
for route in schedule.routes():
is_valid_route, invalid_stages = route.is_valid_route(return_reason=True)
route_validity[route.id] = {
'is_valid_route': is_valid_route,
'invalid_stages': invalid_stages
'invalid_stages': invalid_stages,
'headway_stats': df_headway.loc[
route.id, ['mean_headway_mins', 'std_headway_mins', 'max_headway_mins', 'min_headway_mins']].to_dict()
}

is_valid_vehicle_def = schedule.validate_vehicle_definitions()
missing_vehicle_types = schedule.get_missing_vehicle_information()

report['vehicle_level'] = {
'vehicle_definitions_valid': is_valid_vehicle_def,
'vehicle_definitions_validity_components': {
'missing_vehicles': {
'missing_vehicles_types': missing_vehicle_types['missing_vehicle_types'],
'vehicles_affected': missing_vehicle_types['vehicles_affected']},
'unused_vehicles': schedule.unused_vehicles(),
'multiple_use_vehicles': schedule.check_vehicle_uniqueness()}
}

for service_id in schedule.service_ids():
invalid_stages = []
invalid_routes = []
report['route_level'][service_id] = {}
for route_id in schedule.service_to_route_map()[service_id]:
if not route_validity[route_id]['is_valid_route']:
invalid_routes.append(route_id)
logging.warning(f'Route id={route_id} under Service id={service_id} is not valid')
logging.warning(f'Route ID: {route_id} under Service ID: {service_id} is not valid')
report['route_level'][service_id][route_id] = route_validity[route_id]

if invalid_routes:
Expand All @@ -55,16 +48,33 @@ def generate_validation_report(schedule):
'invalid_routes': invalid_routes
}
if not is_valid_service:
logging.warning('Service id={} is not valid'.format(service_id))
logging.warning(f'Service with ID: {service_id} is not valid')

invalid_stages = []
invalid_services = [service_id for service_id in schedule.service_ids() if
not report['service_level'][service_id]['is_valid_service']]

if invalid_services:
logging.info('Checking validity of PT vehicles')
has_valid_vehicle_def = schedule.validate_vehicle_definitions()
missing_vehicle_types = schedule.get_missing_vehicle_information()

report['vehicle_level'] = {
'vehicle_definitions_valid': has_valid_vehicle_def,
'vehicle_definitions_validity_components': {
'missing_vehicles': {
'missing_vehicles_types': missing_vehicle_types['missing_vehicle_types'],
'vehicles_affected': missing_vehicle_types['vehicles_affected']},
'unused_vehicles': schedule.unused_vehicles(),
'multiple_use_vehicles': schedule.check_vehicle_uniqueness()}
}

if invalid_services or (not has_valid_vehicle_def):
is_valid_schedule = False
has_valid_services = False
invalid_stages.append('not_has_valid_services')
if invalid_services:
invalid_stages.append('not_has_valid_services')
if not has_valid_vehicle_def:
invalid_stages.append('not_has_valid_vehicle_definitions')
else:
is_valid_schedule = True
has_valid_services = True
Expand All @@ -75,7 +85,25 @@ def generate_validation_report(schedule):
'has_valid_services': has_valid_services,
'invalid_services': invalid_services}

if (not is_valid_schedule) or (not is_valid_vehicle_def):
zero_headways = df_headway[df_headway['min_headway_mins'] == 0]
report['schedule_level']['headways'] = {}
if not zero_headways.empty:
report['schedule_level']['headways']['has_zero_min_headways'] = True
report['schedule_level']['headways']['routes'] = {
'number_of_affected': len(zero_headways),
'ids': list(zero_headways.index)
}
report['schedule_level']['headways']['services'] = {
'number_of_affected': len(zero_headways['service_id'].unique()),
'ids': list(zero_headways['service_id'].unique())
}

logging.warning(f"Found {len(zero_headways)} PT Routes 0 minimum headway between trips. "
f"The following Services are affected: {report['schedule_level']['headways']['services']}")
else:
report['schedule_level']['headways']['has_zero_min_headways'] = False

if not is_valid_schedule:
logging.warning('This schedule is not valid')

return report
1 change: 0 additions & 1 deletion tests/test_core_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -2744,7 +2744,6 @@ def invalid_pt2matsim_network_for_validation(network_object_from_test_data):
'pt_routes_with_invalid_network_route': ['VJbd8660f05fe6f744e58a66ae12bd66acbca88b98'],
}


def test_connectivity_in_report_with_invalid_network(invalid_pt2matsim_network_for_validation):
report = invalid_pt2matsim_network_for_validation['network'].generate_validation_report()
for mode, expected_connected_subgraphs in invalid_pt2matsim_network_for_validation['subgraph_no_per_mode'].items():
Expand Down
Loading

0 comments on commit c611e80

Please sign in to comment.