Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update MF submission format for multi-world challenge #138

Merged
merged 3 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ profile = "black"
[tool.mypy]
exclude = "build"
ignore_missing_imports = true
disallow_untyped_calls = false
disallow_untyped_decorators = false
plugins = "numpy.typing.mypy_plugin"
strict = true
Expand Down
130 changes: 127 additions & 3 deletions src/av2/datasets/motion_forecasting/eval/metrics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# <Copyright 2022, Argo AI, LLC. Released under the MIT license.>
"""Utilities to evaluate motion forecasting predictions and compute metrics."""

from typing import List

import numpy as np

from av2.utils.typing import NDArrayBool, NDArrayFloat
Expand Down Expand Up @@ -32,7 +34,8 @@ def compute_fde(forecasted_trajectories: NDArrayFloat, gt_trajectory: NDArrayFlo
(K,) Final displacement error for each of the predicted trajectories.
"""
# Compute final displacement error for all K trajectories
fde_vector = (forecasted_trajectories - gt_trajectory)[:, -1]
error_vector: NDArrayFloat = forecasted_trajectories - gt_trajectory
fde_vector = error_vector[:, -1]
fde: NDArrayFloat = np.linalg.norm(fde_vector, axis=-1)
return fde

Expand All @@ -50,10 +53,10 @@ def compute_is_missed_prediction(
miss_threshold_m: Minimum distance threshold for final displacement to be considered a miss.

Returns:
(K,) Bools indicating whether prediction missed by more than specified threshold.
(K,) bools indicating whether prediction missed by more than specified threshold.
"""
fde = compute_fde(forecasted_trajectories, gt_trajectory)
is_missed_prediction = fde > miss_threshold_m
is_missed_prediction: NDArrayBool = fde > miss_threshold_m
return is_missed_prediction


Expand Down Expand Up @@ -141,3 +144,124 @@ def _compute_brier_score(

brier_score: NDArrayFloat = np.square((1 - forecast_probabilities))
return brier_score


def compute_world_fde(forecasted_world_trajectories: NDArrayFloat, gt_world_trajectories: NDArrayFloat) -> NDArrayFloat:
    """Compute the mean final displacement error for each of K predicted worlds.

    Args:
        forecasted_world_trajectories: (M, K, N, 2) K predicted trajectories of length N, for each of M actors.
        gt_world_trajectories: (M, N, 2) ground truth trajectories of length N, for each of M actors.

    Returns:
        (K,) Mean final displacement error for each of the predicted worlds.
    """
    # Gather the (K,) per-actor FDE vectors, one for each of the M actors.
    per_actor_fdes: List[NDArrayFloat] = []
    for actor_forecasts, actor_gt in zip(forecasted_world_trajectories, gt_world_trajectories):
        per_actor_fdes.append(compute_fde(actor_forecasts, actor_gt))

    # Average across actors to obtain a single score per predicted world.
    world_fdes: NDArrayFloat = np.stack(per_actor_fdes).mean(axis=0)
    return world_fdes


def compute_world_ade(forecasted_world_trajectories: NDArrayFloat, gt_world_trajectories: NDArrayFloat) -> NDArrayFloat:
    """Compute the mean average displacement error for each of K predicted worlds.

    Args:
        forecasted_world_trajectories: (M, K, N, 2) K predicted trajectories of length N, for each of M actors.
        gt_world_trajectories: (M, N, 2) ground truth trajectories of length N, for each of M actors.

    Returns:
        (K,) Mean average displacement error for each of the predicted worlds.
    """
    # Gather the (K,) per-actor ADE vectors, one for each of the M actors.
    per_actor_ades: List[NDArrayFloat] = []
    for actor_forecasts, actor_gt in zip(forecasted_world_trajectories, gt_world_trajectories):
        per_actor_ades.append(compute_ade(actor_forecasts, actor_gt))

    # Average across actors to obtain a single score per predicted world.
    world_ades: NDArrayFloat = np.stack(per_actor_ades).mean(axis=0)
    return world_ades


def compute_world_misses(
    forecasted_world_trajectories: NDArrayFloat, gt_world_trajectories: NDArrayFloat, miss_threshold_m: float = 2.0
) -> NDArrayBool:
    """For each world, compute whether predictions for each actor missed by more than a distance threshold.

    Args:
        forecasted_world_trajectories: (M, K, N, 2) K predicted trajectories of length N, for each of M actors.
        gt_world_trajectories: (M, N, 2) ground truth trajectories of length N, for each of M actors.
        miss_threshold_m: Minimum distance threshold for final displacement to be considered a miss.

    Returns:
        (M, K) bools indicating whether prediction missed for actor in each world.
    """
    # Final displacement error for each actor, across all K predicted worlds.
    actor_fdes = [
        compute_fde(forecasted_actor_trajectories, gt_actor_trajectory)
        for forecasted_actor_trajectories, gt_actor_trajectory in zip(
            forecasted_world_trajectories, gt_world_trajectories
        )
    ]

    # An actor's prediction in a world is a miss when its FDE exceeds the threshold.
    world_actor_missed: NDArrayBool = np.stack(actor_fdes) > miss_threshold_m
    return world_actor_missed


def compute_world_brier_fde(
    forecasted_world_trajectories: NDArrayFloat,
    gt_world_trajectories: NDArrayFloat,
    forecasted_world_probabilities: NDArrayFloat,
    normalize: bool = False,
) -> NDArrayFloat:
    """Compute the mean probability-weighted (Brier) final displacement error for each of K predicted worlds.

    Args:
        forecasted_world_trajectories: (M, K, N, 2) K predicted trajectories of length N, for each of M actors.
        gt_world_trajectories: (M, N, 2) ground truth trajectories of length N, for each of M actors.
        forecasted_world_probabilities: (M,) normalized probabilities associated with each world.
        normalize: Normalizes `forecasted_world_probabilities` to sum to 1 when set to True.

    Returns:
        (K,) Mean probability-weighted final displacement error for each of the predicted worlds.
    """
    # Probability-weighted FDE for each actor, across all K predicted worlds.
    actor_brier_fdes = [
        compute_brier_fde(forecasted_actor_trajectories, gt_actor_trajectory, forecasted_world_probabilities, normalize)
        for forecasted_actor_trajectories, gt_actor_trajectory in zip(
            forecasted_world_trajectories, gt_world_trajectories
        )
    ]

    # Average across actors to obtain a single score per predicted world.
    world_brier_fdes: NDArrayFloat = np.stack(actor_brier_fdes).mean(axis=0)
    return world_brier_fdes


def compute_world_collisions(
    forecasted_world_trajectories: NDArrayFloat, collision_threshold_m: float = 1.0
) -> NDArrayBool:
    """Compute whether any of the forecasted trajectories collide with each other.

    Args:
        forecasted_world_trajectories: (M, K, N, 2) K predicted trajectories of length N, for each of M actors.
        collision_threshold_m: Distance threshold at which point a collision is considered to have occurred.

    Returns:
        (M, K) bools indicating if a collision was present for an actor in each predicted world.
    """
    actor_collisions: List[NDArrayBool] = []
    for actor_idx in range(len(forecasted_world_trajectories)):
        # Compute distance from current actor to all other predicted actors at each timestep
        forecasted_actor_trajectories = forecasted_world_trajectories[actor_idx]
        scenario_actor_dists = np.linalg.norm(forecasted_world_trajectories - forecasted_actor_trajectories, axis=-1)

        # Mask out self-distances (always zero) so they never count as collisions
        scenario_actor_dists[actor_idx, :, :] = np.inf

        # For each world, find the closest distance to any other predicted actor, at any time
        closest_dist_to_other_actor_m = scenario_actor_dists.min(axis=-1).min(axis=0)
        actor_collided_in_world = closest_dist_to_other_actor_m < collision_threshold_m
        actor_collisions.append(actor_collided_in_world)

    return np.stack(actor_collisions)
80 changes: 41 additions & 39 deletions src/av2/datasets/motion_forecasting/eval/submission.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
# <Copyright 2022, Argo AI, LLC. Released under the MIT license.>

"""Classes and utilities used to build submissions for the AV2 motion forecasting challenge."""

from __future__ import annotations

from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Final, List, Tuple
from typing import Dict, Final, List, Tuple

import numpy as np
import pandas as pd
Expand All @@ -16,11 +14,12 @@
from av2.utils.typing import NDArrayNumber

# Define type aliases used for submission
PredictedTrajectories = NDArrayNumber # (K, AV2_SCENARIO_PRED_TIMESTEPS, 2)
PredictionProbabilities = NDArrayNumber # (K,)
TrackPredictions = Tuple[PredictedTrajectories, PredictionProbabilities]
ScenarioPredictions = Dict[str, TrackPredictions] # Mapping from track ID to track predictions
PredictionRow = Tuple[str, str, float, PredictedTrajectories, PredictionProbabilities]
ScenarioProbabilities = NDArrayNumber # (K,) per-scenario probabilities, one value for each predicted future.
TrackTrajectories = NDArrayNumber # (K, AV2_SCENARIO_PRED_TIMESTEPS, 2) per-track predicted trajectories.

ScenarioTrajectories = Dict[str, TrackTrajectories]
ScenarioPredictions = Tuple[ScenarioProbabilities, ScenarioTrajectories]
PredictionRow = Tuple[str, str, float, TrackTrajectories, ScenarioProbabilities]

SUBMISSION_COL_NAMES: Final[List[str]] = [
"scenario_id",
Expand All @@ -47,34 +46,34 @@ def __post_init__(self) -> None:

Raises:
ValueError: If predictions for at least one track are not of shape (*, AV2_SCENARIO_PRED_TIMESTEPS, 2).
ValueError: If for any track, prediction probabilities doesn't match the number of predicted trajectories.
ValueError: If prediction probabilities for at least one track do not sum to 1.
ValueError: If for any track, number of probabilities doesn't match the number of predicted trajectories.
ValueError: If prediction probabilities for at least one scenario do not sum to 1.
"""
for scenario_id, scenario_predictions in self.predictions.items():
for track_id, (predicted_trajectories, prediction_probabilities) in scenario_predictions.items():
for scenario_id, (scenario_probabilities, scenario_trajectories) in self.predictions.items():
for track_id, track_trajectories in scenario_trajectories.items():
# Validate that predicted trajectories are of the correct shape
if predicted_trajectories[0].shape[-2:] != EXPECTED_PREDICTION_SHAPE:
if track_trajectories[0].shape[-2:] != EXPECTED_PREDICTION_SHAPE:
raise ValueError(
f"Prediction for track {track_id} in {scenario_id} found with invalid shape "
f"{predicted_trajectories.shape}, expected (*, {AV2_SCENARIO_PRED_TIMESTEPS}, 2)."
f"{track_trajectories.shape}, expected (*, {AV2_SCENARIO_PRED_TIMESTEPS}, 2)."
)

# Validate that the number of predicted trajectories and prediction probabilities matches
if len(predicted_trajectories) != len(prediction_probabilities):
if len(track_trajectories) != len(scenario_probabilities):
raise ValueError(
f"Prediction for track {track_id} in {scenario_id} has "
f"{len(predicted_trajectories)} predicted trajectories, but "
f"{len(prediction_probabilities)} probabilities."
f"{len(track_trajectories)} predicted trajectories, but "
f"{len(scenario_probabilities)} probabilities."
)

# Validate that prediction probabilities for each track are normalized
prediction_probability_sum = np.sum(prediction_probabilities)
probability_is_normalized = np.isclose(1, prediction_probability_sum)
if not probability_is_normalized:
raise ValueError(
f"Track probabilities must sum to 1, but probabilities for track {track_id} in {scenario_id} "
f"sum up to {prediction_probability_sum}."
)
# Validate that prediction probabilities for each scenario are normalized
prediction_probability_sum = np.sum(scenario_probabilities)
probability_is_normalized = np.isclose(1, prediction_probability_sum)
if not probability_is_normalized:
raise ValueError(
f"Track probabilities must sum to 1, but probabilities for track {track_id} in {scenario_id} "
f"sum up to {prediction_probability_sum}."
)

@classmethod
def from_parquet(cls, submission_file_path: Path) -> ChallengeSubmission:
Expand All @@ -91,14 +90,17 @@ def from_parquet(cls, submission_file_path: Path) -> ChallengeSubmission:
submission_df.sort_values(by="probability", inplace=True, ascending=False)

# From serialized data, build scenario-track mapping for predictions
submission_dict: Dict[str, Any] = defaultdict(lambda: defaultdict(dict))
for (scenario_id, track_id), track_df in submission_df.groupby(["scenario_id", "track_id"]):
predicted_trajectories_x = np.stack(track_df.loc[:, "predicted_trajectory_x"].values.tolist())
predicted_trajectories_y = np.stack(track_df.loc[:, "predicted_trajectory_y"].values.tolist())
predicted_trajectories = np.stack((predicted_trajectories_x, predicted_trajectories_y), axis=-1)
prediction_probabilities = np.array(track_df.loc[:, "probability"].values.tolist())

submission_dict[scenario_id][track_id] = (predicted_trajectories, prediction_probabilities)
submission_dict: Dict[str, ScenarioPredictions] = {}
for scenario_id, scenario_df in submission_df.groupby(["scenario_id"]):
scenario_trajectories: ScenarioTrajectories = {}
for track_id, track_df in scenario_df.groupby(["track_id"]):
predicted_trajectories_x = np.stack(track_df.loc[:, "predicted_trajectory_x"].values.tolist())
predicted_trajectories_y = np.stack(track_df.loc[:, "predicted_trajectory_y"].values.tolist())
predicted_trajectories = np.stack((predicted_trajectories_x, predicted_trajectories_y), axis=-1)
scenario_trajectories[track_id] = predicted_trajectories

scenario_probabilities = np.array(track_df.loc[:, "probability"].values.tolist())
submission_dict[scenario_id] = (scenario_probabilities, scenario_trajectories)

return cls(predictions=submission_dict)

Expand All @@ -111,16 +113,16 @@ def to_parquet(self, submission_file_path: Path) -> None:
prediction_rows: List[PredictionRow] = []

# Build list of rows for the submission dataframe
for scenario_id, scenario_predictions in self.predictions.items():
for track_id, (predicted_trajectories, prediction_probabilities) in scenario_predictions.items():
for prediction_idx in range(len(predicted_trajectories)):
for scenario_id, (scenario_probabilities, scenario_trajectories) in self.predictions.items():
for track_id, track_trajectories in scenario_trajectories.items():
for world_idx in range(len(track_trajectories)):
prediction_rows.append(
(
scenario_id,
track_id,
prediction_probabilities[prediction_idx],
predicted_trajectories[prediction_idx, :, 0],
predicted_trajectories[prediction_idx, :, 1],
scenario_probabilities[world_idx],
track_trajectories[world_idx, :, 0],
track_trajectories[world_idx, :, 1],
)
)

Expand Down
41 changes: 27 additions & 14 deletions tests/unit/datasets/motion_forecasting/eval/test_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,35 @@
import pytest

from av2.datasets.motion_forecasting.constants import AV2_SCENARIO_PRED_TIMESTEPS
from av2.datasets.motion_forecasting.eval.submission import ChallengeSubmission, ScenarioPredictions, TrackPredictions
from av2.datasets.motion_forecasting.eval.submission import (
ChallengeSubmission,
ScenarioPredictions,
ScenarioProbabilities,
ScenarioTrajectories,
TrackTrajectories,
)

# Build valid submission with predictions for a single track in a single scenario
valid_track_predictions: TrackPredictions = (np.zeros((2, AV2_SCENARIO_PRED_TIMESTEPS, 2)), np.array([0.6, 0.4]))
valid_scenario_predictions: ScenarioPredictions = {"valid_track_id": valid_track_predictions}
valid_submission_predictions = {"valid_scenario_id": valid_scenario_predictions}
valid_track_trajectories: TrackTrajectories = np.zeros((2, AV2_SCENARIO_PRED_TIMESTEPS, 2))
valid_scenario_probabilities: ScenarioProbabilities = np.array([0.6, 0.4])
valid_scenario_trajectories: ScenarioTrajectories = {"valid_track_id": valid_track_trajectories}
valid_submission_predictions = {"valid_scenario_id": (valid_scenario_probabilities, valid_scenario_trajectories)}

# Build invalid track submission with incorrect prediction length
too_short_track_predictions: TrackPredictions = (np.zeros((1, AV2_SCENARIO_PRED_TIMESTEPS - 1, 2)), np.array([1.0]))
too_short_scenario_predictions = {"invalid_track_id": too_short_track_predictions}
too_short_submission_predictions = {"invaild_scenario_id": too_short_scenario_predictions}
too_short_track_trajectories: TrackTrajectories = np.zeros((1, AV2_SCENARIO_PRED_TIMESTEPS - 1, 2))
too_short_scenario_probabilities = np.array([1.0])
too_short_scenario_trajectories = {"invalid_track_id": too_short_track_trajectories}
too_short_submission_predictions = {
"invalid_scenario_id": (too_short_scenario_probabilities, too_short_scenario_trajectories)
}

# Build invalid track submission with mismatched predicted trajectories and probabilities
mismatched_track_predictions: TrackPredictions = (np.zeros((1, AV2_SCENARIO_PRED_TIMESTEPS, 2)), np.array([0.5, 0.5]))
mismatched_scenario_predictions = {"invalid_track_id": mismatched_track_predictions}
mismatched_submission_predictions = {"invaild_scenario_id": mismatched_scenario_predictions}
mismatched_track_trajectories: TrackTrajectories = np.zeros((1, AV2_SCENARIO_PRED_TIMESTEPS, 2))
mismatched_scenario_probabilities = np.array([0.5, 0.5])
mismatched_scenario_trajectories = {"invalid_track_id": mismatched_track_trajectories}
mismatched_submission_predictions = {
"invalid_scenario_id": (mismatched_scenario_probabilities, mismatched_scenario_trajectories)
}


@pytest.mark.parametrize(
Expand Down Expand Up @@ -70,9 +83,9 @@ def test_challenge_submission_serialization(tmpdir: Path, test_submission_dict:
deserialized_submission = ChallengeSubmission.from_parquet(submission_file_path)

# Check that deserialized data matches original data exactly
for scenario_id, scenario_predictions in submission.predictions.items():
for track_id, (expected_trajectories, expected_probabilities) in scenario_predictions.items():
deserialized_predictions = deserialized_submission.predictions[scenario_id][track_id]
(deserialized_trajectories, deserialized_probabilities) = deserialized_predictions
for scenario_id, (expected_probabilities, scenario_trajectories) in submission.predictions.items():
for track_id, expected_trajectories in scenario_trajectories.items():
deserialized_probabilities = deserialized_submission.predictions[scenario_id][0]
deserialized_trajectories = deserialized_submission.predictions[scenario_id][1][track_id]
assert np.array_equal(deserialized_trajectories, expected_trajectories)
assert np.array_equal(deserialized_probabilities, expected_probabilities)