-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Load a londonaq dataset from CSV #36
Merged
Merged
Changes from 11 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
b8b15fe
✨ Split graph
PatrickOHara 2c07dac
💩 This is broken :(
PatrickOHara 9b6e28d
✨ Pydantic model for tsplib
PatrickOHara cfd99ca
🐛 Loading from CSV
PatrickOHara 7e5f91a
🐛 Old edges to json
PatrickOHara f0f4967
🚨 Linting
PatrickOHara 250937b
⬆️ networkx doesn't support py36 anymore
PatrickOHara 221d7b5
Black formatting
PatrickOHara 55098f1
formatting again
PatrickOHara 8245ebb
Remove scripts from linting
PatrickOHara 1030104
✅ Test loading from tsplib95
PatrickOHara 7559c13
Change dataset dir
PatrickOHara File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
black>=20.8b1 | ||
black>=21.9b0 | ||
markdown-include>=0.6.0 | ||
mkdocs>=1.1.2 | ||
mkdocstrings>=0.13.6 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
"""Script for generating a tsplib style txt file from londonaq CSV""" | ||
|
||
import json | ||
from pathlib import Path | ||
|
||
import networkx as nx | ||
import pandas as pd | ||
import typer | ||
|
||
from tspwplib import split_graph_from_properties | ||
from tspwplib.problem import BaseTSP | ||
from tspwplib.types import ( | ||
EdgeWeightFormat, | ||
LondonaqGraphName, | ||
LondonaqLocationShort, | ||
LondonaqTimestamp, | ||
) | ||
from tspwplib.utils import londonaq_comment, londonaq_graph_name | ||
|
||
OLD_EDGE_LOOKUP_JSON = "old_edge_lookup.json" | ||
OLD_NODE_LOOKUP_JSON = "old_node_lookup.json" | ||
|
||
|
||
def generate_londonaq_dataset(
    dataset_dir: Path,
    name: LondonaqGraphName,
    comment: str,
    edges_csv_filename: str = "edges.csv",
    nodes_csv_filename: str = "nodes.csv",
    old_edge_lookup: str = OLD_EDGE_LOOKUP_JSON,
    old_node_lookup: str = OLD_NODE_LOOKUP_JSON,
) -> BaseTSP:
    """Generate a londonaq dataset from node and edge CSV files.

    Args:
        dataset_dir: Directory containing the input CSVs; the tsplib-style
            txt file and the lookup JSON files are written here as well.
        name: Name of the londonaq graph.
        comment: Comment written into the tsplib-style problem file.
        edges_csv_filename: CSV with 'source', 'target' and 'key' columns
            plus edge attributes (at least 'cost' and 'length').
        nodes_csv_filename: CSV with a 'node' column and an 'is_depot' flag.
        old_edge_lookup: Output JSON filename mapping each split edge to the
            original edge it came from.
        old_node_lookup: Output JSON filename mapping each normalized node ID
            to the original node ID.

    Returns:
        The pydantic TSP model that was saved to the txt file.

    Raises:
        FileNotFoundError: If either input CSV is missing from dataset_dir.
    """
    # get the CSV files for edges and nodes
    dataset_dir.mkdir(parents=False, exist_ok=True)
    edges_filepath = dataset_dir / edges_csv_filename
    nodes_filepath = dataset_dir / nodes_csv_filename
    if not edges_filepath.exists():
        raise FileNotFoundError(edges_filepath)
    if not nodes_filepath.exists():
        raise FileNotFoundError(nodes_filepath)
    nodes_df = pd.read_csv(nodes_filepath)
    nodes_df = nodes_df.set_index("node")
    edges_df = pd.read_csv(edges_filepath)

    # split edges then relabel the nodes with consecutive integers from zero
    edges_df = edges_df.set_index(["source", "target", "key"])
    edge_attrs = edges_df.to_dict("index")
    split_graph = split_graph_from_properties(
        edge_attrs,
        edge_attr_to_split="cost",
        edge_attr_to_vertex="length",
        new_vertex_attr="demand",
        old_edge_attr="old_edge",
    )
    normalize_map = {node: i for i, node in enumerate(split_graph.nodes())}
    normalized_graph = nx.relabel_nodes(split_graph, normalize_map, copy=True)

    # save the node and edge mappings to a json file
    old_edges = {
        (normalize_map[u], normalize_map[v]): data["old_edge"]
        for u, v, data in split_graph.edges(data=True)
    }
    old_vertices = {new: old for old, new in normalize_map.items()}

    # BUG FIX: the previous code built {list(key): ...} which raises
    # TypeError (a list is unhashable so it cannot be a dict key, and JSON
    # object keys must be strings anyway). Serialize each new edge tuple as
    # a "u,v" string key; values (old edges) are dumped as plain lists.
    json_old_edges = {
        f"{source},{target}": list(old_edge)
        for (source, target), old_edge in old_edges.items()
    }
    with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as json_file:
        json.dump(json_old_edges, json_file)
    with open(dataset_dir / old_node_lookup, "w", encoding="UTF-8") as json_file:
        # int keys are coerced to strings by json.dump
        json.dump(old_vertices, json_file)

    # get depots and flag them on the normalized graph
    depots = list(nodes_df.loc[nodes_df.is_depot].index.map(normalize_map))
    nx.set_node_attributes(normalized_graph, False, "is_depot")
    for v in depots:
        normalized_graph.nodes[v]["is_depot"] = True

    # NOTE (not implemented yet) get node co-ordinates

    # get TSP representation
    tsp = BaseTSP.from_networkx(
        name,
        comment,
        "PCTSP",
        normalized_graph,
        edge_weight_format=EdgeWeightFormat.LOWER_DIAG_ROW,
        weight_attr_name="cost",
    )

    # save to txt file
    # NOTE(review): f"{name}" relies on LondonaqGraphName formatting to its
    # value (true for str-mixin enums on py<3.12) — confirm, or use name.value
    problem = tsp.to_tsplib95()
    txt_filepath = dataset_dir / f"{name}.txt"
    problem.save(txt_filepath)
    return tsp
|
||
|
||
def to_pandas_nodelist(G: nx.Graph) -> pd.DataFrame:
    """Move node attributes to a pandas dataframe. Node ID is stored in 'node' column."""
    rows = []
    for node_id, attributes in G.nodes(data=True):
        # keep "node" as the first column, followed by the node attributes
        record = {"node": node_id}
        record.update(attributes)
        rows.append(record)
    return pd.DataFrame(rows)
|
||
|
||
def main(
    location: LondonaqLocationShort,
    dataset_dir: Path = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq"),
):
    """Entrypoint for generating londonaq dataset"""
    # NOTE(review): the default dataset_dir is a hard-coded personal path;
    # consider making it configurable (e.g. via an environment variable)
    timestamp = LondonaqTimestamp.A
    graph_name = londonaq_graph_name(location, timestamp)
    graph_comment = londonaq_comment(location, timestamp)
    generate_londonaq_dataset(
        dataset_dir / graph_name.value,
        graph_name,
        graph_comment,
        edges_csv_filename=f"{graph_name.value}_edges.csv",
        nodes_csv_filename=f"{graph_name.value}_nodes.csv",
    )
|
||
|
||
# Run the typer CLI app when this module is executed as a script.
if __name__ == "__main__":
    typer.run(main)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
"""Tests for the pydantic representation of a TSP""" | ||
|
||
import pytest | ||
from tsplib95.models import StandardProblem | ||
from tspwplib import BaseTSP, GraphName, build_path_to_tsplib_instance | ||
|
||
|
||
@pytest.mark.parametrize("gname", list(GraphName))
def test_from_tsplib95(tsplib_root, gname):
    """Test tsplib95 problems can be read into BaseTSP"""
    # skip problems with 1000 or more vertices (too slow to load)
    num_vertices = int("".join(char for char in gname.value if char.isdigit()))
    if num_vertices >= 1000:
        return
    tsp_path = build_path_to_tsplib_instance(tsplib_root, gname)
    assert tsp_path.exists()
    problem = StandardProblem.load(tsp_path)
    tsp = BaseTSP.from_tsplib95(problem)
    # every edge of the tsplib problem appears in the pydantic model
    assert len(tsp.edge_data) == len(list(problem.get_edges()))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
"""Tests for splitting edges""" | ||
|
||
from tspwplib.converter import ( | ||
split_edges, | ||
split_graph_from_properties, | ||
lookup_from_split, | ||
lookup_to_split, | ||
) | ||
|
||
|
||
def test_split_edges():
    """Test split edges"""
    original_edges = [(0, 1), (1, 2), (0, 2)]
    split = split_edges(original_edges)

    # each original edge is replaced by exactly two half-edges
    assert len(split) == 2 * len(original_edges)
    assert (0, -1) in split
    assert (0, -3) in split

    # mapping from a half-edge back to the edge it was split from
    from_split = lookup_from_split(original_edges, split)
    assert from_split[(0, -1)] == (0, 1)
    assert from_split[(-1, 1)] == (0, 1)
    assert from_split[(0, -3)] == (0, 2)

    # mapping from an original edge to its pair of half-edges
    to_split = lookup_to_split(original_edges, split)
    assert to_split[(0, 1)] == ((0, -1), (-1, 1))
    assert to_split[(1, 2)] == ((1, -2), (-2, 2))
|
||
|
||
def test_split_graph_from_properties():
    """Test split graph"""
    edge_properties = {
        (0, 1): {"weight": 5, "cost": 3},
        (1, 2): {"weight": 1, "cost": 10},
        (0, 2): {"weight": 2, "cost": 5},
    }
    graph = split_graph_from_properties(edge_properties)
    # each half-edge carries half the cost of the edge it was split from
    for _, _, attrs in graph.edges(data=True):
        source_edge = attrs["old_edge"]
        assert attrs["cost"] == float(edge_properties[source_edge]["cost"]) / 2.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops change filepath