Load a londonaq dataset from CSV #36

Merged 12 commits on Oct 15, 2021
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -8,7 +8,7 @@ jobs:

strategy:
matrix:
-python-version: [3.6, 3.7, 3.8, 3.9]
+python-version: [3.7, 3.8, 3.9]

env:
OPLIB_ROOT: ../OPLib
2 changes: 1 addition & 1 deletion .pylintrc
@@ -3,7 +3,7 @@
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
-extension-pkg-whitelist=
+extension-pkg-whitelist=pydantic

# Specify a score threshold to be exceeded before program exits with error.
fail-under=10.0
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-black>=20.8b1
+black>=21.9b0
markdown-include>=0.6.0
mkdocs>=1.1.2
mkdocstrings>=0.13.6
122 changes: 122 additions & 0 deletions scripts/from_urbanair.py
@@ -0,0 +1,122 @@
"""Script for generating a tsplib style txt file from londonaq CSV"""

import json
from pathlib import Path

import networkx as nx
import pandas as pd
import typer

from tspwplib import split_graph_from_properties
from tspwplib.problem import BaseTSP
from tspwplib.types import (
EdgeWeightFormat,
LondonaqGraphName,
LondonaqLocationShort,
LondonaqTimestamp,
)
from tspwplib.utils import londonaq_comment, londonaq_graph_name

OLD_EDGE_LOOKUP_JSON = "old_edge_lookup.json"
OLD_NODE_LOOKUP_JSON = "old_node_lookup.json"


def generate_londonaq_dataset(
dataset_dir: Path,
name: LondonaqGraphName,
comment: str,
edges_csv_filename: str = "edges.csv",
nodes_csv_filename: str = "nodes.csv",
old_edge_lookup: str = OLD_EDGE_LOOKUP_JSON,
old_node_lookup: str = OLD_NODE_LOOKUP_JSON,
) -> BaseTSP:
"""Generate a londonaq dataset"""

# get the CSV files for edges and nodes
dataset_dir.mkdir(parents=False, exist_ok=True)
edges_filepath = dataset_dir / edges_csv_filename
nodes_filepath = dataset_dir / nodes_csv_filename
if not edges_filepath.exists():
raise FileNotFoundError(edges_filepath)
if not nodes_filepath.exists():
raise FileNotFoundError(nodes_filepath)
nodes_df = pd.read_csv(nodes_filepath)
nodes_df = nodes_df.set_index("node")
edges_df = pd.read_csv(edges_filepath)

# split edges then relabel the nodes
edges_df = edges_df.set_index(["source", "target", "key"])
edge_attrs = edges_df.to_dict("index")
split_graph = split_graph_from_properties(
edge_attrs,
edge_attr_to_split="cost",
edge_attr_to_vertex="length",
new_vertex_attr="demand",
old_edge_attr="old_edge",
)
normalize_map = {node: i for i, node in enumerate(split_graph.nodes())}
normalized_graph = nx.relabel_nodes(split_graph, normalize_map, copy=True)

# save the node and edge mappings to a json file
old_edges = {
(normalize_map[u], normalize_map[v]): data["old_edge"]
for u, v, data in split_graph.edges(data=True)
}
old_vertices = {new: old for old, new in normalize_map.items()}

    # JSON object keys must be strings, so serialise each edge tuple as a string
    # and convert the old-edge tuples to lists when dumping
    json_old_edges = {str(list(key)): list(value) for key, value in old_edges.items()}
with open(dataset_dir / old_edge_lookup, "w", encoding="UTF-8") as json_file:
json.dump(json_old_edges, json_file)
with open(dataset_dir / old_node_lookup, "w", encoding="UTF-8") as json_file:
json.dump(old_vertices, json_file)

# get depots
depots = list(nodes_df.loc[nodes_df.is_depot].index.map(normalize_map))
nx.set_node_attributes(normalized_graph, False, "is_depot")
for v in depots:
normalized_graph.nodes[v]["is_depot"] = True

# NOTE (not implemented yet) get node co-ordinates

# get TSP representation
tsp = BaseTSP.from_networkx(
name,
comment,
"PCTSP",
normalized_graph,
edge_weight_format=EdgeWeightFormat.LOWER_DIAG_ROW,
weight_attr_name="cost",
)

# save to txt file
problem = tsp.to_tsplib95()
txt_filepath = dataset_dir / f"{name}.txt"
problem.save(txt_filepath)
return tsp


def to_pandas_nodelist(G: nx.Graph) -> pd.DataFrame:
"""Move node attributes to a pandas dataframe. Node ID is stored in 'node' column."""
return pd.DataFrame([{"node": node, **data} for node, data in G.nodes(data=True)])


def main(
location: LondonaqLocationShort,
dataset_dir: Path = Path("/", "Users", "patrick", "Datasets", "pctsp", "londonaq"),
Review comment from the author (repo owner): Oops change filepath
):
"""Entrypoint for generating londonaq dataset"""
timestamp_id: LondonaqTimestamp = LondonaqTimestamp.A
name = londonaq_graph_name(location, timestamp_id)
comment = londonaq_comment(location, timestamp_id)
generate_londonaq_dataset(
dataset_dir / name.value,
name,
comment,
edges_csv_filename=name.value + "_edges.csv",
nodes_csv_filename=name.value + "_nodes.csv",
)


if __name__ == "__main__":
typer.run(main)
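
For orientation, here is a minimal sketch of driving the new dataset generator directly from Python rather than through the Typer CLI. The dataset directory and the way a LondonaqLocationShort member is chosen are placeholders for illustration, not values taken from this PR.

from pathlib import Path

from tspwplib import LondonaqLocationShort, LondonaqTimestamp
from tspwplib.utils import londonaq_comment, londonaq_graph_name
from from_urbanair import generate_londonaq_dataset  # the script above, assuming scripts/ is on the path

# Placeholder inputs: pick a location member and a dataset directory for your setup.
location = list(LondonaqLocationShort)[0]
timestamp = LondonaqTimestamp.A
name = londonaq_graph_name(location, timestamp)
comment = londonaq_comment(location, timestamp)

# Expects <name>_edges.csv and <name>_nodes.csv inside the dataset directory;
# writes <name>.txt plus the old edge/node lookup JSON files next to them.
tsp = generate_londonaq_dataset(
    Path("datasets") / "londonaq" / name.value,
    name,
    comment,
    edges_csv_filename=name.value + "_edges.csv",
    nodes_csv_filename=name.value + "_nodes.csv",
)
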
6 changes: 4 additions & 2 deletions setup.py
@@ -8,19 +8,21 @@
url="https://github.com/PatrickOHara/tspwplib",
description="Library of instances for TSP with Profits",
install_requires=[
"networkx>=2.6.0",
"pandas>=1.0.0",
"pydantic>=1.8.2",
"tsplib95>=0.7.1",
],
name="tspwplib",
packages=["tspwplib"],
-python_requires=">=3.6",
+python_requires=">=3.7",
license="MIT",
classifiers=[
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3 :: Only",
"Operating System :: OS Independent",
],
18 changes: 18 additions & 0 deletions tests/test_base_tsp.py
@@ -0,0 +1,18 @@
"""Tests for the pydantic representation of a TSP"""

import pytest
from tsplib95.models import StandardProblem
from tspwplib import BaseTSP, GraphName, build_path_to_tsplib_instance


@pytest.mark.parametrize("gname", list(GraphName))
def test_from_tsplib95(tsplib_root, gname):
"""Test tsplib95 problems can be read into BaseTSP"""
    # only load problems with fewer than 1000 vertices
n_nodes = int("".join(filter(str.isdigit, gname.value)))
if n_nodes < 1000:
tsp_path = build_path_to_tsplib_instance(tsplib_root, gname)
assert tsp_path.exists()
problem = StandardProblem.load(tsp_path)
tsp = BaseTSP.from_tsplib95(problem)
assert len(tsp.edge_data) == len(list(problem.get_edges()))
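
The test above only exercises the tsplib95 to BaseTSP direction. Below is a rough sketch of a full round trip; the TSPLIB_ROOT environment variable is an assumption for illustration, and to_tsplib95 is taken to mirror from_tsplib95 as used in scripts/from_urbanair.py.

import os
from pathlib import Path

from tsplib95.models import StandardProblem
from tspwplib import BaseTSP, GraphName, build_path_to_tsplib_instance

# Assumption: TSPLIB_ROOT points at a local copy of the TSPLIB95 instances.
tsplib_root = Path(os.environ["TSPLIB_ROOT"])
gname = list(GraphName)[0]  # any instance will do for the sketch
tsp_path = build_path_to_tsplib_instance(tsplib_root, gname)

original = StandardProblem.load(tsp_path)
tsp = BaseTSP.from_tsplib95(original)   # tsplib95 -> pydantic model
recovered = tsp.to_tsplib95()           # pydantic model -> tsplib95

# The edge set should survive the round trip.
assert len(list(original.get_edges())) == len(list(recovered.get_edges()))
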
40 changes: 40 additions & 0 deletions tests/test_converter/test_split_converter.py
@@ -0,0 +1,40 @@
"""Tests for splitting edges"""

from tspwplib.converter import (
split_edges,
split_graph_from_properties,
lookup_from_split,
lookup_to_split,
)


def test_split_edges():
"""Test split edges"""
edge_list = [(0, 1), (1, 2), (0, 2)]
splits = split_edges(edge_list)
assert len(splits) == len(edge_list) * 2
assert (0, -1) in splits
assert (0, -3) in splits

# test lookups
from_split = lookup_from_split(edge_list, splits)
assert from_split[(0, -1)] == (0, 1)
assert from_split[(-1, 1)] == (0, 1)
assert from_split[(0, -3)] == (0, 2)

to_split = lookup_to_split(edge_list, splits)
assert to_split[(0, 1)] == ((0, -1), (-1, 1))
assert to_split[(1, 2)] == ((1, -2), (-2, 2))


def test_split_graph_from_properties():
"""Test split graph"""
properties = {
(0, 1): {"weight": 5, "cost": 3},
(1, 2): {"weight": 1, "cost": 10},
(0, 2): {"weight": 2, "cost": 5},
}
G = split_graph_from_properties(properties)
for _, _, data in G.edges(data=True):
old_edge = data["old_edge"]
assert data["cost"] == float(properties[old_edge]["cost"]) / 2.0
15 changes: 14 additions & 1 deletion tspwplib/__init__.py
@@ -10,9 +10,11 @@
is_split_vertex_pair,
is_vertex_split_head,
is_vertex_split_tail,
split_graph_from_properties,
split_head,
split_tail,
tail_prize,
to_simple_undirected,
to_vertex_dataframe,
)
from .complete import is_complete, is_complete_with_self_loops
@@ -22,7 +24,7 @@
NotSimpleCycleException,
NotSimplePathException,
)
-from .problem import ProfitsProblem, is_pctsp_yes_instance
+from .problem import BaseTSP, ProfitsProblem, is_pctsp_yes_instance
from .utils import build_path_to_oplib_instance, build_path_to_tsplib_instance
from .types import (
Alpha,
@@ -33,6 +35,10 @@
EdgeList,
Generation,
GraphName,
LondonaqGraphName,
LondonaqLocation,
LondonaqLocationShort,
LondonaqTimestamp,
OptimalSolutionTSP,
Vertex,
VertexFunction,
@@ -57,6 +63,7 @@

__all__ = [
"Alpha",
"BaseTSP",
"DisjointPaths",
"Edge",
"EdgeFunction",
@@ -65,6 +72,10 @@
"EdgesNotAdjacentException",
"Generation",
"GraphName",
"LondonaqGraphName",
"LondonaqLocation",
"LondonaqLocationShort",
"LondonaqTimestamp",
"NotSimpleException",
"NotSimpleCycleException",
"NotSimplePathException",
@@ -99,9 +110,11 @@
"problem",
"remove_self_loops_from_edge_list",
"reorder_edge_list_from_root",
"split_graph_from_properties",
"split_head",
"split_tail",
"tail_prize",
"to_simple_undirected",
"to_vertex_dataframe",
"total_cost",
"total_cost_networkx",
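
Finally, a quick sketch of the names this diff adds to the package's public interface, importable from the package root once the changes above are installed:

from tspwplib import (
    BaseTSP,
    LondonaqGraphName,
    LondonaqLocation,
    LondonaqLocationShort,
    LondonaqTimestamp,
    split_graph_from_properties,
    to_simple_undirected,
)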