Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG] Update some clustering algos to only support undirected graphs #2267

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions python/cugraph/cugraph/community/ktruss_subgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def ktruss_subgraph(G, k, use_weights=True):
cuGraph graph descriptor with connectivity information. k-Trusses are
defined only for undirected graphs, as they are defined in terms of
undirected triangles in a graph.
The current implementation only supports undirected graphs.

k : int
The desired k to be used for extracting the k-truss subgraph.
Expand All @@ -159,8 +160,8 @@ def ktruss_subgraph(G, k, use_weights=True):
_ensure_compatible_cuda_version()

KTrussSubgraph = Graph()
if type(G) is not Graph:
raise Exception("input graph must be undirected")
if G.is_directed():
raise ValueError("input graph must be undirected")

subgraph_df = ktruss_subgraph_wrapper.ktruss_subgraph(G, k, use_weights)
if G.renumbered:
Expand Down
7 changes: 4 additions & 3 deletions python/cugraph/cugraph/community/leiden.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# limitations under the License.

from cugraph.community import leiden_wrapper
from cugraph.structure.graph_classes import Graph
from cugraph.utilities import (ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
Expand All @@ -34,6 +33,8 @@ def leiden(G, max_iter=100, resolution=1.):
G : cugraph.Graph
cuGraph graph descriptor of type Graph

The current implementation only supports undirected graphs.

The adjacency list will be computed if not already present.

max_iter : integer, optional (default=100)
Expand Down Expand Up @@ -76,8 +77,8 @@ def leiden(G, max_iter=100, resolution=1.):
"""
G, isNx = ensure_cugraph_obj_for_nx(G)

if type(G) is not Graph:
raise Exception(f"input graph must be undirected was {type(G)}")
if G.is_directed():
raise ValueError("input graph must be undirected")

parts, modularity_score = leiden_wrapper.leiden(
G, max_iter, resolution
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# limitations under the License.

from cugraph.community import louvain_wrapper
from cugraph.structure.graph_classes import Graph
from cugraph.utilities import (ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
Expand All @@ -35,6 +34,7 @@ def louvain(G, max_iter=100, resolution=1.):
The graph descriptor should contain the connectivity information
and weights. The adjacency list will be computed if not already
present.
The current implementation only supports undirected graphs.

max_iter : integer, optional (default=100)
This controls the maximum number of levels/iterations of the Louvain
Expand Down Expand Up @@ -77,8 +77,8 @@ def louvain(G, max_iter=100, resolution=1.):

G, isNx = ensure_cugraph_obj_for_nx(G)

if type(G) is not Graph:
raise Exception("input graph must be undirected")
if G.is_directed():
raise ValueError("input graph must be undirected")

parts, modularity_score = louvain_wrapper.louvain(
G, max_iter, resolution
Expand Down
8 changes: 4 additions & 4 deletions python/cugraph/cugraph/community/triangle_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# limitations under the License.

from cugraph.community import triangle_count_wrapper
from cugraph.structure.graph_classes import Graph
from cugraph.utilities import ensure_cugraph_obj_for_nx


Expand All @@ -28,7 +27,8 @@ def triangles(G):
----------
G : cugraph.Graph or networkx.Graph
cuGraph graph descriptor, should contain the connectivity information,
(edge weights are not used in this algorithm)
(edge weights are not used in this algorithm).
The current implementation only supports undirected graphs.

Returns
-------
Expand All @@ -50,8 +50,8 @@ def triangles(G):

G, _ = ensure_cugraph_obj_for_nx(G)

if type(G) is not Graph:
raise Exception("input graph must be undirected")
if G.is_directed():
raise ValueError("input graph must be undirected")

result = triangle_count_wrapper.triangles(G)

Expand Down
9 changes: 4 additions & 5 deletions python/cugraph/cugraph/dask/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def louvain(input_graph, max_iter=100, resolution=1.0):
and weights. The adjacency list will be computed if not already
present.

The current implementation only supports undirected graphs.

max_iter : integer, optional (default=100)
This controls the maximum number of levels/iterations of the Louvain
algorithm. When specified the algorithm will terminate after no more
Expand Down Expand Up @@ -114,11 +116,8 @@ def louvain(input_graph, max_iter=100, resolution=1.0):
>>> parts, modularity_score = dcg.louvain(dg)

"""
# FIXME: dask methods to populate graphs from edgelists are only present on
# DiGraph classes. Disable the Graph check for now and assume inputs are
# symmetric DiGraphs.
# if type(graph) is not Graph:
# raise Exception("input graph must be undirected")
if input_graph.is_directed():
raise ValueError("input graph must be undirected")
client = default_client()
# Calling renumbering results in data that is sorted by degree
input_graph.compute_renumber_edge_list(transposed=False)
Expand Down
34 changes: 17 additions & 17 deletions python/cugraph/cugraph/tests/mg/test_mg_louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,26 @@ def setFixtureParamNames(*args, **kwargs):
pass


# =============================================================================
# Parameters
# =============================================================================
# Dataset path(s) used to parametrize the directed-graph fixture in this
# module; presumably an asymmetric (directed) edge list, going by the file
# name -- confirm against the dataset itself.
DATASETS_ASYMMETRIC = [
utils.RAPIDS_DATASET_ROOT_DIR_PATH/"karate-asymmetric.csv"]


###############################################################################
# Fixtures
# @pytest.mark.skipif(
# is_single_gpu(), reason="skipping MG testing on Single GPU system"
# )
@pytest.fixture(scope="module",
params=utils.DATASETS_UNDIRECTED,
params=DATASETS_ASYMMETRIC,
ids=[f"dataset={d.as_posix()}"
for d in utils.DATASETS_UNDIRECTED])
for d in DATASETS_ASYMMETRIC])
def daskGraphFromDataset(request, dask_client):
"""
Returns a new dask dataframe created from the dataset file param.
This creates un undirected Graph.
This creates a directed Graph.
"""
# Since parameterized fixtures do not assign param names to param values,
# manually call the helper to do so.
Expand Down Expand Up @@ -77,7 +84,7 @@ def daskGraphFromDataset(request, dask_client):
def uddaskGraphFromDataset(request, dask_client):
"""
Returns a new dask dataframe created from the dataset file param.
This creates un undirected Graph.
This creates an undirected Graph.
"""
# Since parameterized fixtures do not assign param names to param
# values, manually call the helper to do so.
Expand All @@ -103,26 +110,19 @@ def uddaskGraphFromDataset(request, dask_client):
# @pytest.mark.skipif(
# is_single_gpu(), reason="skipping MG testing on Single GPU system"
# )
def test_mg_louvain_with_edgevals(daskGraphFromDataset):
    """
    Run multi-GPU Louvain on the graph supplied by the fixture and print the
    resulting partitions and modularity score.
    """
    # FIXME: daskGraphFromDataset returns a Directed graph, which Louvain is
    # currently accepting. In the future, an MNMG symmetrize will need to
    # be called to create a Graph for Louvain.
    louvain_result = dcg.louvain(daskGraphFromDataset)
    partitions, modularity = louvain_result

    # FIXME: either call Nx with the same dataset and compare results, or
    # hardcode golden results to compare to.
    print()
    print(partitions.compute())
    print(modularity)
    print()
def test_mg_louvain_with_edgevals_directed_graph(daskGraphFromDataset):
    """
    Verify that multi-GPU Louvain rejects a directed input graph.

    Parameters
    ----------
    daskGraphFromDataset : fixture
        Graph built by the module fixture of the same name.

    Raises
    ------
    The test fails unless ``dcg.louvain`` raises a ``ValueError`` whose
    message mentions that the graph must be undirected.
    """
    # Directed graphs are not supported by Louvain and a ValueError should be
    # raised. The return value is deliberately not unpacked here: tuple
    # unpacking can itself raise ValueError, which would let this test pass
    # even if Louvain wrongly accepted the directed graph.
    with pytest.raises(ValueError, match="undirected"):
        dcg.louvain(daskGraphFromDataset)


###############################################################################
# Tests
# @pytest.mark.skipif(
# is_single_gpu(), reason="skipping MG testing on Single GPU system"
# )
def test_mg_udlouvain_with_edgevals(uddaskGraphFromDataset):
def test_mg_louvain_with_edgevals_undirected_graph(uddaskGraphFromDataset):
parts, mod = dcg.louvain(uddaskGraphFromDataset)

# FIXME: either call Nx with the same dataset and compare results, or
Expand Down
28 changes: 26 additions & 2 deletions python/cugraph/cugraph/tests/test_k_truss_subgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@
print("Networkx version : {} ".format(nx.__version__))


# =============================================================================
# Pytest Setup / Teardown - called for each test function
# =============================================================================
def setup_function():
    """Run a garbage-collection pass; pytest calls this before each test."""
    gc.collect()


# These ground truth files have been created by running the networkx ktruss
# function on reference graphs. Currently networkx ktruss has an error such
# that nx.k_truss(G,k-2) gives the expected result for running ktruss with
Expand Down Expand Up @@ -101,7 +108,6 @@ def test_unsupported_cuda_version():
f"{__unsupported_cuda_version} environment.")
@pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS)
def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth):
gc.collect()

k = 5
cu_M = utils.read_csv_file(graph_file)
Expand All @@ -117,7 +123,6 @@ def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth):
f"{__unsupported_cuda_version} environment.")
@pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS)
def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth):
gc.collect()

k = 5
M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
Expand All @@ -129,3 +134,22 @@ def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth):
k_truss_nx = nx.k_truss(G, k)

assert nx.is_isomorphic(k_subgraph, k_truss_nx)


@pytest.mark.skipif((__cuda_version == __unsupported_cuda_version),
                    reason="skipping on unsupported CUDA "
                    f"{__unsupported_cuda_version} environment.")
def test_ktruss_subgraph_directed_Graph():
    """
    Verify that k-truss extraction rejects a directed graph.

    A directed ``cugraph.Graph`` is built from the asymmetric karate
    edge list (with edge weights) and passed to ``cugraph.k_truss``; the
    call is expected to raise ``ValueError``.
    """
    input_data_path = (utils.RAPIDS_DATASET_ROOT_DIR_PATH /
                       "karate-asymmetric.csv").as_posix()
    k = 5
    cu_M = utils.read_csv_file(input_data_path)

    # The graph is always built with edge values; the former hard-coded
    # ``edgevals = True`` flag made the unweighted branch unreachable.
    G = cugraph.Graph(directed=True)
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

    with pytest.raises(ValueError):
        cugraph.k_truss(G, k)
37 changes: 29 additions & 8 deletions python/cugraph/cugraph/tests/test_leiden.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@
warnings.filterwarnings("ignore", category=DeprecationWarning)


def cugraph_leiden(G, edgevals=False):
# =============================================================================
# Pytest Setup / Teardown - called for each test function
# =============================================================================
def setup_function():
    """Run a garbage-collection pass; pytest calls this before each test."""
    gc.collect()


def cugraph_leiden(G):

# cugraph Leiden Call
t1 = time.time()
Expand All @@ -42,7 +49,7 @@ def cugraph_leiden(G, edgevals=False):
return parts, mod


def cugraph_louvain(G, edgevals=False):
def cugraph_louvain(G):

# cugraph Louvain Call
t1 = time.time()
Expand All @@ -55,7 +62,6 @@ def cugraph_louvain(G, edgevals=False):

@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_leiden(graph_file):
gc.collect()
edgevals = True

cu_M = utils.read_csv_file(graph_file)
Expand All @@ -66,16 +72,15 @@ def test_leiden(graph_file):
else:
G.from_cudf_edgelist(cu_M, source="0", destination="1")

leiden_parts, leiden_mod = cugraph_leiden(G, edgevals=True)
louvain_parts, louvain_mod = cugraph_louvain(G, edgevals=True)
leiden_parts, leiden_mod = cugraph_leiden(G)
louvain_parts, louvain_mod = cugraph_louvain(G)

# Calculating modularity scores for comparison
assert leiden_mod >= (0.99 * louvain_mod)


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_leiden_nx(graph_file):
gc.collect()
edgevals = True

NM = utils.read_csv_for_nx(graph_file)
Expand All @@ -89,8 +94,24 @@ def test_leiden_nx(graph_file):
NM, create_using=nx.Graph(), source="0", target="1", edge_attr="2"
)

leiden_parts, leiden_mod = cugraph_leiden(G, edgevals=True)
louvain_parts, louvain_mod = cugraph_louvain(G, edgevals=True)
leiden_parts, leiden_mod = cugraph_leiden(G)
louvain_parts, louvain_mod = cugraph_louvain(G)

# Calculating modularity scores for comparison
assert leiden_mod >= (0.99 * louvain_mod)


def test_leiden_directed_graph():
    """
    Verify that Leiden rejects a directed graph.

    A directed ``cugraph.Graph`` is built from the asymmetric karate
    edge list (with edge weights) and passed to the ``cugraph_leiden``
    helper; the call is expected to raise ``ValueError``.
    """
    input_data_path = (utils.RAPIDS_DATASET_ROOT_DIR_PATH /
                       "karate-asymmetric.csv").as_posix()

    cu_M = utils.read_csv_file(input_data_path)

    # The graph is always built with edge values; the former hard-coded
    # ``edgevals = True`` flag made the unweighted branch unreachable.
    G = cugraph.Graph(directed=True)
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

    # Do not unpack the return value inside the ``raises`` block: a failed
    # tuple unpack raises ValueError too and would mask a missing rejection.
    with pytest.raises(ValueError):
        cugraph_leiden(G)
23 changes: 19 additions & 4 deletions python/cugraph/cugraph/tests/test_louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,16 @@
print("Networkx version : {} ".format(nx.__version__))


def cugraph_call(cu_M, edgevals=False):
# =============================================================================
# Pytest Setup / Teardown - called for each test function
# =============================================================================
def setup_function():
    """Run a garbage-collection pass; pytest calls this before each test."""
    gc.collect()


def cugraph_call(cu_M, edgevals=False, directed=False):

G = cugraph.Graph()
G = cugraph.Graph(directed=directed)
if edgevals:
G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")
else:
Expand Down Expand Up @@ -75,7 +82,6 @@ def networkx_call(M):

@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED)
def test_louvain_with_edgevals(graph_file):
gc.collect()

M = utils.read_csv_for_nx(graph_file)
cu_M = utils.read_csv_file(graph_file)
Expand Down Expand Up @@ -103,7 +109,6 @@ def test_louvain_with_edgevals(graph_file):

@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED)
def test_louvain(graph_file):
gc.collect()

M = utils.read_csv_for_nx(graph_file)
cu_M = utils.read_csv_file(graph_file)
Expand All @@ -127,3 +132,13 @@ def test_louvain(graph_file):
assert len(cu_parts) == len(nx_parts)
assert cu_mod > (0.82 * nx_mod)
assert abs(cu_mod - cu_mod_nx) < 0.0001


def test_louvain_directed_graph():
    """
    Verify that Louvain rejects a directed graph.

    The asymmetric karate edge list is read and handed to the
    ``cugraph_call`` helper with ``directed=True``; the call is expected to
    raise ``ValueError``.
    """
    input_data_path = (utils.RAPIDS_DATASET_ROOT_DIR_PATH /
                       "karate-asymmetric.csv").as_posix()

    cu_M = utils.read_csv_file(input_data_path)

    # Do not unpack the return value inside the ``raises`` block: a failed
    # tuple unpack raises ValueError too and would mask a missing rejection.
    with pytest.raises(ValueError):
        cugraph_call(cu_M, directed=True)
Loading