Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Katz Centrality to pylibcugraph, refactor Katz Centrality for cugraph #2201

Merged
merged 29 commits into from
Apr 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
07684ed
Define eigenvector centrality C API, define tests
ChuckHastings Mar 30, 2022
17be3fe
fix clang-format issues
ChuckHastings Mar 30, 2022
6d828e1
respond to PR comments
ChuckHastings Apr 5, 2022
f23e2b5
update pylibcugraph to use new C API changes
ChuckHastings Apr 5, 2022
ea32dc8
add katz centrality to the C API
ChuckHastings Apr 5, 2022
ab7add0
Merge pull request #38 from ChuckHastings/fea_c_api_katz
betochimas Apr 6, 2022
9c64e58
Merge branch 'branch-22.06' into fea_c_api_katz
ChuckHastings Apr 6, 2022
d295448
Incorporated C katz test to pylib
betochimas Apr 6, 2022
770e20c
Merge branch 'branch-22.06-wip-sg-katz' into merge-4_6
betochimas Apr 6, 2022
391afa5
Merge pull request #41 from betochimas/merge-4_6
betochimas Apr 6, 2022
24c2d39
Merge pull request #40 from ChuckHastings/fea_c_api_katz
betochimas Apr 6, 2022
316f0a6
Split algorithms.pxd into 2, similar to libcugraph_c
betochimas Apr 6, 2022
be762a7
Merge pull request #42 from betochimas/branch-22.06
betochimas Apr 6, 2022
dfe6da2
Implementation of katz_centrality on pylib, testing close there
betochimas Apr 8, 2022
3d0a3bb
Merge branch 'branch-22.06-wip-sg-katz' of https://github.com/betochi…
betochimas Apr 8, 2022
e8de135
Initial commit for cugraph level refactor of SG Katz
betochimas Apr 11, 2022
fd31c36
Style check edits
betochimas Apr 11, 2022
b9b72f8
Merge pull request #43 from rapidsai/branch-22.06
betochimas Apr 11, 2022
7d0182b
Revert "Merging 4/7-11 commits"
betochimas Apr 12, 2022
6e0cba2
Merge pull request #44 from betochimas/revert-43-branch-22.06
betochimas Apr 12, 2022
4a7673a
Sorting results if renumbered
betochimas Apr 12, 2022
4e88aa8
Revert "Revert "Merging 4/7-11 commits""
betochimas Apr 12, 2022
5ee1623
Merge pull request #45 from betochimas/revert-44-revert-43-branch-22.06
betochimas Apr 12, 2022
6c8c6a8
Merge pull request #46 from betochimas/branch-22.06
betochimas Apr 12, 2022
c22be7a
Testing edits
betochimas Apr 13, 2022
0e7f8b7
Modify refactored sg katz to prevent breaking functionality/compat wi…
betochimas Apr 14, 2022
0de6782
Removed old SG Katz, ensured that refactored Katz passes original tes…
betochimas Apr 14, 2022
cdefe85
Add descriptions to new katz wrappers
betochimas Apr 14, 2022
65949e3
Addressing review comments w/ style check, testing edits mainly
betochimas Apr 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions datasets/toy_graph.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
0 1 0.1
1 3 2.1
1 4 1.1
2 0 5.1
2 1 3.1
2 3 4.1
3 5 7.2
4 5 3.2
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/centrality/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down
34 changes: 0 additions & 34 deletions python/cugraph/cugraph/centrality/katz_centrality.pxd

This file was deleted.

87 changes: 63 additions & 24 deletions python/cugraph/cugraph/centrality/katz_centrality.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.centrality import katz_centrality_wrapper
from pylibcugraph.experimental import (ResourceHandle,
GraphProperties,
SGGraph,
katz_centrality as pylibcugraph_katz
)
from cugraph.utilities import (ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
import cudf


def katz_centrality(
G, alpha=None, beta=None, max_iter=100, tol=1.0e-6,
G, alpha=None, beta=1.0, max_iter=1000, tol=1.0e-6,
rlratzel marked this conversation as resolved.
Show resolved Hide resolved
nstart=None, normalized=True
):
"""
Compute the Katz centrality for the nodes of the graph G. cuGraph does not
currently support the 'beta' and 'weight' parameters as seen in the
corresponding networkX call. This implementation is based on a relaxed
version of Katz defined by Foster with a reduced computational complexity
of O(n+m)
Compute the Katz centrality for the nodes of the graph G. This
implementation is based on a relaxed version of Katz defined by Foster
with a reduced computational complexity of O(n+m)

On a directed graph, cuGraph computes the out-edge Katz centrality score.
This is the opposite of NetworkX, which computes the in-edge Katz centrality
Expand All @@ -46,7 +49,7 @@ def katz_centrality(
----------
G : cuGraph.Graph or networkx.Graph
cuGraph graph descriptor with connectivity information. The graph can
contain either directed (DiGraph) or undirected edges (Graph).
contain either directed or undirected edges.

alpha : float, optional (default=None)
Attenuation factor defaulted to None. If alpha is not specified then
Expand All @@ -63,17 +66,18 @@ def katz_centrality(
guarantee that it will never exceed alpha_max thus in turn
fulfilling the requirement for convergence.

beta : float, optional (default=None)
A weight scalar - currently Not Supported
beta : float, optional (default=1.0)
Weight scalar added to each vertex's new Katz Centrality score in every
iteration

max_iter : int, optional (default=100)
max_iter : int, optional (default=1000)
The maximum number of iterations before an answer is returned. This can
be used to limit the execution time and do an early exit before the
solver reaches the convergence tolerance.
If this value is less than or equal to 0, cuGraph will use the default
value, which is 100.
value, which is 1000.

tol : float, optional (default=1.0e-6)
tol : float, optional (default=1e-6)
Set the tolerance of the approximation; this parameter should be a small
magnitude value.
The lower the tolerance the better the approximation. If this value is
Expand All @@ -98,7 +102,6 @@ def katz_centrality(
df : cudf.DataFrame or Dictionary if using NetworkX
GPU data frame containing two cudf.Series of size V: the vertex
identifiers and the corresponding katz centrality values.

df['vertex'] : cudf.Series
Contains the vertex identifiers
df['katz_centrality'] : cudf.Series
Expand All @@ -113,32 +116,68 @@ def katz_centrality(
>>> kc = cugraph.katz_centrality(G)

"""

if beta is not None:
raise NotImplementedError(
"The beta argument is "
"currently not supported"
)
if (alpha is not None) and (alpha <= 0.0):
raise ValueError(f"'alpha' must be a positive float or None, "
f"got: {alpha}")
if (not isinstance(beta, float)) or (beta <= 0.0):
raise ValueError(f"'beta' must be a positive float, got: {beta}")
if (not isinstance(max_iter, int)):
raise ValueError(f"'max_iter' must be an integer, got: {max_iter}")
elif max_iter <= 0:
max_iter = 1000
if (not isinstance(tol, float)) or (tol <= 0.0):
raise ValueError(f"'tol' must be a positive float, got: {tol}")

G, isNx = ensure_cugraph_obj_for_nx(G)

srcs = G.edgelist.edgelist_df['src']
dsts = G.edgelist.edgelist_df['dst']
if 'weights' in G.edgelist.edgelist_df.columns:
weights = G.edgelist.edgelist_df['weights']
else:
# FIXME: If weights column is not imported, a weights column of 1s
# with type hardcoded to float32 is passed into wrapper
weights = cudf.Series((srcs + 1) / (srcs + 1), dtype="float32")

if alpha is None:
largest_out_degree = G.degrees().nlargest(n=1, columns="out_degree")
largest_out_degree = largest_out_degree["out_degree"].iloc[0]
alpha = 1 / (largest_out_degree + 1)

if nstart is not None:
if G.renumbered is True:
if len(G.renumber_map.implementation.col_names) > 1:
cols = nstart.columns[:-1].to_list()
else:
cols = 'vertex'
nstart = G.add_internal_vertex_id(nstart, 'vertex', cols)
nstart = nstart[nstart.columns[0]]

resource_handle = ResourceHandle()
graph_props = GraphProperties(is_multigraph=G.is_multigraph())
store_transposed = False
renumber = False
do_expensive_check = False

sg = SGGraph(resource_handle, graph_props, srcs, dsts, weights,
store_transposed, renumber, do_expensive_check)

vertices, values = pylibcugraph_katz(resource_handle, sg, nstart, alpha,
beta, tol, max_iter,
do_expensive_check)

vertices = cudf.Series(vertices)
values = cudf.Series(values)

df = katz_centrality_wrapper.katz_centrality(
G, alpha, max_iter, tol, nstart, normalized
)
df = cudf.DataFrame()
df["vertex"] = vertices
df["katz_centrality"] = values

if G.renumbered:
df = G.unrenumber(df, "vertex")

if isNx is True:
dict = df_score_to_dictionary(df, 'katz_centrality')
dict = df_score_to_dictionary(df, "katz_centrality")
return dict
else:
return df
100 changes: 0 additions & 100 deletions python/cugraph/cugraph/centrality/katz_centrality_wrapper.pyx

This file was deleted.

Loading