class GraphNode:
    """Vertex wrapper that gives a source object graph identity by name.

    Two nodes compare equal, and hash alike, when the wrapped sources have
    the same ``name``.

    Attributes:
        src: the wrapped source object; only its ``name`` is used here.
        index: position of the node in the owning graph's vertex list.
            ``Graph.__init__`` overwrites this value.
    """

    def __init__(self, src, index=None):
        self.src = src
        self.index = index

    def __str__(self):
        return self.src.name

    def __repr__(self):
        return self.src.name

    def __hash__(self):
        return hash(self.src.name)

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of failing
        # with AttributeError on ``other.src``.
        if not isinstance(other, GraphNode):
            return NotImplemented
        return self.src.name == other.src.name

    def __json__(self):
        """Return the JSON form of the node: the name of its source."""
        return self.src.name


class GraphNodeEncoder(json.JSONEncoder):
    """JSON encoder that serialises any object exposing ``__json__()``."""

    def default(self, obj):
        """Return ``obj.__json__()`` when available, else defer to the base.

        The base implementation raises ``TypeError`` for unsupported types.
        """
        if hasattr(obj, '__json__'):
            return obj.__json__()
        return super().default(obj)


class Graph:
    """Directed graph with strongly-connected-component detection.

    Attributes:
        V: list of vertices; ``V[i].index == i`` after construction.
        edges: list of ``(tail, head)`` vertex pairs.
        neighbours: mapping from a vertex to the list of its successors.
    """

    def __init__(self, vertices: list[GraphNode], edges: list[tuple[GraphNode, GraphNode]]):
        """Build the adjacency map and number the vertices.

        Args:
            vertices: the graph's nodes. Any iterable is accepted (for
                example a ``dict.values()`` view); it is copied into a list.
            edges: ``(tail, head)`` pairs of nodes taken from ``vertices``.
        """
        # Materialise both inputs so that views and one-shot iterators can be
        # indexed, measured and traversed more than once.
        self.V = list(vertices)
        for i, v in enumerate(self.V):
            v.index = i
        self.edges = list(edges)
        self.neighbours = defaultdict(list)
        for u, v in self.edges:
            self.neighbours[u].append(v)

    def __json__(self):
        """Return a dict holding the edge list and the adjacency map."""
        return {
            "edges": [(u, v) for u, v in self.edges],
            "neighbours": {
                u.__json__(): [v.__json__() for v in self.neighbours.get(u, [])]
                for u in self.V
            }
        }

    def __str__(self):
        return json.dumps(self, indent=4, cls=GraphNodeEncoder)

    def transpose(self):
        """Return a new graph over the same vertices with every edge reversed."""
        return Graph(self.V, [(v, u) for u, v in self.edges])

    def dfs(self, v, visited, stack):
        """Depth-first search from ``v``, appending nodes to ``stack`` in post-order.

        Args:
            v: start node.
            visited: list of booleans indexed by ``node.index``; updated in place.
            stack: container with ``append``; each node is appended once all
                of its successors have been finished.
        """
        # An explicit stack of (node, successor-iterator) frames reproduces
        # the recursive post-order exactly, without being bounded by the
        # interpreter's recursion limit on long dependency chains.
        visited[v.index] = True
        frames = [(v, iter(self.neighbours.get(v, ())))]
        while frames:
            node, successors = frames[-1]
            for successor in successors:
                if not visited[successor.index]:
                    visited[successor.index] = True
                    frames.append(
                        (successor, iter(self.neighbours.get(successor, ())))
                    )
                    break
            else:
                # Every successor is finished, so this node is finished too.
                frames.pop()
                stack.append(node)

    def fill_order(self):
        """Return a deque of all vertices in order of increasing DFS finish time."""
        visited = [False] * len(self.V)
        stack = deque()
        for i, node in enumerate(self.V):
            if not visited[i]:
                self.dfs(node, visited, stack)
        return stack

    def dfs_util(self, v, visited):
        """Return the set of not-yet-visited nodes reachable from ``v``.

        ``v`` itself is included.  ``visited`` (booleans indexed by
        ``node.index``) is updated in place.
        """
        visited[v.index] = True
        component = set()
        pending = [v]
        while pending:
            node = pending.pop()
            component.add(node)
            for neighbour in self.neighbours.get(node, ()):
                if not visited[neighbour.index]:
                    visited[neighbour.index] = True
                    pending.append(neighbour)
        return component

    def kosaraju(self):
        """Return the strongly connected components, largest first.

        Returns:
            A list of sets of nodes.  Components of equal size keep the
            order in which they were discovered.
        """
        stack = self.fill_order()
        transposed_graph = self.transpose()

        visited = [False] * len(self.V)
        scc_list = []

        while stack:
            # Pop from the right: nodes must be processed in order of
            # decreasing finish time from the first pass.
            node = stack.pop()
            if not visited[node.index]:
                scc_list.append(transposed_graph.dfs_util(node, visited))
        return sorted(scc_list, key=len, reverse=True)
def find_import_cycles(self):
    """Record import cycles as the non-trivial strongly connected components.

    Builds a ``Graph`` with one ``GraphNode`` per entry of ``self.sources``
    and one edge per ``imported_by`` relation, runs Kosaraju's algorithm on
    it, and keeps the components that have more than one member.

    Side effects:
        ``self.cycles`` is replaced by a list of cycles, largest first; each
        cycle is a list of source objects sorted by name.
        ``self.cyclenodes`` gains the name of every module that is part of a
        cycle.
        ``self.cyclerelations`` is not modified.

    Raises:
        KeyError: if an ``imported_by`` entry is not a key of
            ``self.sources``.
    """
    ordered = sorted(self.sources.values(), key=lambda src: src.name.lower())
    vertices = {src.name: GraphNode(src) for src in ordered}

    # Edges follow ``imported_by``, i.e. the reverse of the import direction.
    # That is harmless: a graph and its transpose have the same strongly
    # connected components.
    edges = [
        (node, vertices[self.sources[importer].name])
        for node in vertices.values()
        for importer in node.src.imported_by
    ]
    graph = Graph(list(vertices.values()), edges)

    components = [c for c in graph.kosaraju() if len(c) > 1]

    # Components are sets, whose iteration order varies between runs under
    # string-hash randomisation; sort the members so the result is stable.
    self.cycles = [
        sorted(
            (node.src for node in component),
            key=lambda src: (src.name.lower(), src.name),
        )
        for component in components
    ]
    for component in components:
        self.cyclenodes.update(node.src.name for node in component)
diff --git a/pydeps/depgraph2dot.py b/pydeps/depgraph2dot.py index beb9ed3..b968004 100644 --- a/pydeps/depgraph2dot.py +++ b/pydeps/depgraph2dot.py @@ -64,69 +64,75 @@ def render(self, depgraph, ctx): space = colors.ColorSpace(visited) for src in sorted(visited): bg, fg = depgraph.get_colors(src, space) - kwargs = {} + fillcolor = colors.rgb2css(bg) + fontcolor = colors.rgb2css(fg) + kwargs = { + 'fillcolor': fillcolor, + 'fontcolor': fontcolor, + } if src.name in depgraph.cyclenodes: - kwargs['shape'] = 'octagon' + kwargs['shape'] = 'box' + kwargs['fillcolor'] = 'blue' + kwargs['fontcolor'] = 'white' ctx.write_node( src.name, label=src.get_label(splitlength=14, rmprefix=self.kw.get('rmprefix')), - fillcolor=colors.rgb2css(bg), - fontcolor=colors.rgb2css(fg), **kwargs ) return ctx.text() -class CycleGraphDot(object): - def __init__(self, **kw): - self.kw = kw - - def render(self, depgraph, ctx): - with ctx.graph(concentrate=False): - visited = set() - drawn = set() - relations = set() - - for aname, bname in sorted(depgraph.cyclerelations): - try: - a = depgraph.sources[aname] - b = depgraph.sources[bname] - except KeyError: - continue - drawn.add((bname, aname)) - ctx.write_rule( - bname, aname, - # weight=depgraph.proximity_metric(a, b), - # minlen=depgraph.dissimilarity_metric(a, b), - ) - relations.add(aname) - relations.add(bname) - visited.add(a) - visited.add(b) - - space = colors.ColorSpace(visited) - for src in sorted(visited, key=lambda x: x.name.lower()): - # if src.name not in relations: - # print('skipping', src.name) - # continue - bg, fg = depgraph.get_colors(src, space) - kwargs = {} - - if src.name in depgraph.cyclenodes: - kwargs['shape'] = 'octagon' - - ctx.write_node( - src.name, label=src.label, - fillcolor=colors.rgb2css(bg), - fontcolor=colors.rgb2css(fg), - **kwargs - ) - - return ctx.text() +# class CycleGraphDot(object): +# def __init__(self, **kw): +# self.kw = kw + +# def render(self, depgraph, ctx): +# with 
ctx.graph(concentrate=False): +# visited = set() +# drawn = set() +# relations = set() + +# for aname, bname in sorted(depgraph.cyclerelations): +# try: +# a = depgraph.sources[aname] +# b = depgraph.sources[bname] +# except KeyError: +# continue +# drawn.add((bname, aname)) +# ctx.write_rule( +# bname, aname, +# # weight=depgraph.proximity_metric(a, b), +# # minlen=depgraph.dissimilarity_metric(a, b), +# ) +# relations.add(aname) +# relations.add(bname) +# visited.add(a) +# visited.add(b) + +# space = colors.ColorSpace(visited) +# for src in sorted(visited, key=lambda x: x.name.lower()): +# if src.name not in relations: +# print('skipping', src.name) +# continue +# bg, fg = depgraph.get_colors(src, space) +# kwargs = {} + +# # print("CYCLENODES:", depgraph.cyclenodes) +# if src.name in depgraph.cyclenodes: +# kwargs['shape'] = 'octagon' + +# ctx.write_node( +# src.name, label=src.label, +# fillcolor=colors.rgb2css(bg), +# fontcolor=colors.rgb2css(fg), +# **kwargs +# ) + +# return ctx.text() def dep2dot(target, depgraph, **kw): @@ -135,7 +141,7 @@ def dep2dot(target, depgraph, **kw): return dotter.render(depgraph, ctx) -def cycles2dot(target, depgraph, **kw): - dotter = CycleGraphDot(**kw) - ctx = RenderBuffer(target, remove_islands=False, **kw) - return dotter.render(depgraph, ctx) +# def cycles2dot(target, depgraph, **kw): +# dotter = CycleGraphDot(**kw) +# ctx = RenderBuffer(target, remove_islands=False, **kw) +# return dotter.render(depgraph, ctx) diff --git a/pydeps/pydeps.py b/pydeps/pydeps.py index 4693c34..f476523 100644 --- a/pydeps/pydeps.py +++ b/pydeps/pydeps.py @@ -8,7 +8,7 @@ from pydeps.configs import Config from . import py2depgraph, cli, dot, target -from .depgraph2dot import dep2dot, cycles2dot +from .depgraph2dot import dep2dot # , cycles2dot import logging from . 
from pydeps.depgraph import Graph, GraphNode


class Source:
    """Minimal stand-in for a source object wrapped by ``GraphNode``."""

    def __init__(self, name):
        self.name = name
        self.imported_by = set()


def test_kosaraju():
    """Kosaraju's algorithm finds both cycles and orders components by size."""
    nodes = [GraphNode(Source(str(i))) for i in range(10)]
    links = [
        (0, 1), (1, 2), (2, 0),          # cycle 0 -> 1 -> 2 -> 0
        (2, 8), (8, 9),                  # tail hanging off the first cycle
        (1, 3),                          # bridge between the two cycles
        (3, 4), (4, 5), (5, 6), (6, 3),  # cycle 3 -> 4 -> 5 -> 6 -> 3
        (5, 7),                          # leaf hanging off the second cycle
    ]
    graph = Graph(nodes, [(nodes[a], nodes[b]) for a, b in links])

    scc = graph.kosaraju()

    assert len(scc) == 5
    # Components come back largest first.
    assert scc[0] == {nodes[3], nodes[4], nodes[5], nodes[6]}
    assert scc[1] == {nodes[0], nodes[1], nodes[2]}
    # The three singletons tie on size; their relative order is a traversal
    # detail, so compare them without regard to order.
    assert {frozenset(c) for c in scc[2:]} == {
        frozenset({nodes[7]}),
        frozenset({nodes[8]}),
        frozenset({nodes[9]}),
    }