Skip to content

Commit

Permalink
Merge pull request #554 from choubeyy/main
Browse files Browse the repository at this point in the history
Made changes to graph format converter, gspan and tkg
  • Loading branch information
udayRage authored Nov 15, 2024
2 parents d375212 + 3c863d2 commit b295993
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 48 deletions.
112 changes: 101 additions & 11 deletions PAMI/extras/graph/convertFormat.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,121 @@
# Usage
# obj1 = ConvertFormat('iFile')
#
# obj1.convertFromTraditionalToCompressed()
#
# obj1.save('oFile')
#
# obj1.getMemoryRSS()
#
# obj1.getMemoryUSS()
#
# obj2 = ConvertFormat('iFileCompressed')
#
# obj2.convertFromCompressedToTraditional()
#
# obj2.save('oFileTrad')
#
# obj2.getMemoryRSS()
#
# obj2.getMemoryUSS()

import os
import psutil


class ConvertFormat:
    """
    Convert graph databases between the traditional and the compressed text formats.

    Traditional format, one record per line::

        t # <graphId>
        v <nodeId> <label>
        e <u> <v> <label>

    Compressed format, one graph per line::

        <nodeId> <label> <nodeId> <label> ... : <u> <v> <label> <u> <v> <label> ...

    A conversion only fills ``self.convertedData`` (one string per graph);
    call :meth:`save` afterwards to write the result to disk.
    """

    def __init__(self, iFile):
        """
        :param iFile: Path to the input graph database to convert.
        """
        self.iFile = iFile
        # One formatted string per converted graph, filled by the convert* methods.
        self.convertedData = []

    def _writeGraphToFileCompressed(self, graph):
        """
        Format one graph as a single compressed-format line.

        :param graph: dict with 'nodes' (list of (id, label)) and 'edges' (list of (u, v, label)).
        :return: The line, newline-terminated. Nodes are ordered by id, edges keep input order.
        """
        nodes = sorted(graph['nodes'], key=lambda x: x[0])
        node_str = ' '.join(f"{node} {label}" for node, label in nodes)
        edge_str = ' '.join(f"{u} {v} {label}" for u, v, label in graph['edges'])
        return f"{node_str} : {edge_str}\n"

    def _writeGraphToFileTraditional(self, graph, gId):
        """
        Format one graph as a traditional-format record (t / v / e lines).

        :param graph: dict with 'nodes' (list of (id, label)) and 'edges' (list of (u, v, label)).
        :param gId: Graph identifier written on the 't # <gId>' header line.
        :return: The multi-line record. Nodes are ordered by id, edges keep input order.
        """
        lines = [f"t # {gId}\n"]
        lines.extend(f"v {node} {label}\n"
                     for node, label in sorted(graph['nodes'], key=lambda x: x[0]))
        lines.extend(f"e {u} {v} {label}\n" for u, v, label in graph['edges'])
        return ''.join(lines)

    def convertFromTraditionalToCompressed(self):
        """
        Read ``self.iFile`` in traditional format and store one compressed line
        per graph in ``self.convertedData`` (any previous result is discarded).

        Records that are too short are reported and skipped. 'v'/'e' records
        that appear before the first 't' header are collected into an implicit
        first graph instead of raising KeyError.
        """
        self.convertedData = []
        graph = None  # None until the first record of the first graph is seen
        with open(self.iFile, 'r') as iFile:
            for line in iFile:
                parts = line.strip().split()
                if not parts:
                    continue
                kind = parts[0]
                if kind == 't':
                    # A new header closes the graph collected so far.
                    if graph is not None:
                        self.convertedData.append(self._writeGraphToFileCompressed(graph))
                    graph = {'nodes': [], 'edges': []}
                elif kind == 'v':
                    if len(parts) < 3:
                        print(f"Invalid format in line: {line.strip()}")
                        continue
                    if graph is None:
                        graph = {'nodes': [], 'edges': []}
                    graph['nodes'].append((int(parts[1]), parts[2]))
                elif kind == 'e':
                    if len(parts) < 4:
                        print(f"Invalid format in line: {line.strip()}")
                        continue
                    if graph is None:
                        graph = {'nodes': [], 'edges': []}
                    graph['edges'].append((int(parts[1]), int(parts[2]), parts[3]))
        # The last graph has no following 't' line to flush it.
        if graph is not None:
            self.convertedData.append(self._writeGraphToFileCompressed(graph))

    def convertFromCompressedToTraditional(self):
        """
        Read ``self.iFile`` in compressed format and store one traditional
        record per line in ``self.convertedData`` (any previous result is
        discarded). Graph ids are assigned sequentially from 0, counting only
        the lines that were converted.

        Lines without ':' are reported and skipped. Dangling node or edge
        tokens are reported and the complete entries parsed so far are kept.
        """
        self.convertedData = []
        gId = 0
        with open(self.iFile, 'r') as iFile:
            for line in iFile:
                stripped = line.strip()
                if not stripped:
                    continue  # Skip empty lines
                if ':' not in stripped:
                    print(f"Invalid format in line: {stripped}")
                    continue
                # Split on the FIRST colon only, so a later ':' (e.g. inside an
                # edge label) cannot break the two-way unpacking.
                nodes_part, _, edges_part = stripped.partition(':')
                nodes_tokens = nodes_part.split()
                edges_tokens = edges_part.split()

                # Parse nodes: flat (id, label) pairs
                nodes = []
                for i in range(0, len(nodes_tokens), 2):
                    if i + 1 >= len(nodes_tokens):
                        print(f"Incomplete node information in line: {stripped}")
                        break
                    nodes.append((int(nodes_tokens[i]), nodes_tokens[i + 1]))

                # Parse edges: flat (u, v, label) triples
                edges = []
                for i in range(0, len(edges_tokens), 3):
                    if i + 2 >= len(edges_tokens):
                        print(f"Incomplete edge information in line: {stripped}")
                        break
                    edges.append((int(edges_tokens[i]),
                                  int(edges_tokens[i + 1]),
                                  edges_tokens[i + 2]))

                graph = {'nodes': nodes, 'edges': edges}
                self.convertedData.append(self._writeGraphToFileTraditional(graph, gId))
                gId += 1

    def save(self, oFile):
        """
        Saves the converted data to the specified output file.

        Prints a notice and writes nothing when no conversion result is available.

        :param oFile: Path to the output file.
        """
        if not self.convertedData:
            print("No converted data to save. Please perform a conversion first.")
            return

        with open(oFile, 'w') as file:
            file.writelines(self.convertedData)

    def getMemoryRSS(self):
        """
        :return: The ``rss`` field of psutil's memory_info() for the current process.
        """
        process = psutil.Process(os.getpid())
        return process.memory_info().rss

    def getMemoryUSS(self):
        """
        :return: The ``uss`` field of psutil's memory_full_info() for the current process.
        """
        process = psutil.Process(os.getpid())
        return process.memory_full_info().uss

57 changes: 42 additions & 15 deletions PAMI/subgraphMining/basic/gspan.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ def __init__(self, iFile, minSupport, outputSingleVertices=True, maxNumberOfEdge
self._memoryUSS = float()
self._memoryRSS = float()

self.label_mapping = {}
self.current_label = 0
self.edge_label_mapping = {}
self.current_edge_label = 0


def mine(self):

Expand Down Expand Up @@ -114,6 +119,9 @@ def save(self, oFile):
subgraphs to a file specified by the `outputPath` parameter. The method iterates over each
frequent subgraph in `self.frequentSubgraphs` and writes the subgraph information to the file
"""
reverse_label_mapping = {v: k for k, v in self.label_mapping.items()}
reverse_edge_label_mapping = {v: k for k, v in self.edge_label_mapping.items()}

with open(oFile, 'w') as bw:
i = 0
for subgraph in self.frequentSubgraphs:
Expand All @@ -123,18 +131,20 @@ def save(self, oFile):
sb.append(f"t # {i} * {subgraph.support}\n")
if dfsCode.size == 1:
ee = dfsCode.getEeList()[0]
if ee.edgeLabel == -1:
sb.append(f"v 0 {ee.vLabel1}\n")
else:
sb.append(f"v 0 {ee.vLabel1}\n")
sb.append(f"v 1 {ee.vLabel2}\n")
sb.append(f"e 0 1 {ee.edgeLabel}\n")
vLabel1 = reverse_label_mapping.get(ee.vLabel1, ee.vLabel1)
sb.append(f"v 0 {vLabel1}\n")
if ee.edgeLabel != -1:
vLabel2 = reverse_label_mapping.get(ee.vLabel2, ee.vLabel2)
edgeLabel = reverse_edge_label_mapping.get(ee.edgeLabel, ee.edgeLabel)
sb.append(f"v 1 {vLabel2}\n")
sb.append(f"e 0 1 {edgeLabel}\n")
else:
vLabels = dfsCode.getAllVLabels()
vLabels = [reverse_label_mapping.get(label, label) for label in dfsCode.getAllVLabels()]
for j, vLabel in enumerate(vLabels):
sb.append(f"v {j} {vLabel}\n")
for ee in dfsCode.getEeList():
sb.append(f"e {ee.v1} {ee.v2} {ee.edgeLabel}\n")
edgeLabel = reverse_edge_label_mapping.get(ee.edgeLabel, ee.edgeLabel)
sb.append(f"e {ee.v1} {ee.v2} {edgeLabel}\n")

if self.outputGraphIds:
sb.append("x " + " ".join(str(id) for id in subgraph.setOfGraphsIds))
Expand All @@ -143,16 +153,13 @@ def save(self, oFile):
bw.write("".join(sb))
i += 1


def readGraphs(self, path):
"""
The `readGraphs` function reads graph data from a file and constructs a list of graphs with vertices
and edges.
:param path: The `path` parameter in the `readGraphs` method is the file path to the text file
containing the graph data that needs to be read and processed. This method reads the graph data from
the specified file and constructs a list of graphs represented by vertices and edges based on the
information in the
containing the graph data that needs to be read and processed.
:return: The `readGraphs` method reads graph data from a file specified by the `path` parameter. It
parses the data to create a list of graph objects and returns this list. Each graph object contains
information about vertices and edges within the graph.
Expand All @@ -170,19 +177,27 @@ def readGraphs(self, path):
graphDatabase.append(_ab.Graph(gId, vMap))
vMap = {} # Reset for the next graph

gId = int(line.split(" ")[2])
gId = int(line.split(" ")[2])

elif line.startswith("v"):
items = line.split(" ")
vId = int(items[1])
vLabel = int(items[2])
# Map vertex label
if items[2].isdigit():
vLabel = int(items[2])
else:
vLabel = self.get_label(items[2])
vMap[vId] = _ab.Vertex(vId, vLabel)

elif line.startswith("e"):
items = line.split(" ")
v1 = int(items[1])
v2 = int(items[2])
eLabel = int(items[3])
# Map edge label
if items[3].isdigit():
eLabel = int(items[3])
else:
eLabel = self.get_edge_label(items[3])
e = _ab.Edge(v1, v2, eLabel)
vMap[v1].addEdge(e)
vMap[v2].addEdge(e)
Expand All @@ -193,6 +208,17 @@ def readGraphs(self, path):
self.graphCount = len(graphDatabase)
return graphDatabase

def get_label(self, label_char):
    """
    Return the integer code for a non-numeric vertex label.

    The first time a label is seen it is assigned the next unused code
    (``self.current_label``, which is then incremented) and recorded in
    ``self.label_mapping``; later calls return the recorded code.

    :param label_char: The vertex label text as read from the input file.
    :return: The integer code associated with ``label_char``.
    """
    # NOTE(review): readGraphs uses purely numeric labels as their own integer
    # value, so codes handed out here share that integer space - confirm that
    # inputs never mix numeric and textual vertex labels.
    mapping = self.label_mapping
    try:
        return mapping[label_char]
    except KeyError:
        code = self.current_label
        mapping[label_char] = code
        self.current_label = code + 1
        return code

def get_edge_label(self, label_char):
    """
    Return the integer code for a non-numeric edge label.

    The first time a label is seen it is assigned the next unused code
    (``self.current_edge_label``, which is then incremented) and recorded in
    ``self.edge_label_mapping``; later calls return the recorded code.

    :param label_char: The edge label text as read from the input file.
    :return: The integer code associated with ``label_char``.
    """
    # NOTE(review): readGraphs uses purely numeric edge labels as their own
    # integer value, so codes handed out here share that integer space -
    # confirm that inputs never mix numeric and textual edge labels.
    mapping = self.edge_label_mapping
    try:
        return mapping[label_char]
    except KeyError:
        code = self.current_edge_label
        mapping[label_char] = code
        self.current_edge_label = code + 1
        return code

def subgraphIsomorphisms(self, c: _ab.DFSCode, g: _ab.Graph):
"""
Expand Down Expand Up @@ -680,3 +706,4 @@ def saveSubgraphsByGraphId(self, oFile):
with open(oFile, 'w') as f:
for _, subgraphIds in graphToSubgraphs.items():
f.write(f"{' '.join(map(str, subgraphIds))}\n")

64 changes: 42 additions & 22 deletions PAMI/subgraphMining/topK/tkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ def __init__(self, iFile, k, maxNumberOfEdges=float('inf'), outputSingleVertices
self.eliminatedWithMaxSize = 0
self.emptyGraphsRemoved = 0
self.pruneByEdgeCount = 0

self.label_mapping = {}
self.reverse_label_mapping = {}
self.current_label = 0

def mine(self):
"""
Expand Down Expand Up @@ -92,13 +94,9 @@ def mine(self):

def readGraphs(self, path):
"""
The `readGraphs` function reads graph data from a file and constructs a list of graphs with vertices
and edges.
:param path: This method reads the graph data from the specified file and constructs a list of graphs
represented by vertices and edges
:return: The `readGraphs` method returns a list of `_ab.Graph` objects, which represent graphs read
from the file.
Reads graph data from a file and constructs a list of graphs with vertices and edges.
Handles character vertex labels by mapping them to unique integers.
Edge labels are assumed to be integers.
"""
with open(path, 'r') as br:
graphDatabase = []
Expand All @@ -114,11 +112,19 @@ def readGraphs(self, path):
gId = int(line.split()[2])
elif line.startswith("v"):
items = line.split()
vId, vLabel = int(items[1]), int(items[2])
vId = int(items[1])
label = items[2]
# Map vertex label if it's a string, else convert to integer
if label.isdigit():
vLabel = int(label)
else:
vLabel = self.get_label(label)
vMap[vId] = _ab.Vertex(vId, vLabel)
elif line.startswith("e"):
items = line.split()
v1, v2, eLabel = int(items[1]), int(items[2]), int(items[3])
v1 = int(items[1])
v2 = int(items[2])
eLabel = int(items[3]) # Assuming edge labels are integers
edge = _ab.Edge(v1, v2, eLabel)
vMap[v1].addEdge(edge)
vMap[v2].addEdge(edge)
Expand All @@ -129,12 +135,23 @@ def readGraphs(self, path):
self.graphCount = len(graphDatabase)
return graphDatabase


def get_label(self, label_char):
    """
    Maps a character vertex label to a unique integer.

    If the label is already mapped, returns the existing integer. Otherwise
    assigns ``self.current_label`` to it, records the pair in both
    ``self.label_mapping`` (label -> integer) and ``self.reverse_label_mapping``
    (integer -> label), and increments ``self.current_label``.

    :param label_char: The vertex label text as read from the input file.
    :return: The integer code associated with ``label_char``.
    """
    # NOTE(review): readGraphs uses purely numeric labels as their own integer
    # value, so codes handed out here share that integer space - confirm that
    # inputs never mix numeric and textual vertex labels.
    forward = self.label_mapping
    try:
        return forward[label_char]
    except KeyError:
        code = self.current_label
        forward[label_char] = code
        self.reverse_label_mapping[code] = label_char
        self.current_label = code + 1
        return code

def save(self, oFile):
"""
The `save` function writes subgraph information to a file in a specific format.
:param oFile: The `oFile` parameter in the `save` method is the file path where the output will be
saved. This method writes the subgraphs information to the specified file in a specific format
Saves the frequent subgraphs to an output file.
Converts integer vertex labels back to their original characters.
"""
subgraphsList = self.getSubgraphsList()

Expand All @@ -146,24 +163,30 @@ def save(self, oFile):
sb.append(f"t # {i} * {subgraph.support}\n")
if len(dfsCode.eeList) == 1:
ee = dfsCode.eeList[0]
sb.append(f"v 0 {ee.vLabel1}\n")
# Convert labels back to characters if mapped
vLabel1 = self.reverse_label_mapping.get(ee.vLabel1, ee.vLabel1)
sb.append(f"v 0 {vLabel1}\n")
if ee.edgeLabel != -1:
sb.append(f"v 1 {ee.vLabel2}\n")
vLabel2 = self.reverse_label_mapping.get(ee.vLabel2, ee.vLabel2)
sb.append(f"v 1 {vLabel2}\n")
sb.append(f"e 0 1 {ee.edgeLabel}\n")
else:
vLabels = dfsCode.getAllVLabels()
for j, vLabel in enumerate(vLabels):
# Convert labels back to characters if mapped
vLabel = self.reverse_label_mapping.get(vLabel, vLabel)
sb.append(f"v {j} {vLabel}\n")
for ee in dfsCode.eeList:
sb.append(f"e {ee.v1} {ee.v2} {ee.edgeLabel}\n")

if self.outputGraphIds:
# Include graph IDs if the feature is enabled
if self.outputGraphIds and subgraph.setOfGraphsIds:
sb.append("x " + " ".join(str(id) for id in subgraph.setOfGraphsIds))

sb.append("\n\n")
bw.write("".join(sb))


def savePattern(self, subgraph):
def savePattern(self, subgraph):
# previousMinSup = self.minSup

self.kSubgraphs.put(subgraph)
Expand Down Expand Up @@ -545,6 +568,3 @@ def getSubgraphsList(self):
subgraphsList.sort(key=lambda sg: sg.support, reverse=True)
return subgraphsList




0 comments on commit b295993

Please sign in to comment.