From 96b1c83fb738c5fc77f68db4fd1f04dfda7e553a Mon Sep 17 00:00:00 2001 From: choubeyy Date: Fri, 15 Nov 2024 14:33:35 +0530 Subject: [PATCH 1/3] Changed the code structure and added memory methods --- PAMI/extras/graph/convertFormat.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/PAMI/extras/graph/convertFormat.py b/PAMI/extras/graph/convertFormat.py index 733e4296..9e2ca64c 100644 --- a/PAMI/extras/graph/convertFormat.py +++ b/PAMI/extras/graph/convertFormat.py @@ -1,11 +1,14 @@ # Usage # obj = ConvertFormat('iFile', 'oFile') # +import os +import psutil + + class ConvertFormat: - def __init__(self, iFile, oFile): + def __init__(self, iFile): self.iFile = iFile - self.oFile = oFile - self.convert() + self.oFile = 'oFile.txt' def _writeGraphToFile(self, graph, oFile): node_str = ' '.join(f"{node} {label}" for node, label in sorted(graph['nodes'])) @@ -28,4 +31,15 @@ def convert(self): elif parts[0] == 'e': graph['edges'].append((int(parts[1]), int(parts[2]), parts[3])) if graph: - self._writeGraphToFile(graph, oFile) \ No newline at end of file + self._writeGraphToFile(graph, oFile) + + def getMemoryRSS(self): + process = psutil.Process(os.getpid()) + rss = process.memory_info().rss + return rss + + def getMemoryUSS(self): + process = psutil.Process(os.getpid()) + uss = process.memory_full_info().uss + return uss + From 7555659dfc5071e90154e8a2cda0bd2887bc669d Mon Sep 17 00:00:00 2001 From: choubeyy Date: Fri, 15 Nov 2024 16:50:43 +0530 Subject: [PATCH 2/3] Now allows for both alphabetical and integer labels for vertices and edges --- PAMI/subgraphMining/basic/gspan.py | 57 +++++++++++++++++++------- PAMI/subgraphMining/topK/tkg.py | 64 ++++++++++++++++++++---------- 2 files changed, 84 insertions(+), 37 deletions(-) diff --git a/PAMI/subgraphMining/basic/gspan.py b/PAMI/subgraphMining/basic/gspan.py index c977d614..5584af75 100644 --- a/PAMI/subgraphMining/basic/gspan.py +++ b/PAMI/subgraphMining/basic/gspan.py @@ -62,6 +62,11 @@ def __init__(self, iFile, minSupport, outputSingleVertices=True, maxNumberOfEdge self._memoryUSS = float() self._memoryRSS = float() + self.label_mapping = {} + self.current_label = 0 + self.edge_label_mapping = {} + self.current_edge_label = 0 + def mine(self): @@ -114,6 +119,9 @@ def save(self, oFile): subgraphs to a file specified by the `outputPath` parameter. The method iterates over each frequent subgraph in `self.frequentSubgraphs` and writes the subgraph information to the file """ + reverse_label_mapping = {v: k for k, v in self.label_mapping.items()} + reverse_edge_label_mapping = {v: k for k, v in self.edge_label_mapping.items()} + with open(oFile, 'w') as bw: i = 0 for subgraph in self.frequentSubgraphs: @@ -123,18 +131,20 @@ def save(self, oFile): sb.append(f"t # {i} * {subgraph.support}\n") if dfsCode.size == 1: ee = dfsCode.getEeList()[0] - if ee.edgeLabel == -1: - sb.append(f"v 0 {ee.vLabel1}\n") - else: - sb.append(f"v 0 {ee.vLabel1}\n") - sb.append(f"v 1 {ee.vLabel2}\n") - sb.append(f"e 0 1 {ee.edgeLabel}\n") + vLabel1 = reverse_label_mapping.get(ee.vLabel1, ee.vLabel1) + sb.append(f"v 0 {vLabel1}\n") + if ee.edgeLabel != -1: + vLabel2 = reverse_label_mapping.get(ee.vLabel2, ee.vLabel2) + edgeLabel = reverse_edge_label_mapping.get(ee.edgeLabel, ee.edgeLabel) + sb.append(f"v 1 {vLabel2}\n") + sb.append(f"e 0 1 {edgeLabel}\n") else: - vLabels = dfsCode.getAllVLabels() + vLabels = [reverse_label_mapping.get(label, label) for label in dfsCode.getAllVLabels()] for j, vLabel in enumerate(vLabels): sb.append(f"v {j} {vLabel}\n") for ee in dfsCode.getEeList(): - sb.append(f"e {ee.v1} {ee.v2} {ee.edgeLabel}\n") + edgeLabel = reverse_edge_label_mapping.get(ee.edgeLabel, ee.edgeLabel) + sb.append(f"e {ee.v1} {ee.v2} {edgeLabel}\n") if self.outputGraphIds: sb.append("x " + " ".join(str(id) for id in subgraph.setOfGraphsIds)) @@ -143,16 +153,13 @@ def save(self, oFile): bw.write("".join(sb)) i += 1 - def readGraphs(self, path): """ The `readGraphs` function reads graph data from a file and constructs a list of graphs with vertices and edges. :param path: The `path` parameter in the `readGraphs` method is the file path to the text file - containing the graph data that needs to be read and processed. This method reads the graph data from - the specified file and constructs a list of graphs represented by vertices and edges based on the - information in the + containing the graph data that needs to be read and processed. :return: The `readGraphs` method reads graph data from a file specified by the `path` parameter. It parses the data to create a list of graph objects and returns this list. Each graph object contains information about vertices and edges within the graph. @@ -170,19 +177,27 @@ def readGraphs(self, path): graphDatabase.append(_ab.Graph(gId, vMap)) vMap = {} # Reset for the next graph - gId = int(line.split(" ")[2]) + gId = int(line.split(" ")[2]) elif line.startswith("v"): items = line.split(" ") vId = int(items[1]) - vLabel = int(items[2]) + # Map vertex label + if items[2].isdigit(): + vLabel = int(items[2]) + else: + vLabel = self.get_label(items[2]) vMap[vId] = _ab.Vertex(vId, vLabel) elif line.startswith("e"): items = line.split(" ") v1 = int(items[1]) v2 = int(items[2]) - eLabel = int(items[3]) + # Map edge label + if items[3].isdigit(): + eLabel = int(items[3]) + else: + eLabel = self.get_edge_label(items[3]) e = _ab.Edge(v1, v2, eLabel) vMap[v1].addEdge(e) vMap[v2].addEdge(e) @@ -193,6 +208,17 @@ def readGraphs(self, path): self.graphCount = len(graphDatabase) return graphDatabase + def get_label(self, label_char): + if label_char not in self.label_mapping: + self.label_mapping[label_char] = self.current_label + self.current_label += 1 + return self.label_mapping[label_char] + + def get_edge_label(self, label_char): + if label_char not in self.edge_label_mapping: + self.edge_label_mapping[label_char] = self.current_edge_label + self.current_edge_label += 1 + return self.edge_label_mapping[label_char] def subgraphIsomorphisms(self, c: _ab.DFSCode, g: _ab.Graph): """ @@ -680,3 +706,4 @@ def saveSubgraphsByGraphId(self, oFile): with open(oFile, 'w') as f: for _, subgraphIds in graphToSubgraphs.items(): f.write(f"{' '.join(map(str, subgraphIds))}\n") + diff --git a/PAMI/subgraphMining/topK/tkg.py b/PAMI/subgraphMining/topK/tkg.py index cdb4252c..cea563f8 100644 --- a/PAMI/subgraphMining/topK/tkg.py +++ b/PAMI/subgraphMining/topK/tkg.py @@ -53,7 +53,9 @@ def __init__(self, iFile, k, maxNumberOfEdges=float('inf'), outputSingleVertices self.eliminatedWithMaxSize = 0 self.emptyGraphsRemoved = 0 self.pruneByEdgeCount = 0 - + self.label_mapping = {} + self.reverse_label_mapping = {} + self.current_label = 0 def mine(self): """ @@ -92,13 +94,9 @@ def mine(self): def readGraphs(self, path): """ - The `readGraphs` function reads graph data from a file and constructs a list of graphs with vertices - and edges. - - :param path: This method reads the graph data from the specified file and constructs a list of graphs - represented by vertices and edges - :return: The `readGraphs` method returns a list of `_ab.Graph` objects, which represent graphs read - from the file. + Reads graph data from a file and constructs a list of graphs with vertices and edges. + Handles character vertex labels by mapping them to unique integers. + Edge labels are assumed to be integers. """ with open(path, 'r') as br: graphDatabase = [] @@ -114,11 +112,19 @@ def readGraphs(self, path): gId = int(line.split()[2]) elif line.startswith("v"): items = line.split() - vId, vLabel = int(items[1]), int(items[2]) + vId = int(items[1]) + label = items[2] + # Map vertex label if it's a string, else convert to integer + if label.isdigit(): + vLabel = int(label) + else: + vLabel = self.get_label(label) vMap[vId] = _ab.Vertex(vId, vLabel) elif line.startswith("e"): items = line.split() - v1, v2, eLabel = int(items[1]), int(items[2]), int(items[3]) + v1 = int(items[1]) + v2 = int(items[2]) + eLabel = int(items[3]) # Assuming edge labels are integers edge = _ab.Edge(v1, v2, eLabel) vMap[v1].addEdge(edge) vMap[v2].addEdge(edge) @@ -129,12 +135,23 @@ def readGraphs(self, path): self.graphCount = len(graphDatabase) return graphDatabase + + def get_label(self, label_char): + """ + Maps a character vertex label to a unique integer. + If the label is already mapped, returns the existing integer. + Otherwise, assigns a new integer to the label. + """ + if label_char not in self.label_mapping: + self.label_mapping[label_char] = self.current_label + self.reverse_label_mapping[self.current_label] = label_char + self.current_label += 1 + return self.label_mapping[label_char] + def save(self, oFile): """ - The `save` function writes subgraph information to a file in a specific format. - - :param oFile: The `oFile` parameter in the `save` method is the file path where the output will be - saved. This method writes the subgraphs information to the specified file in a specific format + Saves the frequent subgraphs to an output file. + Converts integer vertex labels back to their original characters. """ subgraphsList = self.getSubgraphsList() @@ -146,24 +163,30 @@ def save(self, oFile): sb.append(f"t # {i} * {subgraph.support}\n") if len(dfsCode.eeList) == 1: ee = dfsCode.eeList[0] - sb.append(f"v 0 {ee.vLabel1}\n") + # Convert labels back to characters if mapped + vLabel1 = self.reverse_label_mapping.get(ee.vLabel1, ee.vLabel1) + sb.append(f"v 0 {vLabel1}\n") if ee.edgeLabel != -1: - sb.append(f"v 1 {ee.vLabel2}\n") + vLabel2 = self.reverse_label_mapping.get(ee.vLabel2, ee.vLabel2) + sb.append(f"v 1 {vLabel2}\n") sb.append(f"e 0 1 {ee.edgeLabel}\n") else: vLabels = dfsCode.getAllVLabels() for j, vLabel in enumerate(vLabels): + # Convert labels back to characters if mapped + vLabel = self.reverse_label_mapping.get(vLabel, vLabel) sb.append(f"v {j} {vLabel}\n") for ee in dfsCode.eeList: sb.append(f"e {ee.v1} {ee.v2} {ee.edgeLabel}\n") - if self.outputGraphIds: + # Include graph IDs if the feature is enabled + if self.outputGraphIds and subgraph.setOfGraphsIds: sb.append("x " + " ".join(str(id) for id in subgraph.setOfGraphsIds)) + sb.append("\n\n") bw.write("".join(sb)) - - def savePattern(self, subgraph): + def savePattern(self, subgraph): # previousMinSup = self.minSup self.kSubgraphs.put(subgraph) @@ -545,6 +568,3 @@ def getSubgraphsList(self): subgraphsList.sort(key=lambda sg: sg.support, reverse=True) return subgraphsList - - - From 3c863d245e97e58645977f97e9eec07df2dbced7 Mon Sep 17 00:00:00 2001 From: choubeyy Date: Fri, 15 Nov 2024 19:05:47 +0530 Subject: [PATCH 3/3] Added both traditional to compressed and compressed to traditional converter --- PAMI/extras/graph/convertFormat.py | 94 +++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 9 deletions(-) diff --git a/PAMI/extras/graph/convertFormat.py b/PAMI/extras/graph/convertFormat.py index 9e2ca64c..84773f63 100644 --- a/PAMI/extras/graph/convertFormat.py +++ b/PAMI/extras/graph/convertFormat.py @@ -1,6 +1,20 @@ # Usage -# obj = ConvertFormat('iFile', 'oFile') +# obj1 = ConvertFormat('iFile', 'oFile') # +# obj1.convertFromTraditionalToCompressed() +# +# obj1.getMemoryRSS() +# +# obj1.getMemoryUSS() +# +# obj2 = ConvertFormat('iFileCompressed', 'oFileTrad') +# +# obj2.convertFromCompressedToTraditional() +# +# obj2.getMemoryRSS() +# +# obj2.getMemoryUSS() + import os import psutil @@ -8,30 +22,92 @@ class ConvertFormat: def __init__(self, iFile): self.iFile = iFile - self.oFile = 'oFile.txt' + self.convertedData = [] - def _writeGraphToFile(self, graph, oFile): - node_str = ' '.join(f"{node} {label}" for node, label in sorted(graph['nodes'])) + def _writeGraphToFileCompressed(self, graph): + node_str = ' '.join(f"{node} {label}" for node, label in sorted(graph['nodes'], key=lambda x: x[0])) edge_str = ' '.join(f"{u} {v} {label}" for u, v, label in graph['edges']) - oFile.write(f"{node_str} : {edge_str}\n") + return f"{node_str} : {edge_str}\n" - def convert(self): + def _writeGraphToFileTraditional(self, graph, gId): + traditional_lines = [f"t # {gId}\n"] + for node, label in sorted(graph['nodes'], key=lambda x: x[0]): + traditional_lines.append(f"v {node} {label}\n") + for u, v, label in graph['edges']: + traditional_lines.append(f"e {u} {v} {label}\n") + return ''.join(traditional_lines) + + def convertFromTraditionalToCompressed(self): graph = {} - with open(self.iFile, 'r') as iFile, open(self.oFile, 'w') as oFile: + self.convertedData = [] + with open(self.iFile, 'r') as iFile: for line in iFile: parts = line.strip().split() if not parts: continue if parts[0] == 't': if graph: - self._writeGraphToFile(graph, oFile) + compressedGraph = self._writeGraphToFileCompressed(graph) + self.convertedData.append(compressedGraph) graph = {'nodes': [], 'edges': []} elif parts[0] == 'v': graph['nodes'].append((int(parts[1]), parts[2])) elif parts[0] == 'e': graph['edges'].append((int(parts[1]), int(parts[2]), parts[3])) if graph: - self._writeGraphToFile(graph, oFile) + compressedGraph = self._writeGraphToFileCompressed(graph) + self.convertedData.append(compressedGraph) + + def convertFromCompressedToTraditional(self): + self.convertedData = [] + gId = 0 + with open(self.iFile, 'r') as iFile: + for line in iFile: + if not line.strip(): + continue # Skip empty lines + if ':' not in line: + print(f"Invalid format in line: {line.strip()}") + continue + nodes_part, edges_part = line.strip().split(':') + nodes_tokens = nodes_part.strip().split() + edges_tokens = edges_part.strip().split() + + # Parse nodes + nodes = [] + for i in range(0, len(nodes_tokens), 2): + node_id = int(nodes_tokens[i]) + node_label = nodes_tokens[i + 1] + nodes.append((node_id, node_label)) + + # Parse edges + edges = [] + for i in range(0, len(edges_tokens), 3): + if i + 2 >= len(edges_tokens): + print(f"Incomplete edge information in line: {line.strip()}") + break + u = int(edges_tokens[i]) + v = int(edges_tokens[i + 1]) + label = edges_tokens[i + 2] + edges.append((u, v, label)) + + graph = {'nodes': nodes, 'edges': edges} + traditionalGraph = self._writeGraphToFileTraditional(graph, gId) + self.convertedData.append(traditionalGraph) + gId += 1 + + def save(self, oFile): + """ + Saves the converted data to the specified output file. + + :param oFile: Path to the output file. + """ + if not self.convertedData: + print("No converted data to save. Please perform a conversion first.") + return + + with open(oFile, 'w') as file: + for graphData in self.convertedData: + file.write(graphData) def getMemoryRSS(self): process = psutil.Process(os.getpid())