From 96b1c83fb738c5fc77f68db4fd1f04dfda7e553a Mon Sep 17 00:00:00 2001
From: choubeyy <choubeyishan28@gmail.com>
Date: Fri, 15 Nov 2024 14:33:35 +0530
Subject: [PATCH 1/3] Changed the code structure and added memory methods

---
 PAMI/extras/graph/convertFormat.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/PAMI/extras/graph/convertFormat.py b/PAMI/extras/graph/convertFormat.py
index 733e4296..9e2ca64c 100644
--- a/PAMI/extras/graph/convertFormat.py
+++ b/PAMI/extras/graph/convertFormat.py
@@ -1,11 +1,14 @@
 #  Usage
 #  obj = ConvertFormat('iFile', 'oFile')
 #
+import os
+import psutil
+
+
 class ConvertFormat:
-    def __init__(self, iFile, oFile):
+    def __init__(self, iFile):
         self.iFile = iFile
-        self.oFile = oFile
-        self.convert()
+        self.oFile = 'oFile.txt'
 
     def _writeGraphToFile(self, graph, oFile):
         node_str = ' '.join(f"{node} {label}" for node, label in sorted(graph['nodes']))
@@ -28,4 +31,15 @@ def convert(self):
                 elif parts[0] == 'e':
                     graph['edges'].append((int(parts[1]), int(parts[2]), parts[3]))
             if graph:
-                self._writeGraphToFile(graph, oFile)
\ No newline at end of file
+                self._writeGraphToFile(graph, oFile)
+
+    def getMemoryRSS(self):
+        process = psutil.Process(os.getpid())
+        rss = process.memory_info().rss
+        return rss
+
+    def getMemoryUSS(self):
+        process = psutil.Process(os.getpid())
+        uss = process.memory_full_info().uss
+        return uss
+

From 7555659dfc5071e90154e8a2cda0bd2887bc669d Mon Sep 17 00:00:00 2001
From: choubeyy <choubeyishan28@gmail.com>
Date: Fri, 15 Nov 2024 16:50:43 +0530
Subject: [PATCH 2/3] Now allows for both alphabetical and integer labels for
 vertices and edges

---
 PAMI/subgraphMining/basic/gspan.py | 57 +++++++++++++++++++-------
 PAMI/subgraphMining/topK/tkg.py    | 64 ++++++++++++++++++++----------
 2 files changed, 84 insertions(+), 37 deletions(-)

diff --git a/PAMI/subgraphMining/basic/gspan.py b/PAMI/subgraphMining/basic/gspan.py
index c977d614..5584af75 100644
--- a/PAMI/subgraphMining/basic/gspan.py
+++ b/PAMI/subgraphMining/basic/gspan.py
@@ -62,6 +62,11 @@ def __init__(self, iFile, minSupport, outputSingleVertices=True, maxNumberOfEdge
         self._memoryUSS = float()
         self._memoryRSS = float()
 
+        self.label_mapping = {}
+        self.current_label = 0
+        self.edge_label_mapping = {}
+        self.current_edge_label = 0
+
 
     def mine(self):
 
@@ -114,6 +119,9 @@ def save(self, oFile):
         subgraphs to a file specified by the `outputPath` parameter. The method iterates over each
         frequent subgraph in `self.frequentSubgraphs` and writes the subgraph information to the file
         """
+        reverse_label_mapping = {v: k for k, v in self.label_mapping.items()}
+        reverse_edge_label_mapping = {v: k for k, v in self.edge_label_mapping.items()}
+
         with open(oFile, 'w') as bw:
             i = 0
             for subgraph in self.frequentSubgraphs:
@@ -123,18 +131,20 @@ def save(self, oFile):
                 sb.append(f"t # {i} * {subgraph.support}\n")
                 if dfsCode.size == 1:
                     ee = dfsCode.getEeList()[0]
-                    if ee.edgeLabel == -1:
-                        sb.append(f"v 0 {ee.vLabel1}\n")
-                    else:
-                        sb.append(f"v 0 {ee.vLabel1}\n")
-                        sb.append(f"v 1 {ee.vLabel2}\n")
-                        sb.append(f"e 0 1 {ee.edgeLabel}\n")
+                    vLabel1 = reverse_label_mapping.get(ee.vLabel1, ee.vLabel1)
+                    sb.append(f"v 0 {vLabel1}\n")
+                    if ee.edgeLabel != -1:
+                        vLabel2 = reverse_label_mapping.get(ee.vLabel2, ee.vLabel2)
+                        edgeLabel = reverse_edge_label_mapping.get(ee.edgeLabel, ee.edgeLabel)
+                        sb.append(f"v 1 {vLabel2}\n")
+                        sb.append(f"e 0 1 {edgeLabel}\n")
                 else:
-                    vLabels = dfsCode.getAllVLabels()
+                    vLabels = [reverse_label_mapping.get(label, label) for label in dfsCode.getAllVLabels()]
                     for j, vLabel in enumerate(vLabels):
                         sb.append(f"v {j} {vLabel}\n")
                     for ee in dfsCode.getEeList():
-                        sb.append(f"e {ee.v1} {ee.v2} {ee.edgeLabel}\n")
+                        edgeLabel = reverse_edge_label_mapping.get(ee.edgeLabel, ee.edgeLabel)
+                        sb.append(f"e {ee.v1} {ee.v2} {edgeLabel}\n")
 
                 if self.outputGraphIds:
                     sb.append("x " + " ".join(str(id) for id in subgraph.setOfGraphsIds))
@@ -143,16 +153,13 @@ def save(self, oFile):
                 bw.write("".join(sb))
                 i += 1
 
-
     def readGraphs(self, path):
         """
         The `readGraphs` function reads graph data from a file and constructs a list of graphs with vertices
         and edges.
         
         :param path: The `path` parameter in the `readGraphs` method is the file path to the text file
-        containing the graph data that needs to be read and processed. This method reads the graph data from
-        the specified file and constructs a list of graphs represented by vertices and edges based on the
-        information in the
+        containing the graph data that needs to be read and processed.
         :return: The `readGraphs` method reads graph data from a file specified by the `path` parameter. It
         parses the data to create a list of graph objects and returns this list. Each graph object contains
         information about vertices and edges within the graph.
@@ -170,19 +177,27 @@ def readGraphs(self, path):
                         graphDatabase.append(_ab.Graph(gId, vMap))
                         vMap = {}  # Reset for the next graph
 
-                    gId = int(line.split(" ")[2]) 
+                    gId = int(line.split(" ")[2])
 
                 elif line.startswith("v"):
                     items = line.split(" ")
                     vId = int(items[1])
-                    vLabel = int(items[2])
+                    # Map vertex label
+                    if items[2].isdigit():
+                        vLabel = int(items[2])
+                    else:
+                        vLabel = self.get_label(items[2])
                     vMap[vId] = _ab.Vertex(vId, vLabel)
 
                 elif line.startswith("e"):
                     items = line.split(" ")
                     v1 = int(items[1])
                     v2 = int(items[2])
-                    eLabel = int(items[3])
+                    # Map edge label
+                    if items[3].isdigit():
+                        eLabel = int(items[3])
+                    else:
+                        eLabel = self.get_edge_label(items[3])
                     e = _ab.Edge(v1, v2, eLabel)
                     vMap[v1].addEdge(e)
                     vMap[v2].addEdge(e)
@@ -193,6 +208,17 @@ def readGraphs(self, path):
         self.graphCount = len(graphDatabase)
         return graphDatabase
 
+    def get_label(self, label_char):  # Map a non-numeric vertex label to an integer id (same id on repeat lookups).
+        if label_char not in self.label_mapping:  # first sighting: allocate the next unused id
+            self.label_mapping[label_char] = self.current_label  # NOTE(review): ids start at 0 and can equal a numeric label readGraphs keeps via int()
+            self.current_label += 1
+        return self.label_mapping[label_char]
+
+    def get_edge_label(self, label_char):  # Map a non-numeric edge label to an integer id (same id on repeat lookups).
+        if label_char not in self.edge_label_mapping:  # first sighting: allocate the next unused id
+            self.edge_label_mapping[label_char] = self.current_edge_label  # NOTE(review): ids start at 0 and can equal a numeric edge label readGraphs keeps via int()
+            self.current_edge_label += 1
+        return self.edge_label_mapping[label_char]
 
     def subgraphIsomorphisms(self, c: _ab.DFSCode, g: _ab.Graph):
         """
@@ -680,3 +706,4 @@ def saveSubgraphsByGraphId(self, oFile):
         with open(oFile, 'w') as f:
             for _, subgraphIds in graphToSubgraphs.items():
                 f.write(f"{' '.join(map(str, subgraphIds))}\n")
+
diff --git a/PAMI/subgraphMining/topK/tkg.py b/PAMI/subgraphMining/topK/tkg.py
index cdb4252c..cea563f8 100644
--- a/PAMI/subgraphMining/topK/tkg.py
+++ b/PAMI/subgraphMining/topK/tkg.py
@@ -53,7 +53,9 @@ def __init__(self, iFile, k, maxNumberOfEdges=float('inf'), outputSingleVertices
         self.eliminatedWithMaxSize = 0
         self.emptyGraphsRemoved = 0
         self.pruneByEdgeCount = 0
-
+        self.label_mapping = {}
+        self.reverse_label_mapping = {}
+        self.current_label = 0
 
     def mine(self):
         """
@@ -92,13 +94,9 @@ def mine(self):
 
     def readGraphs(self, path):
         """
-        The `readGraphs` function reads graph data from a file and constructs a list of graphs with vertices
-        and edges.
-        
-        :param path: This method reads the graph data from the specified file and constructs a list of graphs 
-        represented by vertices and edges
-        :return: The `readGraphs` method returns a list of `_ab.Graph` objects, which represent graphs read
-        from the file.
+        Reads graph data from a file and constructs a list of graphs with vertices and edges.
+        Handles character vertex labels by mapping them to unique integers.
+        Edge labels are assumed to be integers.
         """
         with open(path, 'r') as br:
             graphDatabase = []
@@ -114,11 +112,19 @@ def readGraphs(self, path):
                     gId = int(line.split()[2])
                 elif line.startswith("v"):
                     items = line.split()
-                    vId, vLabel = int(items[1]), int(items[2])
+                    vId = int(items[1])
+                    label = items[2]
+                    # Map vertex label if it's a string, else convert to integer
+                    if label.isdigit():
+                        vLabel = int(label)
+                    else:
+                        vLabel = self.get_label(label)
                     vMap[vId] = _ab.Vertex(vId, vLabel)
                 elif line.startswith("e"):
                     items = line.split()
-                    v1, v2, eLabel = int(items[1]), int(items[2]), int(items[3])
+                    v1 = int(items[1])
+                    v2 = int(items[2])
+                    eLabel = int(items[3])  # Assuming edge labels are integers
                     edge = _ab.Edge(v1, v2, eLabel)
                     vMap[v1].addEdge(edge)
                     vMap[v2].addEdge(edge)
@@ -129,12 +135,23 @@ def readGraphs(self, path):
         self.graphCount = len(graphDatabase)
         return graphDatabase
 
+
+    def get_label(self, label_char):
+        """
+        Maps a non-numeric vertex label to a unique integer, returning the same integer on repeat lookups.
+        The inverse is recorded in reverse_label_mapping so save() can write the original text back.
+        NOTE(review): ids start at 0 and can equal a numeric label that readGraphs keeps via int().
+        """
+        if label_char not in self.label_mapping:
+            self.label_mapping[label_char] = self.current_label
+            self.reverse_label_mapping[self.current_label] = label_char
+            self.current_label += 1
+        return self.label_mapping[label_char]
+
     def save(self, oFile):
         """
-        The `save` function writes subgraph information to a file in a specific format.
-        
-        :param oFile: The `oFile` parameter in the `save` method is the file path where the output will be
-        saved. This method writes the subgraphs information to the specified file in a specific format
+        Saves the frequent subgraphs to an output file.
+        Converts integer vertex labels back to their original characters.
         """
         subgraphsList = self.getSubgraphsList()
 
@@ -146,24 +163,30 @@ def save(self, oFile):
                 sb.append(f"t # {i} * {subgraph.support}\n")
                 if len(dfsCode.eeList) == 1:
                     ee = dfsCode.eeList[0]
-                    sb.append(f"v 0 {ee.vLabel1}\n")
+                    # Convert labels back to characters if mapped
+                    vLabel1 = self.reverse_label_mapping.get(ee.vLabel1, ee.vLabel1)
+                    sb.append(f"v 0 {vLabel1}\n")
                     if ee.edgeLabel != -1:
-                        sb.append(f"v 1 {ee.vLabel2}\n")
+                        vLabel2 = self.reverse_label_mapping.get(ee.vLabel2, ee.vLabel2)
+                        sb.append(f"v 1 {vLabel2}\n")
                         sb.append(f"e 0 1 {ee.edgeLabel}\n")
                 else:
                     vLabels = dfsCode.getAllVLabels()
                     for j, vLabel in enumerate(vLabels):
+                        # Convert labels back to characters if mapped
+                        vLabel = self.reverse_label_mapping.get(vLabel, vLabel)
                         sb.append(f"v {j} {vLabel}\n")
                     for ee in dfsCode.eeList:
                         sb.append(f"e {ee.v1} {ee.v2} {ee.edgeLabel}\n")
 
-                if self.outputGraphIds:
+                # Include graph IDs if the feature is enabled
+                if self.outputGraphIds and subgraph.setOfGraphsIds:
                     sb.append("x " + " ".join(str(id) for id in subgraph.setOfGraphsIds))
+
                 sb.append("\n\n")
                 bw.write("".join(sb))
 
-
-    def savePattern(self, subgraph):        
+    def savePattern(self, subgraph):
         # previousMinSup = self.minSup
 
         self.kSubgraphs.put(subgraph)
@@ -545,6 +568,3 @@ def getSubgraphsList(self):
         subgraphsList.sort(key=lambda sg: sg.support, reverse=True)
         return subgraphsList
 
-
-    
-

From 3c863d245e97e58645977f97e9eec07df2dbced7 Mon Sep 17 00:00:00 2001
From: choubeyy <choubeyishan28@gmail.com>
Date: Fri, 15 Nov 2024 19:05:47 +0530
Subject: [PATCH 3/3] Added both traditional to compressed and compressed to
 traditional converter

---
 PAMI/extras/graph/convertFormat.py | 94 +++++++++++++++++++++++++++---
 1 file changed, 85 insertions(+), 9 deletions(-)

diff --git a/PAMI/extras/graph/convertFormat.py b/PAMI/extras/graph/convertFormat.py
index 9e2ca64c..84773f63 100644
--- a/PAMI/extras/graph/convertFormat.py
+++ b/PAMI/extras/graph/convertFormat.py
@@ -1,6 +1,20 @@
 #  Usage
-#  obj = ConvertFormat('iFile', 'oFile')
+#  obj1 = ConvertFormat('iFile')
 #
+#  obj1.convertFromTraditionalToCompressed()
+#  obj1.save('oFile')
+#  obj1.getMemoryRSS()
+#
+#  obj1.getMemoryUSS()
+#
+#  obj2 = ConvertFormat('iFileCompressed')
+#
+#  obj2.convertFromCompressedToTraditional()
+#  obj2.save('oFileTrad')
+#  obj2.getMemoryRSS()
+#
+#  obj2.getMemoryUSS()
+
 import os
 import psutil
 
@@ -8,30 +22,92 @@
 class ConvertFormat:
     def __init__(self, iFile):
         self.iFile = iFile
-        self.oFile = 'oFile.txt'
+        self.convertedData = []
 
-    def _writeGraphToFile(self, graph, oFile):
-        node_str = ' '.join(f"{node} {label}" for node, label in sorted(graph['nodes']))
+    def _writeGraphToFileCompressed(self, graph):
+        node_str = ' '.join(f"{node} {label}" for node, label in sorted(graph['nodes'], key=lambda x: x[0]))
         edge_str = ' '.join(f"{u} {v} {label}" for u, v, label in graph['edges'])
-        oFile.write(f"{node_str} : {edge_str}\n")
+        return f"{node_str} : {edge_str}\n"
 
-    def convert(self):
+    def _writeGraphToFileTraditional(self, graph, gId):
+        """Render one graph as a `t # gId` header, id-sorted `v` lines, then `e` lines in stored order."""
+        ordered_nodes = sorted(graph['nodes'], key=lambda pair: pair[0])
+        vertex_lines = [f"v {node} {label}\n" for node, label in ordered_nodes]
+        edge_lines = [f"e {u} {v} {label}\n" for u, v, label in graph['edges']]
+        header = f"t # {gId}\n"
+        return ''.join([header, *vertex_lines, *edge_lines])
+
+    def convertFromTraditionalToCompressed(self):
         graph = {}
-        with open(self.iFile, 'r') as iFile, open(self.oFile, 'w') as oFile:
+        self.convertedData = []
+        with open(self.iFile, 'r') as iFile:
             for line in iFile:
                 parts = line.strip().split()
                 if not parts:
                     continue
                 if parts[0] == 't':
                     if graph:
-                        self._writeGraphToFile(graph, oFile)
+                        compressedGraph = self._writeGraphToFileCompressed(graph)
+                        self.convertedData.append(compressedGraph)
                     graph = {'nodes': [], 'edges': []}
                 elif parts[0] == 'v':
                     graph['nodes'].append((int(parts[1]), parts[2]))
                 elif parts[0] == 'e':
                     graph['edges'].append((int(parts[1]), int(parts[2]), parts[3]))
             if graph:
-                self._writeGraphToFile(graph, oFile)
+                compressedGraph = self._writeGraphToFileCompressed(graph)
+                self.convertedData.append(compressedGraph)
+
+    def convertFromCompressedToTraditional(self):
+        """Convert compressed `id label ... : u v label ...` lines of iFile to t/v/e records.
+
+        Results replace self.convertedData; converted lines are numbered 0, 1, 2, ... in input order.
+        Lines without ':' and dangling node/edge tokens are reported with print() and dropped;
+        a non-integer vertex id still raises ValueError.
+        """
+        self.convertedData = []
+        gId = 0
+        with open(self.iFile, 'r') as iFile:
+            for line in iFile:
+                text = line.strip()
+                if not text:
+                    continue  # Skip empty lines
+                if ':' not in text:
+                    print(f"Invalid format in line: {text}")
+                    continue
+                # Split at the first ':' only, so a later ':' (e.g. in an edge label) cannot break unpacking.
+                nodes_part, _, edges_part = text.partition(':')
+                nodes_tokens, edges_tokens = nodes_part.split(), edges_part.split()
+
+                # Nodes are (id, label) pairs; a dangling id is reported and dropped, mirroring the edge handling.
+                if len(nodes_tokens) % 2:
+                    print(f"Incomplete node information in line: {text}")
+                    nodes_tokens = nodes_tokens[:-1]
+                nodes = [(int(nodes_tokens[i]), nodes_tokens[i + 1]) for i in range(0, len(nodes_tokens), 2)]
+
+                # Edges are (u, v, label) triples; a trailing partial triple is reported and dropped.
+                if len(edges_tokens) % 3:
+                    print(f"Incomplete edge information in line: {text}")
+                    edges_tokens = edges_tokens[:len(edges_tokens) - len(edges_tokens) % 3]
+                edges = [(int(edges_tokens[i]), int(edges_tokens[i + 1]), edges_tokens[i + 2]) for i in range(0, len(edges_tokens), 3)]
+
+                graph = {'nodes': nodes, 'edges': edges}
+                self.convertedData.append(self._writeGraphToFileTraditional(graph, gId))
+                gId += 1
+
+    def save(self, oFile):
+        """
+        Write every converted graph record to `oFile`, in conversion order.
+
+        :param oFile: Path to the output file.
+        """
+        if not self.convertedData:
+            print("No converted data to save. Please perform a conversion first.")
+            return
+
+        # Each stored record already ends with its own newline, so nothing is inserted between them.
+        with open(oFile, 'w') as outFile:
+            outFile.writelines(self.convertedData)
 
     def getMemoryRSS(self):
         process = psutil.Process(os.getpid())