Merge branch 'main' into main

UdayLab · May 25, 2024 · 3dbff5a · 3dbff5a
2 parents 7824c90 + 6ee7f17
commit 3dbff5a
Show file tree

Hide file tree

Showing 88 changed files with 30,107 additions and 29,188 deletions.
diff --git a/PAMI/frequentPattern/basic/ECLATDiffset.py b/PAMI/frequentPattern/basic/ECLATDiffset.py
@@ -52,14 +52,13 @@
 """
 
 
-# from abstract import *
-
 from PAMI.frequentPattern.basic import abstract as _ab
 from deprecated import deprecated
 
 
 class ECLATDiffset(_ab._frequentPatterns):
     """
+
     :**Description**:   ECLATDiffset uses diffset to extract the frequent patterns in a transactional database.
 
     :**Reference**:  KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining
@@ -323,7 +322,7 @@ def getRuntime(self):
     def getPatternsAsDataFrame(self):
         """
 
-        Storing final frequent patterns in a dataframe
+        Storing final frequent patterns in a dataframe.
 
         :return: returning frequent patterns in a dataframe
         :rtype: pd.DataFrame
@@ -342,7 +341,7 @@ def getPatternsAsDataFrame(self):
     def save(self, outFile: str, seperator = "\t" ) -> None:
         """
 
-        Complete set of frequent patterns will be loaded in to an output file
+        Complete set of frequent patterns will be written to an output csv file.
 
         :param outFile: name of the output file
         :type outFile: csvfile
@@ -362,7 +361,7 @@ def save(self, outFile: str, seperator = "\t" ) -> None:
     def getPatterns(self):
         """
 
-        Function to send the set of frequent patterns after completion of the mining process
+        This function returns the frequent patterns after completion of the mining process
 
         :return: returning frequent patterns
         :rtype: dict
@@ -371,7 +370,7 @@ def getPatterns(self):
 
     def printResults(self):
         """
-        This function is used to print the results
+        This function is used to print the results.
         """
         print("Total number of Frequent Patterns:", len(self.getPatterns()))
         print("Total Memory in USS:", self.getMemoryUSS())

diff --git a/PAMI/frequentPattern/topk/FAE.py b/PAMI/frequentPattern/topk/FAE.py
@@ -1,10 +1,13 @@
 # Top - K is and algorithm to discover top frequent patterns in a transactional database.
 #
 # **Importing this algorithm into a python program**
-# ---------------------------------------------------------
 #
 #             import PAMI.frequentPattern.topK.FAE as alg
 #
+#             iFile = 'sampleDB.txt'
+#
+#             K = 2
+#
 #             obj = alg.FAE(iFile, K)
 #
 #             obj.mine()
@@ -31,9 +34,6 @@
 #
 
 
-
-
-
 __copyright__ = """
 Copyright (C)  2021 Rage Uday Kiran
 
@@ -57,49 +57,29 @@
 
 class FAE(_ab._frequentPatterns):
     """
-    :Description: Top - K is and algorithm to discover top frequent patterns in a transactional database.
-
-
-    :Reference:   Zhi-Hong Deng, Guo-Dong Fang: Mining Top-Rank-K Frequent Patterns: DOI: 10.1109/ICMLC.2007.4370261 · Source: IEEE Xplore
-                  https://ieeexplore.ieee.org/document/4370261
-
-    :param  iFile: str :
-                   Name of the Input file to mine complete set of frequent patterns
-    :param  oFile: str :
-                   Name of the output file to store complete set of frequent patterns
-    :param  k: int :
-                    User specified count of top frequent patterns
-    :param minimum: int :
-                    Minimum number of frequent patterns to consider in analysis
-
-    :param  sep: str :
-                   This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator.
+    About this algorithm
+    ====================
 
+    :**Description**: Top - K is an algorithm to discover top frequent patterns in a transactional database.
 
+    :**Reference**:   Zhi-Hong Deng, Guo-Dong Fang: Mining Top-Rank-K Frequent Patterns: DOI: 10.1109/ICMLC.2007.4370261 · Source: IEEE Xplore https://ieeexplore.ieee.org/document/4370261
 
-    :Attributes:
+    :**Parameters**:    - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.*
+                        - **oFile** (*str*) -- *Name of the output file to store complete set of frequent patterns.*
+                        - **k** (*int*) -- *User specified count of top frequent patterns.*
+                        - **minimum** (*int*) -- *Minimum number of frequent patterns to consider in analysis.*
+                        - **sep** (*str*) -- *This variable is used to distinguish items from one another in a transaction. The default separator is tab space. However, the users can override their default separator.*
 
-        startTime : float
-          To record the start time of the mining process
+    :**Attributes**:    - **startTime** (*float*) -- *To record the start time of the mining process.*
+                        - **endTime** (*float*) -- *To record the completion time of the mining process.*
+                        - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.*
+                        - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.*
+                        - **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.*
 
-        endTime : float
-          To record the completion time of the mining process
+    Execution methods
+    =================
 
-        finalPatterns : dict
-          Storing the complete set of patterns in a dictionary variable
-
-        memoryUSS : float
-          To store the total amount of USS memory consumed by the program
-
-        memoryRSS : float
-          To store the total amount of RSS memory consumed by the program
-
-        finalPatterns : dict
-            it represents to store the patterns
-
-
-    **Methods to execute code on terminal**
-    -------------------------------------------
+    **Terminal command**
 
     .. code-block:: console
 
@@ -109,45 +89,49 @@ class FAE(_ab._frequentPatterns):
 
       Example Usage:
 
-      (.venv) $ python3 FAE.py sampleDB.txt patterns.txt 10
-
-    .. note:: k will be considered as count of top frequent patterns to consider in analysis
+      (.venv) $ python3 FAE.py sampleDB.txt patterns.txt 10
 
+    .. note:: k will be considered as count of top frequent patterns to consider in analysis.
 
+    **Calling from a python program**
 
-    **Importing this algorithm into a python program**
-    ---------------------------------------------------------
     .. code-block:: python
 
-        import PAMI.frequentPattern.topK.FAE as alg
+            import PAMI.frequentPattern.topk.FAE as alg
+
+            iFile = 'sampleDB.txt'
+
+            K = 2
 
-        obj = alg.FAE(iFile, K)
+            obj = alg.FAE(iFile, K)
 
-        obj.mine()
+            obj.mine()
 
-        topKFrequentPatterns = obj.getPatterns()
+            topKFrequentPatterns = obj.getPatterns()
 
-        print("Total number of Frequent Patterns:", len(topKFrequentPatterns))
+            print("Total number of Frequent Patterns:", len(topKFrequentPatterns))
 
-        obj.save(oFile)
+            obj.save(oFile)
 
-        Df = obj.getPatternInDataFrame()
+            Df = obj.getPatternInDataFrame()
 
-        memUSS = obj.getMemoryUSS()
+            memUSS = obj.getMemoryUSS()
 
-        print("Total Memory in USS:", memUSS)
+            print("Total Memory in USS:", memUSS)
 
-        memRSS = obj.getMemoryRSS()
+            memRSS = obj.getMemoryRSS()
 
-        print("Total Memory in RSS", memRSS)
+            print("Total Memory in RSS", memRSS)
 
-        run = obj.getRuntime()
+            run = obj.getRuntime()
 
-        print("Total ExecutionTime in seconds:", run)
+            print("Total ExecutionTime in seconds:", run)
 
-    Credits:
-    --------
-        The complete program was written by P.Likhitha  under the supervision of Professor Rage Uday Kiran.
+
+    Credits
+    =======
+
+    The complete program was written by P. Likhitha and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran.
 
     """
 
@@ -166,8 +150,7 @@ class FAE(_ab._frequentPatterns):
 
     def _creatingItemSets(self):
         """
-            Storing the complete transactions of the database/input file in a database variable
-
+        Storing the complete transactions of the database/input file in a database variable
         """
 
         self._Database = []
@@ -227,14 +210,15 @@ def _frequentOneItem(self):
         return plist
 
     def _save(self, prefix, suffix, tidSetI):
-        """Saves the patterns that satisfy the periodic frequent property.
-
-            :param prefix: the prefix of a pattern
-            :type prefix: list
-            :param suffix: the suffix of a patterns
-            :type suffix: list
-            :param tidSetI: the timestamp of a patterns
-            :type tidSetI: list
+        """
+        Saves the patterns that satisfy the periodic frequent property.
+
+        :param prefix: the prefix of a pattern
+        :type prefix: list
+        :param suffix: the suffix of a pattern
+        :type suffix: list
+        :param tidSetI: the timestamp of a pattern
+        :type tidSetI: list
         """
 
         if prefix is None:
@@ -263,18 +247,16 @@ def _save(self, prefix, suffix, tidSetI):
                     return
 
     def _Generation(self, prefix, itemSets, tidSets):
-        """Equivalence class is followed  and checks for the patterns generated for periodic-frequent patterns.
-
-            :param prefix:  main equivalence prefix
-            :type prefix: periodic-frequent item or pattern
-            :param itemSets: patterns which are items combined with prefix and satisfying the periodicity
-                            and frequent with their timestamps
-            :type itemSets: list
-            :param tidSets: timestamps of the items in the argument itemSets
-            :type tidSets: list
-
-
-                    """
+        """
+        Follows the equivalence class approach and checks the patterns generated for periodic-frequent patterns.
+
+        :param prefix:  main equivalence prefix
+        :type prefix: periodic-frequent item or pattern
+        :param itemSets: patterns formed by combining items with the prefix that satisfy the periodicity and frequency constraints, together with their timestamps
+        :type itemSets: list
+        :param tidSets: timestamps of the items in the argument itemSets
+        :type tidSets: list
+        """
         if len(itemSets) == 1:
             i = itemSets[0]
             tidI = tidSets[0]
@@ -302,6 +284,7 @@ def _Generation(self, prefix, itemSets, tidSets):
     def _convert(self, value):
         """
         to convert the type of user specified minSup value
+
         :param value: user specified minSup value
         :type value: int or float or str
         :return: converted type
@@ -321,13 +304,13 @@ def _convert(self, value):
     @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.")
     def startMine(self):
         """
-            Main function of the program
+        TopK Frequent pattern mining process will start from here
         """
         self.mine()
 
     def mine(self):
         """
-            Main function of the program
+        TopK Frequent pattern mining process will start from here
         """
         self._startTime = _ab._time.time()
         if self._iFile is None:
@@ -364,7 +347,6 @@ def getMemoryUSS(self):
         Total amount of USS memory consumed by the mining process will be retrieved from this function
 
         :return: returning USS memory consumed by the mining process
-
         :rtype: float
         """
 
@@ -375,7 +357,6 @@ def getMemoryRSS(self):
         Total amount of RSS memory consumed by the mining process will be retrieved from this function
 
         :return: returning RSS memory consumed by the mining process
-
         :rtype: float
         """
 
@@ -386,7 +367,6 @@ def getRuntime(self):
         Calculating the total amount of runtime taken by the mining process
 
         :return: returning total amount of runtime taken by the mining process
-
         :rtype: float
         """
 
@@ -417,7 +397,9 @@ def save(self, outFile):
         Complete set of frequent patterns will be loaded in to an output file
 
         :param outFile: name of the output file
+
         :type outFile: csvfile
+
         """
         self._oFile = outFile
         writer = open(self._oFile, 'w+')
@@ -430,7 +412,6 @@ def getPatterns(self):
         Function to send the set of frequent patterns after completion of the mining process
 
         :return: returning frequent patterns
-
         :rtype: dict
         """
         return self._finalPatterns

diff --git a/PAMI/subgraphMining/basic/gspan.py b/PAMI/subgraphMining/basic/gspan.py
@@ -649,4 +649,17 @@ def getFrequentSubgraphs(self):
             sb.append('\n'.join(subgraphDescription))  
         return '\n'.join(sb)  
 
+    def getSubgraphGraphMapping(self):
+        """
+        Return a list of dicts, one per frequent subgraph, with keys "FID" (index of the subgraph), "Clabel" (string form of its DFS code) and "GIDs" (list of the graph IDs it belongs to).
+        """
+        mappings = []
+        for i, subgraph in enumerate(self.frequentSubgraphs):
+            mapping = {
+                "FID": i,
+                "Clabel": str(subgraph.dfsCode),
+                "GIDs": list(subgraph.setOfGraphsIds)
+            }
+            mappings.append(mapping)
+        return mappings
 
diff --git a/PAMI/subgraphMining/topK/graph.py b/PAMI/subgraphMining/topK/graph.py
@@ -44,7 +44,7 @@ def removeInfrequentLabel(self, label):
 
         for vertex in self.vMap.values():
             edgesToRemove = [edge for edge in vertex.getEdgeList() 
-                               if edge.getV1() not in self.vMap or edge.getV2() not in self.vMap]
+                               if edge.v1 not in self.vMap or edge.v2 not in self.vMap]
 
             for edge in edgesToRemove:
                 vertex.getEdgeList().remove(edge)

diff --git a/PAMI/subgraphMining/topK/tkg.py b/PAMI/subgraphMining/topK/tkg.py
@@ -492,7 +492,7 @@ def removeInfrequentVertexPairs(self, graphDB):
 
                         if TKG.ELIMINATE_INFREQUENT_VERTEX_PAIRS and count < self.minSup:
                             v1.removeEdge(edge)
-                            self.infrequentVertexPairsRemoved += 1
+                            self.infrequentVertexPairsRemovedCount += 1
 
                         elif TKG.ELIMINATE_INFREQUENT_EDGE_LABELS and \
                                 mapEdgeLabelToSupport.get(edge.getEdgeLabel(), 0) < self.minSup: