Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
udayRage authored May 25, 2024
2 parents 7824c90 + 6ee7f17 commit 3dbff5a
Show file tree
Hide file tree
Showing 88 changed files with 30,107 additions and 29,188 deletions.
11 changes: 5 additions & 6 deletions PAMI/frequentPattern/basic/ECLATDiffset.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,13 @@
"""


# from abstract import *

from PAMI.frequentPattern.basic import abstract as _ab
from deprecated import deprecated


class ECLATDiffset(_ab._frequentPatterns):
"""
:**Description**: ECLATDiffset uses diffset to extract the frequent patterns in a transactional database.
:**Reference**: KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining
Expand Down Expand Up @@ -323,7 +322,7 @@ def getRuntime(self):
def getPatternsAsDataFrame(self):
"""
Storing final frequent patterns in a dataframe
Storing final frequent patterns in a dataframe.
:return: returning frequent patterns in a dataframe
:rtype: pd.DataFrame
Expand All @@ -342,7 +341,7 @@ def getPatternsAsDataFrame(self):
def save(self, outFile: str, seperator = "\t" ) -> None:
"""
Complete set of frequent patterns will be loaded in to an output file
Complete set of frequent patterns will be written to an output csv file.
:param outFile: name of the output file
:type outFile: csvfile
Expand All @@ -362,7 +361,7 @@ def save(self, outFile: str, seperator = "\t" ) -> None:
def getPatterns(self):
"""
Function to send the set of frequent patterns after completion of the mining process
This function returns the frequent patterns after completion of the mining process
:return: returning frequent patterns
:rtype: dict
Expand All @@ -371,7 +370,7 @@ def getPatterns(self):

def printResults(self):
"""
This function is used to print the results
This function is used to print the results.
"""
print("Total number of Frequent Patterns:", len(self.getPatterns()))
print("Total Memory in USS:", self.getMemoryUSS())
Expand Down
161 changes: 71 additions & 90 deletions PAMI/frequentPattern/topk/FAE.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# Top - K is an algorithm to discover top frequent patterns in a transactional database.
#
# **Importing this algorithm into a python program**
# ---------------------------------------------------------
#
# import PAMI.frequentPattern.topk.FAE as alg
#
# iFile = 'sampleDB.txt'
#
# K = 2
#
# obj = alg.FAE(iFile, K)
#
# obj.mine()
Expand All @@ -31,9 +34,6 @@
#





__copyright__ = """
Copyright (C) 2021 Rage Uday Kiran
Expand All @@ -57,49 +57,29 @@

class FAE(_ab._frequentPatterns):
"""
:Description: Top - K is and algorithm to discover top frequent patterns in a transactional database.
:Reference: Zhi-Hong Deng, Guo-Dong Fang: Mining Top-Rank-K Frequent Patterns: DOI: 10.1109/ICMLC.2007.4370261 · Source: IEEE Xplore
https://ieeexplore.ieee.org/document/4370261
:param iFile: str :
Name of the Input file to mine complete set of frequent patterns
:param oFile: str :
Name of the output file to store complete set of frequent patterns
:param k: int :
User specified count of top frequent patterns
:param minimum: int :
Minimum number of frequent patterns to consider in analysis
:param sep: str :
This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator.
About this algorithm
====================
:**Description**: Top - K is an algorithm to discover top frequent patterns in a transactional database.
:**Reference**: Zhi-Hong Deng, Guo-Dong Fang: Mining Top-Rank-K Frequent Patterns: DOI: 10.1109/ICMLC.2007.4370261 · Source: IEEE Xplore https://ieeexplore.ieee.org/document/4370261
:Attributes:
:**Parameters**: - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.*
- **oFile** (*str*) -- *Name of the output file to store complete set of frequent patterns.*
- **k** (*int*) -- *User specified count of top frequent patterns.*
**minimum** (*int*) -- *Minimum number of frequent patterns to consider in analysis.*
**sep** (*str*) -- *This variable is used to distinguish items from one another in a transaction. The default separator is tab space. However, the users can override their default separator.*
startTime : float
To record the start time of the mining process
:**Attributes**: - **startTime** (*float*) -- *To record the start time of the mining process.*
- **endTime** (*float*) -- *To record the completion time of the mining process.*
- **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.*
- **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.*
- **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.*
endTime : float
To record the completion time of the mining process
Execution methods
=================
finalPatterns : dict
Storing the complete set of patterns in a dictionary variable
memoryUSS : float
To store the total amount of USS memory consumed by the program
memoryRSS : float
To store the total amount of RSS memory consumed by the program
finalPatterns : dict
it represents to store the patterns
**Methods to execute code on terminal**
-------------------------------------------
**Terminal command**
.. code-block:: console
Expand All @@ -109,45 +89,49 @@ class FAE(_ab._frequentPatterns):
Example Usage:
(.venv) $ python3 FAE.py sampleDB.txt patterns.txt 10
.. note:: k will be considered as count of top frequent patterns to consider in analysis
(.venv) $ python3 FAE.py sampleDB.txt patterns.txt 10.0
.. note:: k will be considered as count of top frequent patterns to consider in analysis.
**Calling from a python program**
**Importing this algorithm into a python program**
---------------------------------------------------------
.. code-block:: python
import PAMI.frequentPattern.topk.FAE as alg
import PAMI.frequentPattern.topk.FAE as alg
iFile = 'sampleDB.txt'
K = 2
obj = alg.FAE(iFile, K)
obj = alg.FAE(iFile, K)
obj.mine()
obj.mine()
topKFrequentPatterns = obj.getPatterns()
topKFrequentPatterns = obj.getPatterns()
print("Total number of Frequent Patterns:", len(topKFrequentPatterns))
print("Total number of Frequent Patterns:", len(topKFrequentPatterns))
obj.save(oFile)
obj.save(oFile)
Df = obj.getPatternInDataFrame()
Df = obj.getPatternInDataFrame()
memUSS = obj.getMemoryUSS()
memUSS = obj.getMemoryUSS()
print("Total Memory in USS:", memUSS)
print("Total Memory in USS:", memUSS)
memRSS = obj.getMemoryRSS()
memRSS = obj.getMemoryRSS()
print("Total Memory in RSS", memRSS)
print("Total Memory in RSS", memRSS)
run = obj.getRuntime()
run = obj.getRuntime()
print("Total ExecutionTime in seconds:", run)
print("Total ExecutionTime in seconds:", run)
Credits:
--------
The complete program was written by P.Likhitha under the supervision of Professor Rage Uday Kiran.
Credits
=======
The complete program was written by P. Likhitha and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran.
"""

Expand All @@ -166,8 +150,7 @@ class FAE(_ab._frequentPatterns):

def _creatingItemSets(self):
"""
Storing the complete transactions of the database/input file in a database variable
Storing the complete transactions of the database/input file in a database variable
"""

self._Database = []
Expand Down Expand Up @@ -227,14 +210,15 @@ def _frequentOneItem(self):
return plist

def _save(self, prefix, suffix, tidSetI):
"""Saves the patterns that satisfy the periodic frequent property.
:param prefix: the prefix of a pattern
:type prefix: list
:param suffix: the suffix of a patterns
:type suffix: list
:param tidSetI: the timestamp of a patterns
:type tidSetI: list
"""
Saves the patterns that satisfy the periodic frequent property.
:param prefix: the prefix of a pattern
:type prefix: list
:param suffix: the suffix of a pattern
:type suffix: list
:param tidSetI: the timestamp of a pattern
:type tidSetI: list
"""

if prefix is None:
Expand Down Expand Up @@ -263,18 +247,16 @@ def _save(self, prefix, suffix, tidSetI):
return

def _Generation(self, prefix, itemSets, tidSets):
"""Equivalence class is followed and checks for the patterns generated for periodic-frequent patterns.
:param prefix: main equivalence prefix
:type prefix: periodic-frequent item or pattern
:param itemSets: patterns which are items combined with prefix and satisfying the periodicity
and frequent with their timestamps
:type itemSets: list
:param tidSets: timestamps of the items in the argument itemSets
:type tidSets: list
"""
"""
Equivalence class is followed and checks for the patterns generated for periodic-frequent patterns.
:param prefix: main equivalence prefix
:type prefix: periodic-frequent item or pattern
:param itemSets: patterns which are items combined with prefix and satisfying the periodicity and frequent with their timestamps
:type itemSets: list
:param tidSets: timestamps of the items in the argument itemSets
:type tidSets: list
"""
if len(itemSets) == 1:
i = itemSets[0]
tidI = tidSets[0]
Expand Down Expand Up @@ -302,6 +284,7 @@ def _Generation(self, prefix, itemSets, tidSets):
def _convert(self, value):
"""
to convert the type of user specified minSup value
:param value: user specified minSup value
:type value: int or float or str
:return: converted type
Expand All @@ -321,13 +304,13 @@ def _convert(self, value):
@deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.")
def startMine(self):
"""
Deprecated entry point of the TopK frequent pattern mining process.

This method only delegates to mine(); call mine() directly instead.
"""
self.mine()

def mine(self):
"""
Main function of the program
TopK Frequent pattern mining process will start from here
"""
self._startTime = _ab._time.time()
if self._iFile is None:
Expand Down Expand Up @@ -364,7 +347,6 @@ def getMemoryUSS(self):
Total amount of USS memory consumed by the mining process will be retrieved from this function
:return: returning USS memory consumed by the mining process
:rtype: float
"""

Expand All @@ -375,7 +357,6 @@ def getMemoryRSS(self):
Total amount of RSS memory consumed by the mining process will be retrieved from this function
:return: returning RSS memory consumed by the mining process
:rtype: float
"""

Expand All @@ -386,7 +367,6 @@ def getRuntime(self):
Calculating the total amount of runtime taken by the mining process
:return: returning total amount of runtime taken by the mining process
:rtype: float
"""

Expand Down Expand Up @@ -417,7 +397,9 @@ def save(self, outFile):
Complete set of frequent patterns will be loaded in to an output file
:param outFile: name of the output file
:type outFile: csvfile
"""
self._oFile = outFile
writer = open(self._oFile, 'w+')
Expand All @@ -430,7 +412,6 @@ def getPatterns(self):
Function to send the set of frequent patterns after completion of the mining process
:return: returning frequent patterns
:rtype: dict
"""
return self._finalPatterns
Expand Down
13 changes: 13 additions & 0 deletions PAMI/subgraphMining/basic/gspan.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,4 +649,17 @@ def getFrequentSubgraphs(self):
sb.append('\n'.join(subgraphDescription))
return '\n'.join(sb)

def getSubgraphGraphMapping(self):
    """
    Return a list of mappings from subgraphs to the graph IDs they belong to.

    Each entry of ``self.frequentSubgraphs`` yields one dictionary, in order, in the
    format <FID, Clabel, GIDs[]>:

    - ``"FID"``: position of the subgraph in ``self.frequentSubgraphs``
    - ``"Clabel"``: ``str()`` of the subgraph's ``dfsCode``
    - ``"GIDs"``: the subgraph's ``setOfGraphsIds`` copied into a list

    :return: one mapping per frequent subgraph; empty when there are no frequent subgraphs
    :rtype: list
    """
    # enumerate supplies the FID; list(...) produces a fresh list of IDs for every mapping.
    return [
        {
            "FID": fid,
            "Clabel": str(subgraph.dfsCode),
            "GIDs": list(subgraph.setOfGraphsIds),
        }
        for fid, subgraph in enumerate(self.frequentSubgraphs)
    ]

2 changes: 1 addition & 1 deletion PAMI/subgraphMining/topK/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def removeInfrequentLabel(self, label):

for vertex in self.vMap.values():
edgesToRemove = [edge for edge in vertex.getEdgeList()
if edge.getV1() not in self.vMap or edge.getV2() not in self.vMap]
if edge.v1 not in self.vMap or edge.v2 not in self.vMap]

for edge in edgesToRemove:
vertex.getEdgeList().remove(edge)
Expand Down
2 changes: 1 addition & 1 deletion PAMI/subgraphMining/topK/tkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def removeInfrequentVertexPairs(self, graphDB):

if TKG.ELIMINATE_INFREQUENT_VERTEX_PAIRS and count < self.minSup:
v1.removeEdge(edge)
self.infrequentVertexPairsRemoved += 1
self.infrequentVertexPairsRemovedCount += 1

elif TKG.ELIMINATE_INFREQUENT_EDGE_LABELS and \
mapEdgeLabelToSupport.get(edge.getEdgeLabel(), 0) < self.minSup:
Expand Down
Loading

0 comments on commit 3dbff5a

Please sign in to comment.