From d7ac61fed955c0911c430bc61ad83778d64fd1e5 Mon Sep 17 00:00:00 2001 From: vanithakattumuri Date: Tue, 30 Apr 2024 14:30:12 +0900 Subject: [PATCH 1/3] #2 updated Apriori.py nd ECLAT.py documentation. --- PAMI/frequentPattern/basic/Apriori.py | 2 +- PAMI/frequentPattern/basic/ECLAT.py | 100 +++++++++++--------------- 2 files changed, 44 insertions(+), 58 deletions(-) diff --git a/PAMI/frequentPattern/basic/Apriori.py b/PAMI/frequentPattern/basic/Apriori.py index fea3fa42..d46ea50e 100644 --- a/PAMI/frequentPattern/basic/Apriori.py +++ b/PAMI/frequentPattern/basic/Apriori.py @@ -135,7 +135,7 @@ class Apriori(_ab._frequentPatterns): Credits ======= - The complete program was written by P. Likhitha under the supervision of Professor Rage Uday Kiran. + The complete program was written by P. Likhitha and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran. """ diff --git a/PAMI/frequentPattern/basic/ECLAT.py b/PAMI/frequentPattern/basic/ECLAT.py index 50259765..f1911855 100644 --- a/PAMI/frequentPattern/basic/ECLAT.py +++ b/PAMI/frequentPattern/basic/ECLAT.py @@ -1,10 +1,13 @@ # ECLAT is one of the fundamental algorithm to discover frequent patterns in a transactional database. # # **Importing this algorithm into a python program** -# ------------------------------------------------------------------ # # import PAMI.frequentPattern.basic.ECLAT as alg # +# iFile = 'sampleDB.txt' +# +# minSup = 10 # can also be specified between 0 and 1 +# # obj = alg.ECLAT(iFile, minSup) # # obj.mine() @@ -31,9 +34,6 @@ # - - - __copyright__ = """ Copyright (C) 2021 Rage Uday Kiran @@ -52,48 +52,34 @@ """ from PAMI.frequentPattern.basic import abstract as _ab -from typing import List, Dict, Tuple, Set, Union, Any, Generator from deprecated import deprecated class ECLAT(_ab._frequentPatterns): """ - :Description: ECLAT is one of the fundamental algorithm to discover frequent patterns in a transactional database. 
- - :Reference: Mohammed Javeed Zaki: Scalable Algorithms for Association Mining. IEEE Trans. Knowl. Data Eng. 12(3): - 372-390 (2000), https://ieeexplore.ieee.org/document/846291 - - :param iFile: str : - Name of the Input file to mine complete set of frequent pattern's - :param oFile: str : - Name of the output file to store complete set of frequent patterns - :param minSup: int or float or str : - The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. - :param sep: str : - This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. - - :Attributes: + About this algorithm + ==================== - startTime : float - To record the start time of the mining process + :**Description**: ECLAT is one of the fundamental algorithm to discover frequent patterns in a transactional database. - endTime : float - To record the completion time of the mining process - - finalPatterns : dict - Storing the complete set of patterns in a dictionary variable - - memoryUSS : float - To store the total amount of USS memory consumed by the program + :**Reference**: Mohammed Javeed Zaki: Scalable Algorithms for Association Mining. IEEE Trans. Knowl. Data Eng. 12(3): + 372-390 (2000), https://ieeexplore.ieee.org/document/846291 - memoryRSS : float - To store the total amount of RSS memory consumed by the program + :**Parameters**: - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.* + - **oFile** (*str*) -- *Name of the output file to store complete set of frequent patterns.* + - **minSup** (*int or float or str*) -- *The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. 
Otherwise, it will be treated as float.* + - **sep** (*str*) -- *This variable is used to distinguish items from one another in a transaction. The default separator is tab space. However, the users can override their default separator.* - Database : list - To store the transactions of a database in list + :**Attributes**: - **startTime** (*float*) -- *To record the start time of the mining process.* + - **endTime** (*float*) -- *To record the completion time of the mining process.* + - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.* + - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.* + - **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.* + - **Database** (*list*) -- *To store the transactions of a database in a list.* + Execution methods + ================= - **Methods to execute code on terminal** - ------------------------------------------ + **Terminal command** .. code-block:: console @@ -105,15 +91,19 @@ class ECLAT(_ab._frequentPatterns): (.venv) $ python3 ECLAT.py sampleDB.txt patterns.txt 10.0 - .. note:: minSup will be considered in percentage of database transactions + .. note:: minSup can be specified in support count or a value between 0 and 1. - **Importing this algorithm into a python program** - ------------------------------------------------------------------ + **Calling from a python program** + .. code-block:: python import PAMI.frequentPattern.basic.ECLAT as alg + iFile = 'sampleDB.txt' + + minSup = 10 # can also be specified between 0 and 1 + obj = alg.ECLAT(iFile, minSup) obj.mine() @@ -139,10 +129,10 @@ class ECLAT(_ab._frequentPatterns): print("Total ExecutionTime in seconds:", run) - **Credits:** - ---------------------- + Credits: + ======== - The complete program was written by Kundai under the supervision of Professor Rage Uday Kiran. 
+ The complete program was written by Kundai and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran. """ @@ -159,10 +149,10 @@ class ECLAT(_ab._frequentPatterns): def _creatingItemSets(self) -> float: """ + Storing the complete transactions of the database/input file in a database variable :return: the complete transactions of the database/input file in a database variable - :rtype: float """ self._Database = [] @@ -199,11 +189,8 @@ def _convert(self, value) -> float: To convert the user specified minSup value :param value: user specified minSup value - :return: converted type - :rtype: float - """ if type(value) is int: value = int(value) @@ -226,6 +213,16 @@ def startMine(self) -> None: self.mine() def __recursive(self, items, cands): + """ + + This function generates new candidates by taking input as original candidates. + + :param items: A dictionary containing items and their corresponding support values. + :type items: dict + :param cands: A list of candidate itemsets. 
+ :type cands: list + :return: None + """ for i in range(len(cands)): newCands = [] @@ -287,9 +284,7 @@ def getMemoryUSS(self) -> float: Total amount of USS memory consumed by the mining process will be retrieved from this function :return: returning USS memory consumed by the mining process - :rtype: float - """ return self._memoryUSS @@ -300,9 +295,7 @@ def getMemoryRSS(self) -> float: Total amount of RSS memory consumed by the mining process will be retrieved from this function :return: returning RSS memory consumed by the mining process - :rtype: float - """ return self._memoryRSS @@ -312,7 +305,6 @@ def getRuntime(self) -> float: Calculating the total amount of runtime taken by the mining process :return: returning total amount of runtime taken by the mining process - :rtype: float """ @@ -324,9 +316,7 @@ def getPatternsAsDataFrame(self) -> _ab._pd.DataFrame: Storing final frequent patterns in a dataframe :return: returning frequent patterns in a dataframe - :rtype: pd.DataFrame - """ # time = _ab._time.time() @@ -349,11 +339,8 @@ def save(self, outFile: str) -> None: Complete set of frequent patterns will be loaded in to an output file :param outFile: name of the output file - :type outFile: csvfile - :return: None - """ with open(outFile, 'w') as f: for x, y in self._finalPatterns.items(): @@ -365,7 +352,6 @@ def getPatterns(self) -> dict: Function to send the set of frequent patterns after completion of the mining process :return: returning frequent patterns - :rtype: dict """ return self._finalPatterns From 96831d7abefbdc7e5e17db7e3146e751d415da9a Mon Sep 17 00:00:00 2001 From: vanithakattumuri Date: Tue, 30 Apr 2024 14:53:29 +0900 Subject: [PATCH 2/3] #2 updated ECLATbitset.py nd FPGrowth.py documentation. 
--- PAMI/frequentPattern/basic/ECLAT.py | 2 +- PAMI/frequentPattern/basic/ECLATbitset.py | 103 ++++++++-------- PAMI/frequentPattern/basic/FPGrowth.py | 139 ++++++++-------------- 3 files changed, 103 insertions(+), 141 deletions(-) diff --git a/PAMI/frequentPattern/basic/ECLAT.py b/PAMI/frequentPattern/basic/ECLAT.py index f1911855..ee121532 100644 --- a/PAMI/frequentPattern/basic/ECLAT.py +++ b/PAMI/frequentPattern/basic/ECLAT.py @@ -76,7 +76,7 @@ class ECLAT(_ab._frequentPatterns): - **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.* - **Database** (*list*) -- *To store the transactions of a database in list.* - Execution methods + Execution methods ================= **Terminal command** diff --git a/PAMI/frequentPattern/basic/ECLATbitset.py b/PAMI/frequentPattern/basic/ECLATbitset.py index c208cd00..00926480 100644 --- a/PAMI/frequentPattern/basic/ECLATbitset.py +++ b/PAMI/frequentPattern/basic/ECLATbitset.py @@ -1,10 +1,13 @@ # ECLATbitset is one of the fundamental algorithm to discover frequent patterns in a transactional database. # # **Importing this algorithm into a python program** -# --------------------------------------------------------- # # import PAMI.frequentPattern.basic.ECLATbitset as alg # +# iFile = 'sampleDB.txt' +# +# minSup = 10 # can also be specified between 0 and 1 +# # obj = alg.ECLATbitset(iFile, minSup) # # obj.mine() @@ -54,43 +57,30 @@ class ECLATbitset(_ab._frequentPatterns): """ - :Description: ECLATbitset is one of the fundamental algorithm to discover frequent patterns in a transactional database. - - :Reference: Mohammed Javeed Zaki: Scalable Algorithms for Association Mining. IEEE Trans. Knowl. Data Eng. 
12(3): - 372-390 (2000), https://ieeexplore.ieee.org/document/846291 - - :param iFile: str : - Name of the Input file to mine complete set of frequent patterns - :param oFile: str : - Name of the output file to store complete set of frequent patterns - :param minSup: int or float or str : - The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. - :param sep: str : - This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. + About this algorithm + ==================== - :Attributes: + :**Description**: ECLATbitset is one of the fundamental algorithms to discover frequent patterns in a transactional database. - startTime : float - To record the start time of the mining process - - endTime : float - To record the completion time of the mining process - - finalPatterns : dict - Storing the complete set of patterns in a dictionary variable - - memoryUSS : float - To store the total amount of USS memory consumed by the program + :**Reference**: Mohammed Javeed Zaki: Scalable Algorithms for Association Mining. IEEE Trans. Knowl. Data Eng. 12(3): + 372-390 (2000), https://ieeexplore.ieee.org/document/846291 - memoryRSS : float - To store the total amount of RSS memory consumed by the program + :**Parameters**: - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.* + - **oFile** (*str*) -- *Name of the output file to store complete set of frequent patterns.* + - **minSup** (*int or float or str*) -- *The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. 
Otherwise, it will be treated as float.* + - **sep** (*str*) -- *This variable is used to distinguish items from one another in a transaction. The default separator is tab space. However, the users can override their default separator.* - Database : list - To store the transactions of a database in list + :**Attributes**: - **startTime** (*float*) -- *To record the start time of the mining process.* + - **endTime** (*float*) -- *To record the completion time of the mining process.* + - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.* + - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.* + - **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.* + - **Database** (*list*) -- *To store the transactions of a database in a list.* + Execution methods + ================= - **Methods to execute code on terminal** - ------------------------------------------ + **Terminal command** .. code-block:: console @@ -102,15 +92,19 @@ class ECLATbitset(_ab._frequentPatterns): (.venv) $ python3 ECLATbitset.py sampleDB.txt patterns.txt 10.0 - .. note:: minSup will be considered in percentage of database transactions + .. note:: minSup can be specified in support count or a value between 0 and 1. + + **Calling from a python program** - **Importing this algorithm into a python program** - --------------------------------------------------------- .. code-block:: python import PAMI.frequentPattern.basic.ECLATbitset as alg + iFile = 'sampleDB.txt' + + minSup = 10 # can also be specified between 0 and 1 + obj = alg.ECLATbitset(iFile, minSup) obj.mine() @@ -135,10 +129,10 @@ class ECLATbitset(_ab._frequentPatterns): print("Total ExecutionTime in seconds:", run) - **Credits:** - ------------------- + Credits: + ======== - The complete program was written by Yudai Masu under the supervision of Professor Rage Uday Kiran. 
+ The complete program was written by Yudai Masu and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran. """ @@ -160,11 +154,8 @@ def _convert(self, value): To convert the user specified minSup value :param value: user specified minSup value - :type value: int - :return: converted type - :rtype: int or float or string """ if type(value) is int: @@ -213,8 +204,7 @@ def _creatingItemSets(self): print("File Not Found") self._minSup = self._convert(self._minSup) - @deprecated( - "It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.") + @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.") def startMine(self): """ Frequent pattern mining process will start from here @@ -228,13 +218,9 @@ def _bitPacker(self, data, maxIndex): It takes the data and maxIndex as input and generates integer as output value. :param data: it takes data as input. - :type data: int or float - :param maxIndex: It converts the data into bits By taking the maxIndex value as condition. - :type maxIndex: int - """ packed_bits = 0 for i in data: @@ -243,6 +229,16 @@ def _bitPacker(self, data, maxIndex): return packed_bits def __recursive(self, items, cands): + """ + + This function generates new candidates by taking input as original candidates. + + :param items: A dictionary containing items and their corresponding support values. + :type items: dict + :param cands: A list of candidate itemsets. 
+ :type cands: list + :return: None + """ for i in range(len(cands)): newCands = [] @@ -302,7 +298,9 @@ def mine(self) -> None: def getMemoryUSS(self): """ + Total amount of USS memory consumed by the mining process will be retrieved from this function + :return: returning USS memory consumed by the mining process :rtype: float """ @@ -311,7 +309,9 @@ def getMemoryUSS(self): def getMemoryRSS(self): """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + :return: returning RSS memory consumed by the mining process :rtype: float """ @@ -320,7 +320,9 @@ def getMemoryRSS(self): def getRuntime(self): """ + Calculating the total amount of runtime taken by the mining process + :return: returning total amount of runtime taken by the mining process :rtype: float """ @@ -333,9 +335,7 @@ def getPatternsAsDataFrame(self) -> _ab._pd.DataFrame: Storing final frequent patterns in a dataframe :return: returning frequent patterns in a dataframe - :rtype: pd.DataFrame - """ # time = _ab._time.time() @@ -358,11 +358,8 @@ def save(self, outFile: str) -> None: Complete set of frequent patterns will be loaded in to an output file :param outFile: name of the output file - :type outFile: csvfile - :return: None - """ with open(outFile, 'w') as f: for x, y in self._finalPatterns.items(): @@ -372,7 +369,9 @@ def save(self, outFile: str) -> None: def getPatterns(self): """ + Function to send the set of frequent patterns after completion of the mining process + :return: returning frequent patterns :rtype: dict """ diff --git a/PAMI/frequentPattern/basic/FPGrowth.py b/PAMI/frequentPattern/basic/FPGrowth.py index 7e9075ce..e97536de 100644 --- a/PAMI/frequentPattern/basic/FPGrowth.py +++ b/PAMI/frequentPattern/basic/FPGrowth.py @@ -1,10 +1,13 @@ # FPGrowth is one of the fundamental algorithm to discover frequent patterns in a transactional database. 
It stores the database in compressed fp-tree decreasing the memory usage and extracts the patterns from tree.It employs downward closure property to reduce the search space effectively. # # **Importing this algorithm into a python program** -# -------------------------------------------------------- # # from PAMI.frequentPattern.basic import FPGrowth as alg # +# iFile = 'sampleDB.txt' +# +# minSup = 10 # can also be specified between 0 and 1 +# # obj = alg.FPGrowth(iFile, minSup) # # obj.mine() @@ -31,8 +34,6 @@ # - - __copyright__ = """ Copyright (C) 2021 Rage Uday Kiran @@ -51,7 +52,7 @@ """ from PAMI.frequentPattern.basic import abstract as _fp -from typing import List, Dict, Tuple, Set, Union, Any, Generator +from typing import List, Dict, Tuple, Any from deprecated import deprecated from itertools import combinations from collections import Counter @@ -64,22 +65,12 @@ class _Node: """ A class used to represent the node of frequentPatternTree - :Attributes: - - itemId: int - storing item of a node - counter: int - To maintain the support of node - parent: node - To maintain the parent of node - children: list - To maintain the children of node - - :Methods: - - addChild(node) - Updates the nodes children list and parent for the given node + :**Attributes**: - **itemId** (*int*) -- *storing item of a node.* + - **counter** (*int*) -- *To maintain the support of node.* + - **parent** (*node*) -- *To maintain the parent of node.* + - **children** (*list*) -- *To maintain the children of node.* + :**Methods**: - **addChild(node)** -- *Updates the nodes children list and parent for the given node.* """ def __init__(self, item, count, parent) -> None: @@ -90,17 +81,15 @@ def __init__(self, item, count, parent) -> None: def addChild(self, item, count = 1) -> Any: """ + Adds a child node to the current node with the specified item and count. :param item: The item associated with the child node. :type item: List - :param count: The count or support of the item. 
Default is 1. :type count: int - :return: The child node added. :rtype: List - """ if item not in self.children: self.children[item] = _Node(item, count, self) @@ -113,7 +102,6 @@ def traverse(self) -> Tuple[List[int], int]: Traversing the tree to get the transaction :return: transaction and count of each item in transaction - :rtype: Tuple, List and int """ transaction = [] @@ -127,55 +115,33 @@ def traverse(self) -> Tuple[List[int], int]: class FPGrowth(_fp._frequentPatterns): """ + About this algorithm + ==================== - :Description: FPGrowth is one of the fundamental algorithm to discover frequent patterns in a transactional database. It stores the database in compressed fp-tree decreasing the memory usage and extracts the patterns from tree.It employs downward closure property to reduce the search space effectively. - - :Reference: Han, J., Pei, J., Yin, Y. et al. Mining Frequent Patterns without Candidate Generation: A Frequent-Pattern - Tree Approach. Data Mining and Knowledge Discovery 8, 53–87 (2004). https://doi.org/10.1023 + :**Description**: FPGrowth is one of the fundamental algorithm to discover frequent patterns in a transactional database. It stores the database in compressed fp-tree decreasing the memory usage and extracts the patterns from tree.It employs downward closure property to reduce the search space effectively. - :param iFile: str : - Name of the Input file to mine complete set of frequent patterns - :param oFile: str : - Name of the output file to store complete set of frequent patterns - :param minSup: int or float or str : - The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. - :param sep: str : - This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. 
+ :**Reference**: Han, J., Pei, J., Yin, Y. et al. Mining Frequent Patterns without Candidate Generation: A Frequent-Pattern + Tree Approach. Data Mining and Knowledge Discovery 8, 53–87 (2004). https://doi.org/10.1023 + :**Parameters**: - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.* + - **oFile** (*str*) -- *Name of the output file to store complete set of frequent patterns.* + - **minSup** (*int or float or str*) -- *The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. Otherwise, it will be treated as float.* + - **sep** (*str*) -- *This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator.* + :**Attributes**: - **startTime** (*float*) -- *To record the start time of the mining process.* + - **endTime** (*float*) -- *To record the completion time of the mining process.* + - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.* + - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.* + - **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.* + - **Database** (*list*) -- *To store the transactions of a database in list.* + - **mapSupport** (*Dictionary*) -- *To maintain the information of item and their frequency.* + - **tree** (*class*) -- *it represents the Tree class.* - :Attributes: - startTime : float - To record the start time of the mining process + Execution methods + ================= - endTime : float - To record the completion time of the mining process - - finalPatterns : dict - Storing the complete set of patterns in a dictionary variable - - memoryUSS : float - To store the total amount of USS memory consumed by the program - - 
memoryRSS : float - To store the total amount of RSS memory consumed by the program - - Database : list - To store the transactions of a database in list - - mapSupport : Dictionary - To maintain the information of item and their frequency - lno : int - it represents the total no of transactions - tree : class - it represents the Tree class - finalPatterns : dict - it represents to store the patterns - - - **Methods to execute code on terminal** - -------------------------------------------------------- + **Terminal command** .. code-block:: console @@ -187,15 +153,19 @@ class FPGrowth(_fp._frequentPatterns): (.venv) $ python3 FPGrowth.py sampleDB.txt patterns.txt 10.0 - .. note:: minSup will be considered in percentage of database transactions + .. note:: minSup can be specified in support count or a value between 0 and 1. + + **Calling from a python program** - **Importing this algorithm into a python program** - -------------------------------------------------------- .. code-block:: python from PAMI.frequentPattern.basic import FPGrowth as alg + iFile = 'sampleDB.txt' + + minSup = 10 # can also be specified between 0 and 1 + obj = alg.FPGrowth(iFile, minSup) obj.mine() @@ -221,9 +191,10 @@ class FPGrowth(_fp._frequentPatterns): print("Total ExecutionTime in seconds:", run) - **Credits:** - ---------------------------- - The complete program was written by P.Likhitha under the supervision of Professor Rage Uday Kiran. + Credits: + ======== + + The complete program was written by P. Likhitha and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran. """ @@ -285,11 +256,8 @@ def __convert(self, value) -> float: To convert the type of user specified minSup value :param value: user specified minSup value - :return: converted type - :rtype: float - """ if type(value) is int: value = int(value) @@ -309,13 +277,10 @@ def _construct(self, items, data, minSup): :param items: A dictionary containing item frequencies. 
:type items: Dict - :param data: A list of transactions. :type data: List - :param minSup: The minimum support threshold. :type minSup: int - :return: The root node of the constructed FP-tree and a dictionary containing information about nodes associated with each item. :rtype: Tuple[_Node, Dict] """ @@ -339,14 +304,13 @@ def _construct(self, items, data, minSup): def _all_combinations(self, arr): """ + Generates all possible combinations of items from a given transaction. :param arr: A list of items in a transaction. :type arr: List - :return: A list containing all possible combinations of items. :rtype: List - """ all_combinations_list = [] @@ -361,16 +325,12 @@ def _recursive(self, root, itemNode, minSup, patterns): :param root: The root node of the current subtree. :type root: _Node - :param itemNode: A dictionary containing information about the nodes associated with each item. :type itemNode: Dict - :param minSup: The minimum support threshold. :type minSup: int - :param patterns: A dictionary to store the generated frequent patterns. 
:type patterns: Dict - """ itemNode = {k: v for k, v in sorted(itemNode.items(), key = lambda x: x[1][1])} @@ -476,10 +436,10 @@ def startMine(self): def getMemoryUSS(self) -> float: """ + Total amount of USS memory consumed by the mining process will be retrieved from this function :return: returning USS memory consumed by the mining process - :rtype: float """ @@ -487,7 +447,9 @@ def getMemoryUSS(self) -> float: def getMemoryRSS(self) -> float: """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + :return: returning RSS memory consumed by the mining process :rtype: float """ @@ -496,10 +458,10 @@ def getMemoryRSS(self) -> float: def getRuntime(self) -> float: """ + Calculating the total amount of runtime taken by the mining process :return: returning total amount of runtime taken by the mining process - :rtype: float """ @@ -507,10 +469,10 @@ def getRuntime(self) -> float: def getPatternsAsDataFrame(self) -> _fp._pd.DataFrame: """ + Storing final frequent patterns in a dataframe :return: returning frequent patterns in a dataframe - :rtype: pd.DataFrame """ @@ -523,12 +485,11 @@ def getPatternsAsDataFrame(self) -> _fp._pd.DataFrame: def save(self, outFile: str) -> None: """ + Complete set of frequent patterns will be loaded in to an output file :param outFile: name of the output file - :type outFile: csvfile - :return: None """ self._oFile = outFile @@ -539,7 +500,9 @@ def save(self, outFile: str) -> None: def getPatterns(self) -> Dict[str, int]: """ + Function to send the set of frequent patterns after completion of the mining process + :return: returning frequent patterns :rtype: dict """ From f0d9c34dcd9f8c5d042e68ad3d6396e0090b803a Mon Sep 17 00:00:00 2001 From: vanithakattumuri Date: Tue, 30 Apr 2024 23:04:51 +0900 Subject: [PATCH 3/3] #2 updated CHARM.py documentation. 
--- PAMI/frequentPattern/closed/CHARM.py | 124 ++++++++++++--------------- 1 file changed, 54 insertions(+), 70 deletions(-) diff --git a/PAMI/frequentPattern/closed/CHARM.py b/PAMI/frequentPattern/closed/CHARM.py index 1c43d7b1..d995f1db 100644 --- a/PAMI/frequentPattern/closed/CHARM.py +++ b/PAMI/frequentPattern/closed/CHARM.py @@ -1,11 +1,13 @@ -# CHARM is an algorithm to discover closed frequent patterns in a transactional database. Closed frequent patterns are patterns if there exists no superset that has the same support count as this original itemset . This algorithm employs depth-first search technique to find the complete set of closed frequent patterns in a +# CHARM is an algorithm to discover closed frequent patterns in a transactional database. Closed frequent patterns are patterns if there exists no superset that has the same support count as this original itemset . This algorithm employs depth-first search technique to find the complete set of closed frequent patterns in a transactional database. # # **Importing this algorithm into a python program** -# -------------------------------------------------------------- -# # # from PAMI.frequentPattern.closed import CHARM as alg # +# iFile = 'sampleDB.txt' +# +# minSup = 10 # can also be specified between 0 and 1 +# # obj = alg.CHARM(iFile, minSup) # # obj.mine() @@ -32,8 +34,6 @@ # - - __copyright__ = """ Copyright (C) 2021 Rage Uday Kiran @@ -58,61 +58,33 @@ class CHARM(_ab._frequentPatterns): """ - :Description: CHARM is an algorithm to discover closed frequent patterns in a transactional database. Closed frequent patterns are patterns if there exists no superset that has the same support count as this original itemset. This algorithm employs depth-first search technique to find the complete set of closed frequent patterns in a - - - :Reference: Mohammed J. Zaki and Ching-Jui Hsiao, CHARM: An Efficient Algorithm for Closed Itemset Mining, - Proceedings of the 2002 SIAM, SDM. 
2002, 457-473, https://doi.org/10.1137/1.9781611972726.27 - - :param iFile: str : - Name of the Input file to mine complete set of frequent patterns - :param oFile: str : - Name of the output file to store complete set of frequent patterns - :param minSup: int or float or str : - The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. - :param sep: str : - This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. - - - - :Attributes: - - startTime : float - To record the start time of the mining process + :**Description**: CHARM is an algorithm to discover closed frequent patterns in a transactional database. Closed frequent patterns are patterns if there exists no superset that has the same support count as this original itemset. This algorithm employs depth-first search technique to find the complete set of closed frequent patterns in a transactional database. - endTime : float - To record the completion time of the mining process + :**Reference**: Mohammed J. Zaki and Ching-Jui Hsiao, CHARM: An Efficient Algorithm for Closed Itemset Mining, + Proceedings of the 2002 SIAM, SDM. 2002, 457-473, https://doi.org/10.1137/1.9781611972726.27 - finalPatterns : dict - Storing the complete set of patterns in a dictionary variable + :**Parameters**: - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.* + - **oFile** (*str*) -- *Name of the output file to store complete set of frequent patterns.* + - **minSup** (*int or float or str*) -- *The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. 
Otherwise, it will be treated as float.* + - **sep** (*str*) -- *This variable is used to distinguish items from one another in a transaction. The default separator is tab space. However, users can override the default separator.* - memoryUSS : float - To store the total amount of USS memory consumed by the program + :**Attributes**: - **startTime** (*float*) -- *To record the start time of the mining process.* + - **endTime** (*float*) -- *To record the completion time of the mining process.* + - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.* + - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.* + - **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.* + - **Database** (*list*) -- *To store the transactions of a database in list.* + - **mapSupport** (*Dictionary*) -- *To maintain the information of item and their frequency.* + - **tree** (*class*) -- *It represents the Tree class.* + - **itemSetCount** (*int*) -- *It represents the total number of patterns.* + - **tidList** (*dict*) -- *Stores the timestamps of an item.* + - **hashing** (*dict*) -- *Stores the patterns with their support to check for the closed property.* - memoryRSS : float - To store the total amount of RSS memory consumed by the program - Database : list - To store the transactions of a database in list + Execution methods + ================= - mapSupport : Dictionary - To maintain the information of item and their frequency - lno : int - it represents the total no of transactions - tree : class - it represents the Tree class - itemSetCount : int - it represents the total no of patterns - finalPatterns : dict - it represents to store the patterns - tidList : dict - stores the timestamps of an item - hashing : dict - stores the patterns with their support to check for the closed property - - - **Methods to execute code on terminal** -
-------------------------------------------------------------- + **Terminal command** .. code-block:: console @@ -124,15 +96,19 @@ class CHARM(_ab._frequentPatterns): (.venv) $ python3 CHARM.py sampleDB.txt patterns.txt 10.0 - .. note:: minSup will be considered in percentage of database transactions + .. note:: minSup can be specified in support count or a value between 0 and 1. + + **Calling from a python program** - **Importing this algorithm into a python program** - -------------------------------------------------------------- .. code-block:: python from PAMI.frequentPattern.closed import CHARM as alg + iFile = 'sampleDB.txt' + + minSup = 10 # can also be specified between 0 and 1 + obj = alg.CHARM(iFile, minSup) obj.mine() @@ -158,10 +134,10 @@ class CHARM(_ab._frequentPatterns): print("Total ExecutionTime in seconds:", run) - **Credits:** - ------------------------------- + Credits + ======= - The complete program was written by P.Likhitha under the supervision of Professor Rage Uday Kiran. + The complete program was written by P. Likhitha and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran. """ @@ -190,11 +166,8 @@ def _convert(self, value): To convert the type of user specified minSup value :param value: user specified minSup value - :type value: int or float or str - :return: converted type - """ if type(value) is int: value = int(value) @@ -266,12 +239,11 @@ def _creatingItemsets(self): def _calculate(self, tidSet): """ + To calculate the hashcode of pattern :param tidSet: the timestamps of a pattern - :type tidSet: list - :rtype: int """ @@ -279,8 +251,10 @@ def _calculate(self, tidSet): def _contains(self, itemSet, value, hashcode): """ + Check for the closed property(patterns with same support) by checking the hashcode(sum of timestamps), if hashcode key in hashing dict is none then returns a false, else returns with true.
+ :param itemSet: frequent pattern :type itemSet: list :param value: support of the pattern @@ -298,19 +272,15 @@ def _contains(self, itemSet, value, hashcode): def _save(self, prefix, suffix, tidSetx): """ + Check for the closed property (patterns with same support), if found deletes the subsets and stores supersets and also saves the patterns that satisfy the closed property :param prefix: the prefix of a pattern - :type prefix: frequent item or pattern - :param suffix: the suffix of a patterns - :type suffix: list - :param tidSetx: the timestamp of a patterns - :type tidSetx: list """ if prefix is None: @@ -335,7 +305,9 @@ def _save(self, prefix, suffix, tidSetx): def _processEquivalenceClass(self, prefix, itemSets, tidSets): """ + Equivalence class is followed and check for the patterns which satisfies frequent properties. + :param prefix: main equivalence prefix :type prefix: frequent item or pattern :param itemSets: patterns which are items combined with prefix and satisfying the minSup @@ -458,7 +430,9 @@ def mine(self): def getMemoryUSS(self): """ + Total amount of USS memory consumed by the mining process will be retrieved from this function + :return: returning USS memory consumed by the mining process :rtype: float """ @@ -467,7 +441,9 @@ def getMemoryUSS(self): def getMemoryRSS(self): """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + :return: returning RSS memory consumed by the mining process :rtype: float """ @@ -476,7 +452,9 @@ def getMemoryRSS(self): def getRuntime(self): """ + Calculating the total amount of runtime taken by the mining process + :return: returning total amount of runtime taken by the mining process :rtype: float """ @@ -485,7 +463,9 @@ def getRuntime(self): def getPatternsAsDataFrame(self): """ + Storing final frequent patterns in a dataframe + :return: returning frequent patterns in a dataframe :rtype: pd.DataFrame """ @@ -499,7 +479,9 @@ def getPatternsAsDataFrame(self): def 
save(self, outFile): """ + Complete set of frequent patterns will be loaded in to an output file + :param outFile: name of the output file :type outFile: csvfile """ @@ -511,7 +493,9 @@ def save(self, outFile): def getPatterns(self): """ + Function to send the set of frequent patterns after completion of the mining process + :return: returning frequent patterns :rtype: dict """