From 01ba393815fec20594c3cadb4249364e1ecf256f Mon Sep 17 00:00:00 2001 From: vanitha Date: Wed, 24 Apr 2024 20:11:43 +0900 Subject: [PATCH 1/3] #2 updated the sphinx documentation --- .readthedocs.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index f74b9808..b5fb4a7a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -23,7 +23,9 @@ build: # Build documentation in the "docs/" directory with Sphinx sphinx: configuration: finalSphinxDocs/conf.py - fail_on_warning: false #true if you want to stop the building process + fail_on_warning: true + +#true if you want to stop the building process # Optionally build your docs in additional formats such as PDF and ePub formats: From 4857ceeef0146e71b060876cc9f7a476d4186fdb Mon Sep 17 00:00:00 2001 From: vanitha Date: Tue, 30 Apr 2024 13:03:57 +0900 Subject: [PATCH 2/3] #2 updated the documentation --- PAMI/frequentPattern/basic/Apriori.py | 5 +- PAMI/frequentPattern/basic/ECLAT.py | 96 ++++++++-------------- PAMI/frequentPattern/basic/ECLATDiffset.py | 95 ++++++++++----------- PAMI/frequentPattern/basic/ECLATbitset.py | 96 +++++++++++----------- 4 files changed, 131 insertions(+), 161 deletions(-) diff --git a/PAMI/frequentPattern/basic/Apriori.py b/PAMI/frequentPattern/basic/Apriori.py index fea3fa42..abff5fdf 100644 --- a/PAMI/frequentPattern/basic/Apriori.py +++ b/PAMI/frequentPattern/basic/Apriori.py @@ -135,7 +135,7 @@ class Apriori(_ab._frequentPatterns): Credits ======= - The complete program was written by P. Likhitha under the supervision of Professor Rage Uday Kiran. + The complete program was written by P. Likhitha and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran. """ @@ -207,8 +207,7 @@ def _convert(self, value: Union[int, float, str]) -> Union[int, float]: value = int(value) return value - @deprecated( - "It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.") + @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.") def startMine(self) -> None: """ Frequent pattern mining process will start from here diff --git a/PAMI/frequentPattern/basic/ECLAT.py b/PAMI/frequentPattern/basic/ECLAT.py index 09ea6559..c4466ccf 100644 --- a/PAMI/frequentPattern/basic/ECLAT.py +++ b/PAMI/frequentPattern/basic/ECLAT.py @@ -1,10 +1,13 @@ # ECLAT is one of the fundamental algorithm to discover frequent patterns in a transactional database. # # **Importing this algorithm into a python program** -# ------------------------------------------------------------------ # # import PAMI.frequentPattern.basic.ECLAT as alg # +# iFile = 'sampleDB.txt' +# +# minSup = 10 # can also be specified between 0 and 1 +# # obj = alg.ECLAT(iFile, minSup) # # obj.mine() @@ -31,9 +34,6 @@ # - - - __copyright__ = """ Copyright (C) 2021 Rage Uday Kiran @@ -52,48 +52,34 @@ """ from PAMI.frequentPattern.basic import abstract as _ab -from typing import List, Dict, Tuple, Set, Union, Any, Generator from deprecated import deprecated class ECLAT(_ab._frequentPatterns): """ - :Description: ECLAT is one of the fundamental algorithm to discover frequent patterns in a transactional database. - - :Reference: Mohammed Javeed Zaki: Scalable Algorithms for Association Mining. IEEE Trans. Knowl. Data Eng. 
12(3): - 372-390 (2000), https://ieeexplore.ieee.org/document/846291 - - :param iFile: str : - Name of the Input file to mine complete set of frequent pattern's - :param oFile: str : - Name of the output file to store complete set of frequent patterns - :param minSup: int or float or str : - The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. - :param sep: str : - This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. - - :Attributes: - - startTime : float - To record the start time of the mining process + About this algorithm + ==================== - endTime : float - To record the completion time of the mining process + :**Description**: *ECLAT is one of the fundamental algorithm to discover frequent patterns in a transactional database.* - finalPatterns : dict - Storing the complete set of patterns in a dictionary variable + :**Reference**: Mohammed Javeed Zaki: Scalable Algorithms for Association Mining. IEEE Trans. Knowl. Data Eng. 12(3): + 372-390 (2000), https://ieeexplore.ieee.org/document/846291 - memoryUSS : float - To store the total amount of USS memory consumed by the program + :**Parameters**: - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.* + - **oFile** (*str*) -- *Name of the Output file to store the frequent patterns.* + - **minSup** (*int or float or str*) -- The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. + - **sep** (*str*) -- This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. - memoryRSS : float - To store the total amount of RSS memory consumed by the program + :**Attributes**: - **startTime** (*float*) -- *To record the start time of the mining process.* + - **endTime** (*float*) -- *To record the end time of the mining process.* + - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.* + - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.* + - **memoryRSS** *(float*) -- *To store the total amount of RSS memory consumed by the program.* + - **Database** (*list*) -- *To store the transactions of a database in list.* - Database : list - To store the transactions of a database in list + Execution methods + ================= - - **Methods to execute code on terminal** - ------------------------------------------ + **Terminal command** .. code-block:: console @@ -105,15 +91,19 @@ class ECLAT(_ab._frequentPatterns): (.venv) $ python3 ECLAT.py sampleDB.txt patterns.txt 10.0 - .. note:: minSup will be considered in percentage of database transactions + .. note:: minSup can be specified in support count or a value between 0 and 1. + + **Calling from a python program** - **Importing this algorithm into a python program** - ------------------------------------------------------------------ .. 
code-block:: python import PAMI.frequentPattern.basic.ECLAT as alg + iFile = 'sampleDB.txt' + + minSup = 10 # can also be specified between 0 and 1 + obj = alg.ECLAT(iFile, minSup) obj.mine() @@ -139,10 +129,10 @@ class ECLAT(_ab._frequentPatterns): print("Total ExecutionTime in seconds:", run) - **Credits:** - ---------------------- + Credits: + ======== - The complete program was written by Kundai under the supervision of Professor Rage Uday Kiran. + The complete program was written by Kundai and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran. """ @@ -159,10 +149,10 @@ class ECLAT(_ab._frequentPatterns): def _creatingItemSets(self) -> float: """ + Storing the complete transactions of the database/input file in a database variable :return: the complete transactions of the database/input file in a database variable - :rtype: float """ self._Database = [] @@ -199,9 +189,7 @@ def _getUniqueItemList(self) -> list: Generating one frequent patterns :return: list of unique patterns - :rtype: list - """ self._finalPatterns = {} candidate = {} @@ -226,11 +214,8 @@ def _generateFrequentPatterns(self, candidateFrequent: list) -> None: It will generate the combinations of frequent items :param candidateFrequent :it represents the items with their respective transaction identifiers - :type candidateFrequent: list - :return: None - """ new_freqList = [] for i in range(0, len(candidateFrequent)): @@ -256,11 +241,8 @@ def _convert(self, value) -> float: To convert the user specified minSup value :param value: user specified minSup value - :return: converted type - :rtype: float - """ if type(value) is int: value = int(value) @@ -312,9 +294,7 @@ def getMemoryUSS(self) -> float: Total amount of USS memory consumed by the mining process will be retrieved from this function :return: returning USS memory consumed by the mining process - :rtype: float - """ return self._memoryUSS @@ -325,9 +305,7 @@ def getMemoryRSS(self) -> float: Total amount of RSS memory consumed by the mining process will be retrieved from this function :return: returning RSS memory consumed by the mining process - :rtype: float - """ return self._memoryRSS @@ -337,7 +315,6 @@ def getRuntime(self) -> float: Calculating the total amount of runtime taken by the mining process :return: returning total amount of runtime taken by the mining process - :rtype: float """ @@ -349,9 +326,7 @@ def getPatternsAsDataFrame(self) -> _ab._pd.DataFrame: Storing final frequent patterns in a dataframe :return: returning frequent patterns in a dataframe - :rtype: pd.DataFrame - """ dataFrame = {} @@ -367,11 +342,8 @@ def save(self, outFile: str) -> None: Complete set of frequent patterns will be loaded in to an output file :param outFile: name of the output file - :type outFile: csvfile - :return: None - """ self._oFile = outFile writer = open(self._oFile, 'w+') @@ -381,10 +353,10 @@ def save(self, outFile: str) -> None: def getPatterns(self) -> dict: """ + Function to send the set of frequent patterns after completion of the mining process :return: returning frequent patterns - :rtype: dict """ return self._finalPatterns diff --git a/PAMI/frequentPattern/basic/ECLATDiffset.py b/PAMI/frequentPattern/basic/ECLATDiffset.py index c382679e..b9970a1a 100644 --- a/PAMI/frequentPattern/basic/ECLATDiffset.py +++ b/PAMI/frequentPattern/basic/ECLATDiffset.py @@ -1,10 +1,13 @@ # ECLATDiffest uses diffset to extract the frequent patterns in a transactional database. 
- +# # **Importing this algorithm into a python program** -# --------------------------------------------------------- # # import PAMI.frequentPattern.basic.ECLATDiffset as alg # +# iFile = 'sampleDB.txt' +# +# minSup = 10 # can also be specified between 0 and 1 +# # obj = alg.ECLATDiffset(iFile, minSup) # # obj.mine() @@ -31,10 +34,6 @@ # - - - - __copyright__ = """ Copyright (C) 2021 Rage Uday Kiran @@ -61,43 +60,28 @@ class ECLATDiffset(_ab._frequentPatterns): """ - :Description: ECLATDiffset uses diffset to extract the frequent patterns in a transactional database. + :**Description**: ECLATDiffset uses diffset to extract the frequent patterns in a transactional database. - :Reference: KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining - August 2003 Pages 326–335 https://doi.org/10.1145/956750.956788 + :**Reference**: KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining + August 2003 Pages 326–335 https://doi.org/10.1145/956750.956788 - :param iFile: str : - Name of the Input file to mine complete set of frequent pattern's - :param oFile: str : - Name of the output file to store complete set of frequent patterns - :param minSup: int or float or str : - The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. - :param sep: str : - This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. - - :Attributes: - - startTime : float - To record the start time of the mining process - - endTime : float - To record the completion time of the mining process - - finalPatterns : dict - Storing the complete set of patterns in a dictionary variable - - memoryUSS : float - To store the total amount of USS memory consumed by the program - - memoryRSS : float - To store the total amount of RSS memory consumed by the program - - Database : list - To store the transactions of a database in list + :**Parameters**: - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.* + - **oFile** (*str*) -- *Name of the output file to store complete set of frequent patterns* + - **minSup** (*int or float or str*) -- *The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count.* + - **sep** (*str*) -- **This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator.** + + :**Attributes**: - **startTime** (*float*) -- *To record the start time of the mining process.* + - **endTime** (*float*) -- *To record the end time of the mining process.* + - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.* + - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.* + - **memoryRSS** *(float*) -- *To store the total amount of RSS memory consumed by the program.* + - **Database** (*list*) -- *To store the transactions of a database in list.* - **Methods to execute code on terminal** - ------------------------------------------ + Execution methods + ================= + + **Terminal command** .. 
code-block:: console @@ -109,15 +93,19 @@ class ECLATDiffset(_ab._frequentPatterns): (.venv) $ python3 ECLATDiffset.py sampleDB.txt patterns.txt 10.0 - .. note:: minSup will be considered in percentage of database transactions + .. note:: minSup can be specified in support count or a value between 0 and 1. - **Importing this algorithm into a python program** - --------------------------------------------------------- + **Calling from a python program** + .. code-block:: python import PAMI.frequentPattern.basic.ECLATDiffset as alg + iFile = 'sampleDB.txt' + + minSup = 10 # can also be specified between 0 and 1 + obj = alg.ECLATDiffset(iFile, minSup) obj.mine() @@ -143,10 +131,10 @@ class ECLATDiffset(_ab._frequentPatterns): print("Total ExecutionTime in seconds:", run) - **Credits:** - ------------------- + Credits: + ======== - The complete program was written by Kundai under the supervision of Professor Rage Uday Kiran. + The complete program was written by Kundai and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran. """ @@ -197,7 +185,9 @@ def _creatingItemSets(self): def _convert(self, value): """ + To convert the user specified minSup value + :param value: user specified minSup value :return: converted type """ @@ -243,7 +233,9 @@ def _getUniqueItemList(self): def _runDeclat(self, candidateList): """ + It will generate the combinations of frequent items + :param candidateList :it represents the items with their respective transaction identifiers :type candidateList: list :return: returning transaction dictionary @@ -275,7 +267,6 @@ def startMine(self): """ Frequent pattern mining process will start from here """ - self.mine() def mine(self): @@ -310,7 +301,9 @@ def mine(self): def getMemoryUSS(self): """ + Total amount of USS memory consumed by the mining process will be retrieved from this function + :return: returning USS memory consumed by the mining process :rtype: float """ @@ -319,7 +312,9 @@ def getMemoryUSS(self): def getMemoryRSS(self): """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + :return: returning RSS memory consumed by the mining process :rtype: float """ @@ -328,7 +323,9 @@ def getMemoryRSS(self): def getRuntime(self): """ + Calculating the total amount of runtime taken by the mining process + :return: returning total amount of runtime taken by the mining process :rtype: float """ @@ -337,7 +334,9 @@ def getRuntime(self): def getPatternsAsDataFrame(self): """ + Storing final frequent patterns in a dataframe + :return: returning frequent patterns in a dataframe :rtype: pd.DataFrame """ @@ -351,7 +350,9 @@ def getPatternsAsDataFrame(self): def save(self, outFile): """ + Complete set of frequent patterns will be loaded in to an output file + :param outFile: name of the output file :type outFile: csvfile """ @@ -363,7 +364,9 @@ def save(self, outFile): def getPatterns(self): """ + Function to send the set of frequent patterns after completion of the mining process + :return: returning frequent patterns :rtype: dict """ diff --git a/PAMI/frequentPattern/basic/ECLATbitset.py b/PAMI/frequentPattern/basic/ECLATbitset.py index b617390c..e5b2d9d9 100644 --- a/PAMI/frequentPattern/basic/ECLATbitset.py +++ b/PAMI/frequentPattern/basic/ECLATbitset.py @@ -1,10 +1,13 @@ # ECLATbitset is one of the fundamental algorithm to discover frequent patterns in a transactional database. 
# # **Importing this algorithm into a python program** -# --------------------------------------------------------- # # import PAMI.frequentPattern.basic.ECLATbitset as alg # +# iFile = 'sampleDB.txt' +# +# minSup = 10 # can also be specified between 0 and 1 +# # obj = alg.ECLATbitset(iFile, minSup) # # obj.mine() @@ -54,45 +57,27 @@ class ECLATbitset(_ab._frequentPatterns): """ - :Description: ECLATbitset is one of the fundamental algorithm to discover frequent patterns in a transactional database. - - :Reference: Mohammed Javeed Zaki: Scalable Algorithms for Association Mining. IEEE Trans. Knowl. Data Eng. 12(3): - 372-390 (2000), https://ieeexplore.ieee.org/document/846291 - - :param iFile: str : - Name of the Input file to mine complete set of frequent patterns - :param oFile: str : - Name of the output file to store complete set of frequent patterns - :param minSup: int or float or str : - The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. - :param sep: str : - This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator. - - :Attributes: - - startTime : float - To record the start time of the mining process - - endTime : float - To record the completion time of the mining process - - finalPatterns : dict - Storing the complete set of patterns in a dictionary variable - - memoryUSS : float - To store the total amount of USS memory consumed by the program + :*Description*: ECLATbitset is one of the fundamental algorithm to discover frequent patterns in a transactional database. - memoryRSS : float - To store the total amount of RSS memory consumed by the program + :*Reference*: Mohammed Javeed Zaki: Scalable Algorithms for Association Mining. IEEE Trans. Knowl. Data Eng. 12(3): + 372-390 (2000), https://ieeexplore.ieee.org/document/846291 - Database : list - To store the transactions of a database in list + :**Parameters**: - **iFile** (*str or URL or dataFrame*) -- *Name of the Input file to mine complete set of frequent patterns.* + - **oFile** (*str*) -- *Name of the output file to store complete set of frequent patterns* + - **minSup** (*int or float or str*) -- *The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count.* + - **sep** (*str*) -- **This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator.** + :**Attributes**: - **startTime** (*float*) -- *To record the start time of the mining process.* + - **endTime** (*float*) -- *To record the end time of the mining process.* + - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.* + - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.* + - **memoryRSS** *(float*) -- *To store the total amount of RSS memory consumed by the program.* + - **Database** (*list*) -- *To store the transactions of a database in list.* - **Methods to execute code on terminal** - ------------------------------------------ + Execution methods + ================= - .. 
code-block:: console + **Terminal command** Format: @@ -102,15 +87,19 @@ class ECLATbitset(_ab._frequentPatterns): (.venv) $ python3 ECLATbitset.py sampleDB.txt patterns.txt 10.0 - .. note:: minSup will be considered in percentage of database transactions + .. note:: minSup can be specified in support count or a value between 0 and 1. - **Importing this algorithm into a python program** - --------------------------------------------------------- + **Calling from a python program** + .. code-block:: python import PAMI.frequentPattern.basic.ECLATbitset as alg + iFile = 'sampleDB.txt' + + minSup = 10 # can also be specified between 0 and 1 + obj = alg.ECLATbitset(iFile, minSup) obj.mine() @@ -135,10 +124,10 @@ class ECLATbitset(_ab._frequentPatterns): print("Total ExecutionTime in seconds:", run) - **Credits:** - ------------------- + Credits: + ======== - The complete program was written by Yudai Masu under the supervision of Professor Rage Uday Kiran. + The complete program was written by Yudai Masu and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran. """ @@ -157,14 +146,12 @@ class ECLATbitset(_ab._frequentPatterns): def _convert(self, value): """ + To convert the user specified minSup value :param value: user specified minSup value - :type value: int - :return: converted type - :rtype: int or float or string """ if type(value) is int: @@ -213,11 +200,11 @@ def _creatingItemSets(self): print("File Not Found") self._minSup = self._convert(self._minSup) - @deprecated( - "It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.") + @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.") def startMine(self): """ Frequent pattern mining process will start from here + We start with the scanning the itemSets and store the bitsets respectively. We form the combinations of single items and check with minSup condition to check the frequency of patterns """ @@ -225,16 +212,13 @@ def startMine(self): def _bitPacker(self, data, maxIndex): """ + It takes the data and maxIndex as input and generates integer as output value. :param data: it takes data as input. - :type data: int or float - :param maxIndex: It converts the data into bits By taking the maxIndex value as condition. 
- :type maxIndex: int - """ packed_bits = 0 for i in data: @@ -304,7 +288,9 @@ def mine(self) -> None: def getMemoryUSS(self): """ + Total amount of USS memory consumed by the mining process will be retrieved from this function + :return: returning USS memory consumed by the mining process :rtype: float """ @@ -313,7 +299,9 @@ def getMemoryUSS(self): def getMemoryRSS(self): """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + :return: returning RSS memory consumed by the mining process :rtype: float """ @@ -322,7 +310,9 @@ def getMemoryRSS(self): def getRuntime(self): """ + Calculating the total amount of runtime taken by the mining process + :return: returning total amount of runtime taken by the mining process :rtype: float """ @@ -331,7 +321,9 @@ def getRuntime(self): def getPatternsAsDataFrame(self): """ + Storing final frequent patterns in a dataframe + :return: returning frequent patterns in a dataframe :rtype: pd.DataFrame """ @@ -345,7 +337,9 @@ def getPatternsAsDataFrame(self): def save(self, outFile): """ + Complete set of frequent patterns will be loaded in to an output file + :param outFile: name of the outputfile :type outFile: file """ @@ -357,7 +351,9 @@ def save(self, outFile): def getPatterns(self): """ + Function to send the set of frequent patterns after completion of the mining process + :return: returning frequent patterns :rtype: dict """ From 7afe7a0ec50911cb59638ddd7bc8e7f9b684f561 Mon Sep 17 00:00:00 2001 From: vanitha Date: Fri, 3 May 2024 12:18:53 +0900 Subject: [PATCH 3/3] #2 changed the function names --- PAMI/AssociationRules/basic/abstract.py | 6 +-- PAMI/AssociationRules/basic/confidence.py | 56 +++++++++++----------- PAMI/AssociationRules/basic/leverage.py | 57 +++++++++++------------ PAMI/AssociationRules/basic/lift.py | 55 +++++++++++----------- 4 files changed, 86 insertions(+), 88 deletions(-) diff --git a/PAMI/AssociationRules/basic/abstract.py b/PAMI/AssociationRules/basic/abstract.py index 2c027bb2..62e4495c 100644 --- a/PAMI/AssociationRules/basic/abstract.py +++ b/PAMI/AssociationRules/basic/abstract.py @@ -110,7 +110,7 @@ def __init__(self, iFile, minConf, sep="\t"): self._iFile = iFile self._sep = sep self._minConf = minConf - self._finalPatterns = {} + self._associationRules = {} self._oFile = str() self._memoryUSS = float() self._memoryRSS = float() @@ -126,7 +126,7 @@ def startMine(self): pass @_abstractmethod - def getPatterns(self): + def getAssociationRules(self): """ Complete set of frequent patterns generated will be retrieved from this function """ @@ -144,7 +144,7 @@ def save(self, oFile): pass @_abstractmethod - def getPatternsAsDataFrame(self): + def getAssociationRulesAsDataFrame(self): """ Complete set of frequent patterns will be loaded in to data frame from this function """ diff --git a/PAMI/AssociationRules/basic/confidence.py b/PAMI/AssociationRules/basic/confidence.py index f948c405..8c44587f 100644 --- a/PAMI/AssociationRules/basic/confidence.py +++ b/PAMI/AssociationRules/basic/confidence.py @@ -2,23 +2,23 @@ # # **Importing this algorithm into a python program** # -# import PAMI.AssociationRules.basic import ARWithConfidence as alg +# import PAMI.AssociationRules.basic import confidence as alg # # iFile = 'sampleDB.txt' # # minConf = 0.5 # -# obj = alg.ARWithConfidence(iFile, minConf) +# obj = alg.confidence(iFile, minConf) # # obj.mine() # -# associationRules = obj.getPatterns() +# associationRules = obj.getAssociationRules() # # print("Total number of Association Rules:", 
len(associationRules)) # # obj.save(oFile) # -# Df = obj.getPatternInDataFrame() +# Df = obj.getAssociationRulesAsDataFrame() # # memUSS = obj.getMemoryUSS() # @@ -92,11 +92,11 @@ class confidence: Format: - (.venv) $ python3 ARWithConfidence.py + (.venv) $ python3 confidence.py Example Usage: - (.venv) $ python3 ARWithConfidence.py sampleDB.txt patterns.txt 0.5 ' ' + (.venv) $ python3 confidence.py sampleDB.txt patterns.txt 0.5 ' ' .. note:: minConf can be specified in a value between 0 and 1. @@ -105,23 +105,23 @@ class confidence: .. code-block:: python - import PAMI.AssociationRules.basic import ARWithConfidence as alg + import PAMI.AssociationRules.basic import confidence as alg iFile = 'sampleDB.txt' minConf = 0.5 - obj = alg.ARWithConfidence(iFile, minConf) + obj = alg.confidence(iFile, minConf) obj.mine() - associationRules = obj.getPatterns() + associationRules = obj.getAssociationRules() print("Total number of Association Rules:", len(associationRules)) obj.save(oFile) - Df = obj.getPatternInDataFrame() + Df = obj.getAssociationRulesAsDataFrame() memUSS = obj.getMemoryUSS() @@ -151,7 +151,7 @@ class confidence: _Sep = " " _memoryUSS = float() _memoryRSS = float() - _frequentPatterns = {} + _associationRules = {} def __init__(self, iFile, minConf, sep): """ @@ -164,14 +164,14 @@ def __init__(self, iFile, minConf, sep): """ self._iFile = iFile self._minConf = minConf - self._finalPatterns = {} + self._associationRules = {} self._sep = sep def _readPatterns(self): """ Reading the input file and storing all the frequent patterns and their support respectively in a frequentPatterns variable. """ - self._frequentPatterns = {} + self._associationRules = {} if isinstance(self._iFile, _ab._pd.DataFrame): pattern, support = [], [] if self._iFile.empty: @@ -193,7 +193,7 @@ def _readPatterns(self): If that doesn't work, please raise an issue in the github repository.\ Got pattern: ", pattern[i], "at index: ", i, "in the dataframe, type: ", type(pattern[i])) s = tuple(sorted(pattern[i])) - self._frequentPatterns[s] = support[i] + self._associationRules[s] = support[i] if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): f = _ab._urlopen(self._iFile) @@ -202,7 +202,7 @@ def _readPatterns(self): line = line.split(':') s = line[0].split(self._sep) s = tuple(sorted(s)) - self._frequentPatterns[s] = int(line[1]) + self._associationRules[s] = int(line[1]) else: try: with open(self._iFile, 'r', encoding='utf-8') as f: @@ -212,7 +212,7 @@ def _readPatterns(self): s = line[0].split(self._sep) s = [x.strip() for x in s] s = tuple(sorted(s)) - self._frequentPatterns[s] = int(line[1]) + self._associationRules[s] = int(line[1]) except IOError: print("File Not Found") quit() @@ -235,17 +235,17 @@ def mine(self): self._startTime = _ab._time.time() self._readPatterns() - keys = list(self._frequentPatterns.keys()) + keys = list(self._associationRules.keys()) - for i in range(len(self._frequentPatterns)): - key = self._frequentPatterns[keys[i]] + for i in range(len(self._associationRules)): + key = self._associationRules[keys[i]] for idx in range(len(keys[i]) - 1, 0, -1): for c in combinations(keys[i], r=idx): antecedent = c # consequent = keys[i] - antecedent - conf = key / self._frequentPatterns[antecedent] + conf = key / self._associationRules[antecedent] if conf >= self._minConf: - self._finalPatterns[antecedent + tuple(['->']) + keys[i]] = conf + self._associationRules[antecedent + tuple(['->']) + keys[i]] = conf self._endTime = _ab._time.time() process = 
_ab._psutil.Process(_ab._os.getpid()) @@ -285,7 +285,7 @@ def getRuntime(self): return self._endTime - self._startTime - def getPatternsAsDataFrame(self): + def getAssociationRulesAsDataFrame(self): """ Storing final frequent patterns in a dataframe @@ -301,7 +301,7 @@ def getPatternsAsDataFrame(self): # # dataFrame = dataFrame.replace(r'\r+|\n+|\t+',' ', regex=True) # return dataFrame - dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) + dataFrame = _ab._pd.DataFrame(list(self._associationRules.items()), columns=['Patterns', 'Support']) return dataFrame def save(self, outFile: str) -> None: @@ -314,24 +314,24 @@ def save(self, outFile: str) -> None: :return: None """ with open(outFile, 'w') as f: - for x, y in self._finalPatterns.items(): + for x, y in self._associationRules.items(): x = self._sep.join(x) f.write(f"{x} : {y}\n") - def getPatterns(self): + def getAssociationRules(self): """ Function to send the set of frequent patterns after completion of the mining process :return: returning frequent patterns :rtype: dict """ - return self._finalPatterns + return self._associationRules def printResults(self): """ Function to send the result after completion of the mining process """ - print("Total number of Association Rules:", len(self.getPatterns())) + print("Total number of Association Rules:", len(self.getAssociationRules())) print("Total Memory in USS:", self.getMemoryUSS()) print("Total Memory in RSS", self.getMemoryRSS()) print("Total ExecutionTime in ms:", self.getRuntime()) @@ -346,7 +346,7 @@ def printResults(self): _ap = confidence(_ab._sys.argv[1], _ab._sys.argv[3]) _ap.startMine() _ap.mine() - print("Total number of Association Rules:", len(_ap.getPatterns())) + print("Total number of Association Rules:", len(_ap.getAssociationRules())) _ap.save(_ab._sys.argv[2]) print("Total Memory in USS:", _ap.getMemoryUSS()) print("Total Memory in RSS", _ap.getMemoryRSS()) diff --git a/PAMI/AssociationRules/basic/leverage.py b/PAMI/AssociationRules/basic/leverage.py index 3407f2a2..dcd0956f 100644 --- a/PAMI/AssociationRules/basic/leverage.py +++ b/PAMI/AssociationRules/basic/leverage.py @@ -1,21 +1,20 @@ # This code uses "leverage" metric to extract the association rules from given frequent patterns. # # **Importing this algorithm into a python program** -# ---------------------------------------------------- # -# import PAMI.AssociationRules.basic import ARWithleverage as alg +# import PAMI.AssociationRules.basic import leverage as alg # -# obj = alg.ARWithleverage(iFile, minLev) +# obj = alg.leverage(iFile, minLev) # # obj.mine() # -# associationRules = obj.getPatterns() +# associationRules = obj.getAssociationRules() # # print("Total number of Association Rules:", len(associationRules)) # # obj.save(oFile) # -# Df = obj.getPatternInDataFrame() +# Df = obj.getAssociationRulesAsDataFrame() # # memUSS = obj.getMemoryUSS() # @@ -89,11 +88,11 @@ class leverage: Format: - (.venv) $ python3 ARWithleverage.py + (.venv) $ python3 leverage.py Example Usage: - (.venv) $ python3 ARWithleverage.py sampleDB.txt patterns.txt 0.5 ' ' + (.venv) $ python3 leverage.py sampleDB.txt patterns.txt 0.5 ' ' .. note:: minLev can be specified in a value between 0 and 1. @@ -102,19 +101,19 @@ class leverage: .. 
code-block:: python - import PAMI.AssociationRules.basic import ARWithleverage as alg + import PAMI.AssociationRules.basic import leverage as alg - obj = alg.ARWithleverage(iFile, minLev) + obj = alg.leverage(iFile, minLev) obj.mine() - associationRules = obj.getPatterns() + associationRules = obj.getAssociationRules() print("Total number of Association Rules:", len(associationRules)) obj.save(oFile) - Df = obj.getPatternInDataFrame() + Df = obj.getAssociationRulesAsDataFrame() memUSS = obj.getMemoryUSS() @@ -144,7 +143,7 @@ class leverage: _Sep = " " _memoryUSS = float() _memoryRSS = float() - _frequentPatterns = {} + _associationRules = {} def __init__(self, iFile, minLev, sep, maxTS): """ @@ -157,7 +156,7 @@ def __init__(self, iFile, minLev, sep, maxTS): """ self._iFile = iFile self._minLev = minLev - self._finalPatterns = {} + self._associationRules = {} self._sep = sep self._maxTS = maxTS @@ -165,7 +164,7 @@ def _readPatterns(self): """ Reading the input file and storing all the frequent patterns and their support respectively in a frequentPatterns variable. """ - self._frequentPatterns = {} + self._associationRules = {} if isinstance(self._iFile, _ab._pd.DataFrame): pattern, support = [], [] if self._iFile.empty: @@ -184,7 +183,7 @@ def _readPatterns(self): raise ValueError("Pattern should be a tuple. PAMI is going through a major revision. Please raise an issue in the github repository regarding this error and provide information regarding input and algorithm.\ In the meanwhile try saving the patterns to a file using (alg).save() and use the file as input. If that doesn't work, please raise an issue in the github repository.") s = tuple(sorted(pattern[i])) - self._frequentPatterns[s] = support[i] / self._maxTS + self._associationRules[s] = support[i] / self._maxTS if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): f = _ab._urlopen(self._iFile) @@ -194,7 +193,7 @@ def _readPatterns(self): s = line[0].split(self._sep) s = tuple(sorted(s)) - self._frequentPatterns[s] = int(line[1]) / self._maxTS + self._associationRules[s] = int(line[1]) / self._maxTS else: try: with open(self._iFile, 'r', encoding='utf-8') as f: @@ -204,7 +203,7 @@ def _readPatterns(self): s = line[0].split(self._sep) s = [x.strip() for x in s] s = tuple(sorted(s)) - self._frequentPatterns[s] = int(line[1]) / self._maxTS + self._associationRules[s] = int(line[1]) / self._maxTS except IOError: print("File Not Found") quit() @@ -227,19 +226,19 @@ def mine(self): self._startTime = _ab._time.time() self._readPatterns() - keys = list(self._frequentPatterns.keys()) + keys = list(self._associationRules.keys()) - for i in range(len(self._frequentPatterns)): - key = self._frequentPatterns[keys[i]] + for i in range(len(self._associationRules)): + key = self._associationRules[keys[i]] for idx in range(len(keys[i]) - 1, 0, -1): for c in combinations(keys[i], r=idx): antecedent = c # consequent = keys[i] - antecedent consequent = tuple(sorted([x for x in keys[i] if x not in antecedent])) # Lev = key / self._frequentPatterns[antecedent] - lev = key - self._frequentPatterns[antecedent] * self._frequentPatterns[consequent] + lev = key - self._associationRules[antecedent] * self._associationRules[consequent] if lev >= self._minLev: - self._finalPatterns[antecedent + tuple(['->']) + keys[i]] = lev + self._associationRules[antecedent + tuple(['->']) + keys[i]] = lev self._endTime = _ab._time.time() process = _ab._psutil.Process(_ab._os.getpid()) @@ -279,7 +278,7 @@ def getRuntime(self): return self._endTime - 
self._startTime - def getPatternsAsDataFrame(self): + def getAssociationRulesAsDataFrame(self): """ Storing final frequent patterns in a dataframe @@ -295,7 +294,7 @@ def getPatternsAsDataFrame(self): # # dataFrame = dataFrame.replace(r'\r+|\n+|\t+',' ', regex=True) # return dataFrame - dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) + dataFrame = _ab._pd.DataFrame(list(self._associationRules.items()), columns=['Patterns', 'Support']) return dataFrame def save(self, outFile: str) -> None: @@ -308,24 +307,24 @@ def save(self, outFile: str) -> None: :return: None """ with open(outFile, 'w') as f: - for x, y in self._finalPatterns.items(): + for x, y in self._associationRules.items(): x = self._sep.join(x) f.write(f"{x} : {y}\n") - def getPatterns(self): + def getAssociationRules(self): """ Function to send the set of frequent patterns after completion of the mining process :return: returning frequent patterns :rtype: dict """ - return self._finalPatterns + return self._associationRules def printResults(self): """ Function to send the result after completion of the mining process """ - print("Total number of Association Rules:", len(self.getPatterns())) + print("Total number of Association Rules:", len(self.getAssociationRules())) print("Total Memory in USS:", self.getMemoryUSS()) print("Total Memory in RSS", self.getMemoryRSS()) print("Total ExecutionTime in ms:", self.getRuntime()) @@ -340,7 +339,7 @@ def printResults(self): _ap = leverage(_ab._sys.argv[1], _ab._sys.argv[3]) _ap.startMine() _ap.mine() - print("Total number of Association Rules:", len(_ap.getPatterns())) + print("Total number of Association Rules:", len(_ap.getAssociationRules())) _ap.save(_ab._sys.argv[2]) print("Total Memory in USS:", _ap.getMemoryUSS()) print("Total Memory in RSS", _ap.getMemoryRSS()) diff --git a/PAMI/AssociationRules/basic/lift.py b/PAMI/AssociationRules/basic/lift.py index 2566b59c..604fc5e6 100644 --- a/PAMI/AssociationRules/basic/lift.py +++ b/PAMI/AssociationRules/basic/lift.py @@ -1,21 +1,20 @@ # This code uses "lift" metric to extract the association rules from given frequent patterns. # # **Importing this algorithm into a python program** -# ---------------------------------------------------- # -# import PAMI.AssociationRules.basic import ARWithlift as alg +# import PAMI.AssociationRules.basic import lift as alg # -# obj = alg.ARWithlift(iFile, minLift) +# obj = alg.lift(iFile, minLift) # # obj.mine() # -# associationRules = obj.getPatterns() +# associationRules = obj.getAssociationRules() # # print("Total number of Association Rules:", len(associationRules)) # # obj.save(oFile) # -# Df = obj.getPatternInDataFrame() +# Df = obj.getPatternsAsDataFrame() # # memUSS = obj.getMemoryUSS() # @@ -89,11 +88,11 @@ class lift: Format: - (.venv) $ python3 ARWithlift.py + (.venv) $ python3 lift.py Example Usage: - (.venv) $ python3 ARWithlift.py sampleDB.txt patterns.txt 0.5 ' ' + (.venv) $ python3 lift.py sampleDB.txt patterns.txt 0.5 ' ' .. note:: minLift can be specified in a value between 0 and 1. @@ -102,19 +101,19 @@ class lift: .. 
code-block:: python - import PAMI.AssociationRules.basic import ARWithlift as alg + import PAMI.AssociationRules.basic import lift as alg - obj = alg.ARWithlift(iFile, minLift) + obj = alg.lift(iFile, minLift) obj.mine() - associationRules = obj.getPatterns() + associationRules = obj.getAssociationRules() print("Total number of Association Rules:", len(associationRules)) obj.save(oFile) - Df = obj.getPatternInDataFrame() + Df = obj.getPatternsAsDataFrame() memUSS = obj.getMemoryUSS() @@ -144,7 +143,7 @@ class lift: _Sep = " " _memoryUSS = float() _memoryRSS = float() - _frequentPatterns = {} + _associationRules = {} def __init__(self, iFile, minLift, sep): """ @@ -157,14 +156,14 @@ def __init__(self, iFile, minLift, sep): """ self._iFile = iFile self._minLift = minLift - self._finalPatterns = {} + self._associationRules = {} self._sep = sep def _readPatterns(self): """ Reading the input file and storing all the frequent patterns and their support respectively in a frequentPatterns variable. """ - self._frequentPatterns = {} + self._associationRules = {} if isinstance(self._iFile, _ab._pd.DataFrame): pattern, support = [], [] if self._iFile.empty: @@ -183,7 +182,7 @@ def _readPatterns(self): raise ValueError("Pattern should be a tuple. PAMI is going through a major revision. Please raise an issue in the github repository regarding this error and provide information regarding input and algorithm.\ In the meanwhile try saving the patterns to a file using (alg).save() and use the file as input. If that doesn't work, please raise an issue in the github repository.") s = tuple(sorted(pattern[i])) - self._frequentPatterns[s] = support[i] + self._associationRules[s] = support[i] if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): f = _ab._urlopen(self._iFile) @@ -193,7 +192,7 @@ def _readPatterns(self): s = line[0].split(self._sep) s = tuple(sorted(s)) - self._frequentPatterns[s] = int(line[1]) + self._associationRules[s] = int(line[1]) else: try: with open(self._iFile, 'r', encoding='utf-8') as f: @@ -203,7 +202,7 @@ def _readPatterns(self): s = line[0].split(self._sep) s = [x.strip() for x in s] s = tuple(sorted(s)) - self._frequentPatterns[s] = int(line[1]) + self._associationRules[s] = int(line[1]) except IOError: print("File Not Found") quit() @@ -226,18 +225,18 @@ def mine(self): self._startTime = _ab._time.time() self._readPatterns() - keys = list(self._frequentPatterns.keys()) + keys = list(self._associationRules.keys()) - for i in range(len(self._frequentPatterns)): - key = self._frequentPatterns[keys[i]] + for i in range(len(self._associationRules)): + key = self._associationRules[keys[i]] for idx in range(len(keys[i]) - 1, 0, -1): for c in combinations(keys[i], r=idx): antecedent = c consequent = tuple(sorted([x for x in keys[i] if x not in antecedent])) # print(antecedent, consequent) - lift = key / (self._frequentPatterns[antecedent]) * self._frequentPatterns[consequent] + lift = key / (self._associationRules[antecedent]) * self._associationRules[consequent] if lift >= self._minLift: - self._finalPatterns[antecedent + tuple(['->']) + keys[i]] = lift + self._associationRules[antecedent + tuple(['->']) + keys[i]] = lift self._endTime = _ab._time.time() process = _ab._psutil.Process(_ab._os.getpid()) @@ -293,7 +292,7 @@ def getPatternsAsDataFrame(self): # # dataFrame = dataFrame.replace(r'\r+|\n+|\t+',' ', regex=True) # return dataFrame - dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) + dataFrame = 
_ab._pd.DataFrame(list(self._associationRules.items()), columns=['Patterns', 'Support']) return dataFrame def save(self, outFile: str) -> None: @@ -306,24 +305,24 @@ def save(self, outFile: str) -> None: :return: None """ with open(outFile, 'w') as f: - for x, y in self._finalPatterns.items(): + for x, y in self._associationRules.items(): x = self._sep.join(x) f.write(f"{x} : {y}\n") - def getPatterns(self): + def getAssociationRules(self): """ Function to send the set of frequent patterns after completion of the mining process :return: returning frequent patterns :rtype: dict """ - return self._finalPatterns + return self._associationRules def printResults(self): """ Function to send the result after completion of the mining process """ - print("Total number of Association Rules:", len(self.getPatterns())) + print("Total number of Association Rules:", len(self.getAssociationRules())) print("Total Memory in USS:", self.getMemoryUSS()) print("Total Memory in RSS", self.getMemoryRSS()) print("Total ExecutionTime in ms:", self.getRuntime()) @@ -338,7 +337,7 @@ def printResults(self): _ap = lift(_ab._sys.argv[1], _ab._sys.argv[3]) _ap.startMine() _ap.mine() - print("Total number of Association Rules:", len(_ap.getPatterns())) + print("Total number of Association Rules:", len(_ap.getAssociationRules())) _ap.save(_ab._sys.argv[2]) print("Total Memory in USS:", _ap.getMemoryUSS()) print("Total Memory in RSS", _ap.getMemoryRSS())
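
Addendum (not part of the patches above): a minimal usage sketch of the association-rule API as renamed in PATCH 3/3, i.e. getAssociationRules() and getAssociationRulesAsDataFrame() in place of the old getPatterns() / getPatternsAsDataFrame(). The input file name 'frequentPatterns.txt', the output name 'associationRules.txt', and the tab separator are placeholder values for illustration only; minConf = 0.5 follows the example in the updated docstrings.

from PAMI.AssociationRules.basic import confidence as alg

# Placeholder input: a file of "pattern : support" lines produced by a frequent-pattern miner.
obj = alg.confidence('frequentPatterns.txt', 0.5, '\t')
obj.mine()

# Accessors renamed in PATCH 3/3 (previously getPatterns / getPatternsAsDataFrame).
associationRules = obj.getAssociationRules()
print("Total number of Association Rules:", len(associationRules))

obj.save('associationRules.txt')
df = obj.getAssociationRulesAsDataFrame()

print("Total Memory in USS:", obj.getMemoryUSS())
print("Total Memory in RSS", obj.getMemoryRSS())
print("Total ExecutionTime in ms:", obj.getRuntime())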