From 6fd9bf8c8ca8f700d702a02effd692d6b79a315d Mon Sep 17 00:00:00 2001 From: Tarun Sreepada Date: Wed, 1 May 2024 08:59:01 +0900 Subject: [PATCH 1/3] delete Apriori 2 --- PAMI/frequentPattern/basic/Apriori2.py | 136 ------------------------- 1 file changed, 136 deletions(-) delete mode 100644 PAMI/frequentPattern/basic/Apriori2.py diff --git a/PAMI/frequentPattern/basic/Apriori2.py b/PAMI/frequentPattern/basic/Apriori2.py deleted file mode 100644 index 4681e9dc..00000000 --- a/PAMI/frequentPattern/basic/Apriori2.py +++ /dev/null @@ -1,136 +0,0 @@ -from typing import Dict, List, Set, Union -from datetime import datetime -from deprecated import deprecated -import pandas as pd -import psutil -import os - -class Apriori: - """ - Apriori algorithm for frequent pattern mining in transactional databases. - - Args: - iFile (str): Input file name or path of the input file. - minSup (Union[int, float, str]): Minimum support threshold. If int, treated as count. If float, treated as proportion of database size. - sep (str, optional): Separator used to distinguish items from each other in a transaction. Default is '\t'. - - Attributes: - minSup (float): Minimum support threshold. - startTime (float): Start time of the mining process. - endTime (float): End time of the mining process. - frequentPatterns (Dict[str, int]): Dictionary storing the complete set of patterns. - database (List[Set[str]]): List to store transactions of the database. - - Methods: - mine(): Perform the frequent pattern mining process. - getMemoryUsage(): Get the total memory consumed. - getRuntime(): Get the total runtime of the mining process. - getPatternsAsDataFrame(): Get frequent patterns as a DataFrame. - savePatterns(outFile): Save the final patterns into a file. - getPatterns(): Get the set of frequent patterns. - printResults(): Print the results of the execution. - """ - - def __init__(self, iFile: str, minSup: Union[int, float, str], sep: str = '\t'): - self.minSup = self._convertMinSup(minSup) - self.startTime = 0.0 - self.endTime = 0.0 - self.frequentPatterns = {} - self.database = self._loadDatabase(iFile, sep) - - def _convertMinSup(self, minSup: Union[int, float, str]) -> float: - if isinstance(minSup, int): - return minSup - elif isinstance(minSup, float): - return len(self.database) * minSup - elif isinstance(minSup, str): - if '.' in minSup: - return len(self.database) * float(minSup) - else: - return int(minSup) - - def _loadDatabase(self, iFile: str, sep: str) -> List[Set[str]]: - database = [] - with open(iFile, 'r') as f: - for line in f: - items = line.strip().split(sep) - database.append(set(items)) - return database - - def mine(self) -> None: - """ - Perform the frequent pattern mining process. 
- """ - self.startTime = datetime.now() - candidates = [{item} for transaction in self.database for item in transaction] - frequentSets = [] - while candidates: - counts = self._countCandidates(candidates) - frequentSets.extend([c for c in candidates if counts[tuple(c)] >= self.minSup]) - candidates = self._generateCandidates(frequentSets) - self.frequentPatterns = {self._setToStr(pattern): self._getSupport(pattern) for pattern in frequentSets} - self.endTime = datetime.now() - - def _countCandidates(self, candidates: List[Set[str]]) -> Dict[tuple, int]: - counts = {} - for transaction in self.database: - for candidate in candidates: - if candidate.issubset(transaction): - counts[tuple(candidate)] = counts.get(tuple(candidate), 0) + 1 - return counts - - def _generateCandidates(self, frequentSets: List[Set[str]]) -> List[Set[str]]: - newCandidates = [] - for i, pattern1 in enumerate(frequentSets): - for pattern2 in frequentSets[i + 1:]: - if list(pattern1)[:-1] == list(pattern2)[:-1]: - newCandidate = pattern1.union(pattern2) - if all(self._isSubset(subset, frequentSets) for subset in self._getSubsets(newCandidate)): - newCandidates.append(newCandidate) - return newCandidates - - def _isSubset(self, subset: Set[str], superset: List[Set[str]]) -> bool: - return any(subset.issubset(pattern) for pattern in superset) - - def _getSubsets(self, pattern: Set[str]) -> List[Set[str]]: - return [set(subset) for subset in self._powerSet(pattern) if subset] - - def _powerSet(self, pattern: Set[str]) -> List[List[str]]: - return [list(subset) for i in range(len(pattern) + 1) for subset in combinations(pattern, i)] - - def _setToStr(self, pattern: Set[str]) -> str: - return '\t'.join(sorted(pattern)) - - def _getSupport(self, pattern: Set[str]) -> int: - return sum(pattern.issubset(transaction) for transaction in self.database) - - def getMemoryUsage(self) -> float: - """ - Get the total memory consumed. - - Returns: - float: Total memory consumed. - """ - process = psutil.Process(os.getpid()) - return process.memory_full_info().uss - - def getRuntime(self) -> float: - """ - Get the total runtime of the mining process. - - Returns: - float: Total runtime in seconds. - """ - return (self.endTime - self.startTime).total_seconds() - - def getPatternsAsDataFrame(self) -> pd.DataFrame: - """ - Get frequent patterns as a DataFrame. - - Returns: - pd.DataFrame: DataFrame containing frequent patterns. 
-        """
-        data = [[pattern, support] for pattern, support in self.frequentPatterns.items()]
-        return pd.DataFrame(data, columns=['Patterns', 'Support'])
-
-    def savePatterns(self, outFile:

From 0e832b25d7053c2b69cf77d49a23103a375f49a6 Mon Sep 17 00:00:00 2001
From: Tarun Sreepada
Date: Thu, 2 May 2024 17:48:47 +0900
Subject: [PATCH 2/3] association rules

---
 .../{ARWithLeverage.py => _ARWithLeverage.py} |   0
 .../basic/{ARWithLift.py => _ARWithLift.py}   |   0
 .../basic/{RuleMiner.py => _RuleMiner.py}     |   0
 .../{ARWithConfidence.py => confidence.py}    | 182 ++++-----
 PAMI/AssociationRules/basic/lift.py           | 347 ++++++++++++++++++
 5 files changed, 412 insertions(+), 117 deletions(-)
 rename PAMI/AssociationRules/basic/{ARWithLeverage.py => _ARWithLeverage.py} (100%)
 rename PAMI/AssociationRules/basic/{ARWithLift.py => _ARWithLift.py} (100%)
 rename PAMI/AssociationRules/basic/{RuleMiner.py => _RuleMiner.py} (100%)
 rename PAMI/AssociationRules/basic/{ARWithConfidence.py => confidence.py} (66%)
 create mode 100644 PAMI/AssociationRules/basic/lift.py

diff --git a/PAMI/AssociationRules/basic/ARWithLeverage.py b/PAMI/AssociationRules/basic/_ARWithLeverage.py
similarity index 100%
rename from PAMI/AssociationRules/basic/ARWithLeverage.py
rename to PAMI/AssociationRules/basic/_ARWithLeverage.py
diff --git a/PAMI/AssociationRules/basic/ARWithLift.py b/PAMI/AssociationRules/basic/_ARWithLift.py
similarity index 100%
rename from PAMI/AssociationRules/basic/ARWithLift.py
rename to PAMI/AssociationRules/basic/_ARWithLift.py
diff --git a/PAMI/AssociationRules/basic/RuleMiner.py b/PAMI/AssociationRules/basic/_RuleMiner.py
similarity index 100%
rename from PAMI/AssociationRules/basic/RuleMiner.py
rename to PAMI/AssociationRules/basic/_RuleMiner.py
diff --git a/PAMI/AssociationRules/basic/ARWithConfidence.py b/PAMI/AssociationRules/basic/confidence.py
similarity index 66%
rename from PAMI/AssociationRules/basic/ARWithConfidence.py
rename to PAMI/AssociationRules/basic/confidence.py
index 3e69f5e4..7270c403 100644
--- a/PAMI/AssociationRules/basic/ARWithConfidence.py
+++ b/PAMI/AssociationRules/basic/confidence.py
@@ -53,85 +53,13 @@
 from PAMI.AssociationRules.basic import abstract as _ab
 from deprecated import deprecated
+# increase recursion depth
+import os
+import sys
+sys.setrecursionlimit(10**4)
+from itertools import combinations

-class _Confidence:
-    """
-    :param patterns: Dictionary containing patterns and its support value.
-    :type patterns: dict
-    :param singleItems: List containing all the single frequent items.
-    :type singleItems: list
-    :param minConf: Minimum confidence to mine all the satisfying association rules.
-    :type minConf: int
-    """
-
-    def __init__(self, patterns, singleItems, minConf):
-        """
-        :param patterns: given frequent patterns
-        :type patterns: dict
-        :param singleItems: one-length frequent patterns
-        :type singleItems: list
-        :param minConf: minimum confidence
-        :type minConf: float
-        """
-        self._frequentPatterns = patterns
-        self._singleItems = singleItems
-        self._minConf = minConf
-        self._finalPatterns = {}
-
-    def _generation(self, prefix, suffix):
-        """
-        To generate the combinations all association rules.
-
-        :param prefix: the prefix of association rule.
-        :type prefix: str
-        :param suffix: the suffix of association rule.
-        :type suffix: str
-        """
-        if len(suffix) == 1:
-            conf = self._generateWithConfidence(prefix, suffix[0])
-        for i in range(len(suffix)):
-            suffix1 = suffix[:i] + suffix[i + 1:]
-            prefix1 = prefix + ' ' + suffix[i]
-            for j in range(i + 1, len(suffix)):
-                self._generateWithConfidence(prefix + ' ' + suffix[i], suffix[j])
-                # self._generation(prefix+ ' ' +suffix[i], suffix[i+1:])
-            self._generation(prefix1, suffix1)
-
-    def _generateWithConfidence(self, lhs, rhs):
-        """
-        To find association rules satisfying user-specified minConf
-
-        :param lhs: the prefix of association rule.
-        :type lhs: str
-        :param rhs: the suffix of association rule.
-        :type rhs: str
-        """
-        s = lhs + '\t' + rhs
-        if self._frequentPatterns.get(s) == None:
-            return 0
-        minimum = self._frequentPatterns[s]
-        conf_lhs = minimum / self._frequentPatterns[lhs]
-        conf_rhs = minimum / self._frequentPatterns[rhs]
-        if conf_lhs >= self._minConf:
-            s1 = lhs + '->' + rhs
-            self._finalPatterns[s1] = conf_lhs
-        if conf_rhs >= self._minConf:
-            s1 = rhs + '->' + lhs
-            self._finalPatterns[s1] = conf_rhs
-
-    def run(self):
-        """
-        To generate the combinations all association rules.
-        """
-        for i in range(len(self._singleItems)):
-            suffix = self._singleItems[:i] + self._singleItems[i + 1:]
-            prefix = self._singleItems[i]
-            for j in range(i + 1, len(self._singleItems)):
-                self._generateWithConfidence(self._singleItems[i], self._singleItems[j])
-            self._generation(prefix, suffix)
-
-
-class ARWithConfidence:
+class confidence:
     """
     About this algorithm
     ====================
@@ -237,28 +165,35 @@ def _readPatterns(self):
         Reading the input file and storing all the frequent patterns and their support respectively in a frequentPatterns variable.
         """
         self._frequentPatterns = {}
-        k = []
         if isinstance(self._iFile, _ab._pd.DataFrame):
-            pattern, sup = [], []
+            pattern, support = [], []
             if self._iFile.empty:
                 print("its empty..")
-            i = self._iFile.columns.values.tolist()
-            if 'pattern' in i:
-                pattern = self._iFile['pattern'].tolist()
-            if 'support' in i:
-                support = self._iFile['support'].tolist()
+            cols = self._iFile.columns.values.tolist()
+            for col in cols:
+                if 'pattern' in col.lower():
+                    pattern = self._iFile[col].tolist()
+                if 'support' in col.lower():
+                    support = self._iFile[col].tolist()
             for i in range(len(pattern)):
-                s = '\t'.join(pattern[i])
-                self._frequentPattern[s] = support[i]
+                if not isinstance(pattern[i], tuple):
+                    raise ValueError("Each pattern should be a tuple. PAMI is going through a major revision. \
+                        In the meanwhile, try saving the patterns to a file using (alg).save() and use the file as input. \
+                        If that does not work, please raise an issue in the GitHub repository with details of the input and the algorithm.")
+                s = tuple(sorted(pattern[i]))
+                self._frequentPatterns[s] = support[i]
         if isinstance(self._iFile, str):
             if _ab._validators.url(self._iFile):
-                data = _ab._urlopen(self._iFile)
-                for line in data:
+                f = _ab._urlopen(self._iFile)
+                for line in f:
+                    line = line.decode('utf-8')  # urlopen yields bytes
                     line = line.strip()
                     line = line.split(':')
                     s = line[0].split(self._sep)
-                    s = '\t'.join(s)
-                    self._frequentPatterns[s.strip()] = int(line[1])
+                    s = tuple(sorted(s))
+                    self._frequentPatterns[s] = int(line[1])
             else:
                 try:
                     with open(self._iFile, 'r', encoding='utf-8') as f:
                         for line in f:
                             line = line.strip()
                             line = line.split(':')
                             s = line[0].split(self._sep)
-                            for j in s:
-                                if j not in k:
-                                    k.append(j)
-                            s = '\t'.join(s)
-                            self._frequentPatterns[s.strip()] = int(line[1])
+                            s = [x.strip() for x in s]
+                            s = tuple(sorted(s))
+                            self._frequentPatterns[s] = int(line[1])
                 except IOError:
                     print("File Not Found")
                     quit()
-        return k

     @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.")
     def startMine(self):
         """
         Association rule mining process will start from here
         """
         self.mine()

     def mine(self):
         """
         Association rule mining process will start from here
         """
         self._startTime = _ab._time.time()
-        k = self._readPatterns()
-        a = _Confidence(self._frequentPatterns, k, self._minConf)
-        a.run()
-        self._finalPatterns = a._finalPatterns
+        self._readPatterns()
+
+        # every proper subset of a frequent pattern is itself frequent, so the
+        # antecedent lookup below assumes the input holds the complete set of
+        # frequent patterns
+        for pattern, sup in self._frequentPatterns.items():
+            for idx in range(len(pattern) - 1, 0, -1):
+                for antecedent in combinations(pattern, r=idx):
+                    consequent = tuple(sorted(set(pattern) - set(antecedent)))
+                    conf = sup / self._frequentPatterns[antecedent]
+                    if conf >= self._minConf:
+                        self._finalPatterns[antecedent + ('->',) + consequent] = conf
+
         self._endTime = _ab._time.time()
         process = _ab._psutil.Process(_ab._os.getpid())
         self._memoryUSS = float()
         self._memoryRSS = float()
         self._memoryUSS = process.memory_full_info().uss
         self._memoryRSS = process.memory_info().rss
         print("Association rules successfully generated from frequent patterns ")
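+    # Worked example (illustrative, not from the original module): given the
+    # patterns {('a',): 4, ('b',): 3, ('a', 'b'): 3}, the rule ('a',) -> ('b',)
+    # has confidence 3 / 4 = 0.75 and is kept whenever minConf <= 0.75.
+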
     def getMemoryUSS(self):
         """
         Total amount of USS memory consumed by the mining process will be retrieved from this function

         :return: returning USS memory consumed by the mining process
         :rtype: float
         """

         return self._memoryUSS

     def getMemoryRSS(self):
         """
         Total amount of RSS memory consumed by the mining process will be retrieved from this function

         :return: returning RSS memory consumed by the mining process
         :rtype: float
         """

         return self._memoryRSS

     def getRuntime(self):
         """
         Calculating the total amount of runtime taken by the mining process

         :return: returning total amount of runtime taken by the mining process
         :rtype: float
         """

         return self._endTime - self._startTime

     def getPatternsAsDataFrame(self):
         """
         Storing final frequent patterns in a dataframe

         :return: returning frequent patterns in a dataframe
         :rtype: pd.DataFrame
         """
-        dataFrame = {}
-        data = []
-        for a, b in self._finalPatterns.items():
-            data.append([a.replace('\t', ' '), b])
-        dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support'])
-        # dataFrame = dataFrame.replace(r'\r+|\n+|\t+',' ', regex=True)
+        dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Confidence'])
         return dataFrame

-    def save(self, outFile):
+    def save(self, outFile: str) -> None:
         """
+        Complete set of association rules will be written to an output file

-        :param outFile: name of the outputfile
-        :type outFile: file
+        :param outFile: name of the output file
+        :type outFile: str
+        :return: None
         """
-        self._oFile = outFile
-        writer = open(self._oFile, 'w+')
-        for x, y in self._finalPatterns.items():
-            s1 = x.strip() + ":" + str(y)
-            writer.write("%s \n" % s1)
+        with open(outFile, 'w') as f:
+            for x, y in self._finalPatterns.items():
+                x = self._sep.join(x)
+                f.write(f"{x} : {y}\n")

     def getPatterns(self):
         """
@@ -384,9 +332,9 @@ def printResults(self):
     _ap = str()
     if len(_ab._sys.argv) == 4 or len(_ab._sys.argv) == 5:
         if len(_ab._sys.argv) == 5:
-            _ap = ARWithConfidence(_ab._sys.argv[1], float(_ab._sys.argv[3]), _ab._sys.argv[4])
+            _ap = confidence(_ab._sys.argv[1], float(_ab._sys.argv[3]), _ab._sys.argv[4])
         if len(_ab._sys.argv) == 4:
-            _ap = ARWithConfidence(_ab._sys.argv[1], _ab._sys.argv[3])
+            _ap = confidence(_ab._sys.argv[1], float(_ab._sys.argv[3]))
     _ap.startMine()
     _ap.mine()
     print("Total number of Association Rules:", len(_ap.getPatterns()))

diff --git a/PAMI/AssociationRules/basic/lift.py b/PAMI/AssociationRules/basic/lift.py
new file mode 100644
index 00000000..d9206d00
--- /dev/null
+++ b/PAMI/AssociationRules/basic/lift.py
@@ -0,0 +1,347 @@
+# This code uses the "lift" metric to extract association rules from given frequent patterns.
+#
+# **Importing this algorithm into a python program**
+# ----------------------------------------------------
+#
+#             from PAMI.AssociationRules.basic import lift as alg
+#
+#             obj = alg.lift(iFile, minLift)
+#
+#             obj.mine()
+#
+#             associationRules = obj.getPatterns()
+#
+#             print("Total number of Association Rules:", len(associationRules))
+#
+#             obj.save(oFile)
+#
+#             Df = obj.getPatternsAsDataFrame()
+#
+#             memUSS = obj.getMemoryUSS()
+#
+#             print("Total Memory in USS:", memUSS)
+#
+#             memRSS = obj.getMemoryRSS()
+#
+#             print("Total Memory in RSS", memRSS)
+#
+#             run = obj.getRuntime()
+#
+#             print("Total ExecutionTime in seconds:", run)
+#

+__copyright__ = """
+Copyright (C)  2021 Rage Uday Kiran
+
+     This program is free software: you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation, either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program.  If not, see <https://www.gnu.org/licenses/>.
+     Copyright (C)  2021 Rage Uday Kiran
+"""
+
+from PAMI.AssociationRules.basic import abstract as _ab
+from deprecated import deprecated
+# increase recursion depth
+import os
+import sys
+sys.setrecursionlimit(10**4)
+from itertools import combinations
+
+class lift:
+    """
+    About this algorithm
+    ====================
+
+    :**Description**:   Association Rules are derived from frequent patterns using the "lift" metric.
+
+    :**Reference**:
+
+    :**Parameters**:    - **iFile** (*str*) -- *Name of the Input file to mine complete set of association rules*
+                        - **oFile** (*str*) -- *Name of the Output file to write association rules*
+                        - **minLift** (*float*) -- *Minimum lift to mine all the satisfying association rules; any non-negative float.*
+                        - **sep** (*str*) -- *This variable is used to distinguish items from one another in a transaction. The default separator is the tab space.
+                          However, the user can override the default separator.*
+
+    :**Attributes**:    - **startTime** (*float*) -- *To record the start time of the mining process.*
+                        - **endTime** (*float*) -- *To record the completion time of the mining process.*
+                        - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.*
+                        - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.*
+                        - **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.*
+
+
+    Execution methods
+    =================
+
+    **Terminal command**
+
+    .. code-block:: console
+
+      Format:
+
+      (.venv) $ python3 lift.py <inputFile> <outputFile> <minLift> <sep>
+
+      Example Usage:
+
+      (.venv) $ python3 lift.py sampleDB.txt patterns.txt 0.5 ' '
+
+    .. note:: minLift is a lower bound on the lift of a rule; unlike confidence, lift is not restricted to the range 0 to 1.
+
+
+    **Calling from a python program**
+
+    .. code-block:: python
+
+            from PAMI.AssociationRules.basic import lift as alg
+
+            obj = alg.lift(iFile, minLift)
+
+            obj.mine()
+
+            associationRules = obj.getPatterns()
+
+            print("Total number of Association Rules:", len(associationRules))
+
+            obj.save(oFile)
+
+            Df = obj.getPatternsAsDataFrame()
+
+            memUSS = obj.getMemoryUSS()
+
+            print("Total Memory in USS:", memUSS)
+
+            memRSS = obj.getMemoryRSS()
+
+            print("Total Memory in RSS", memRSS)
+
+            run = obj.getRuntime()
+
+            print("Total ExecutionTime in seconds:", run)
+
+
+    Credits
+    =======
+
+    The complete program was written by P. Likhitha under the supervision of Professor Rage Uday Kiran.
+
+    """
+
+    _minLift = float()
+    _startTime = float()
+    _endTime = float()
+    _iFile = " "
+    _oFile = " "
+    _sep = " "
+    _memoryUSS = float()
+    _memoryRSS = float()
+    _frequentPatterns = {}
+
+    def __init__(self, iFile, minLift, sep='\t'):
+        """
+        :param iFile: input file name or path
+        :type iFile: str
+        :param minLift: minimum lift
+        :type minLift: float
+        :param sep: delimiter of the input file; defaults to a tab
+        :type sep: str
+        """
+        self._iFile = iFile
+        self._minLift = minLift
+        self._finalPatterns = {}
+        self._sep = sep
+
+    def _readPatterns(self):
+        """
+        Read the input file and store every frequent pattern and its support in the _frequentPatterns dictionary.
+        """
+        self._frequentPatterns = {}
+        if isinstance(self._iFile, _ab._pd.DataFrame):
+            pattern, support = [], []
+            if self._iFile.empty:
+                print("The input dataframe is empty.")
+            cols = self._iFile.columns.values.tolist()
+            for col in cols:
+                if 'pattern' in col.lower():
+                    pattern = self._iFile[col].tolist()
+                if 'support' in col.lower():
+                    support = self._iFile[col].tolist()
+            for i in range(len(pattern)):
+                if not isinstance(pattern[i], tuple):
+                    raise ValueError("Each pattern should be a tuple. PAMI is going through a major revision. \
+                        In the meanwhile, try saving the patterns to a file using (alg).save() and use the file as input. \
+                        If that does not work, please raise an issue in the GitHub repository with details of the input and the algorithm.")
+                s = tuple(sorted(pattern[i]))
+                self._frequentPatterns[s] = support[i]
+        if isinstance(self._iFile, str):
+            if _ab._validators.url(self._iFile):
+                f = _ab._urlopen(self._iFile)
+                for line in f:
+                    line = line.decode('utf-8')  # urlopen yields bytes
+                    line = line.strip()
+                    line = line.split(':')
+                    s = line[0].split(self._sep)
+                    s = tuple(sorted(s))
+                    self._frequentPatterns[s] = int(line[1])
+            else:
+                try:
+                    with open(self._iFile, 'r', encoding='utf-8') as f:
+                        for line in f:
+                            line = line.strip()
+                            line = line.split(':')
+                            s = line[0].split(self._sep)
+                            s = [x.strip() for x in s]
+                            s = tuple(sorted(s))
+                            self._frequentPatterns[s] = int(line[1])
+                except IOError:
+                    print("File Not Found")
+                    quit()
+
+    @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.")
+    def startMine(self):
+        """
+        Association rule mining process will start from here
+        """
+        self.mine()
+
+    def mine(self):
+        """
+        Association rule mining process will start from here
+        """
+        self._startTime = _ab._time.time()
+        self._readPatterns()
+
+        # every proper subset of a frequent pattern is itself frequent, so the
+        # lookups below assume the input holds the complete set of frequent patterns
+        for pattern, sup in self._frequentPatterns.items():
+            for idx in range(len(pattern) - 1, 0, -1):
+                for antecedent in combinations(pattern, r=idx):
+                    consequent = tuple(sorted(set(pattern) - set(antecedent)))
+                    # supports are absolute counts here, so this value equals the
+                    # classical probabilistic lift divided by the database size,
+                    # which a pattern file does not record
+                    liftValue = sup / (self._frequentPatterns[antecedent] * self._frequentPatterns[consequent])
+                    if liftValue >= self._minLift:
+                        self._finalPatterns[antecedent + ('->',) + consequent] = liftValue
+
+        self._endTime = _ab._time.time()
+        process = _ab._psutil.Process(_ab._os.getpid())
+        self._memoryUSS = process.memory_full_info().uss
+        self._memoryRSS = process.memory_info().rss
+        print("Association rules successfully generated from frequent patterns ")
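+    # Worked example (illustrative, not from the original module): given the
+    # patterns {('a',): 4, ('b',): 3, ('a', 'b'): 3}, the rule ('a',) -> ('b',)
+    # gets lift 3 / (4 * 3) = 0.25 on the count scale; multiplying by the
+    # number of transactions in the database yields the classical lift.
+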
+    def getMemoryUSS(self):
+        """
+        Total amount of USS memory consumed by the mining process will be retrieved from this function
+
+        :return: returning USS memory consumed by the mining process
+        :rtype: float
+        """
+
+        return self._memoryUSS
+
+    def getMemoryRSS(self):
+        """
+        Total amount of RSS memory consumed by the mining process will be retrieved from this function
+
+        :return: returning RSS memory consumed by the mining process
+        :rtype: float
+        """
+
+        return self._memoryRSS
+
+    def getRuntime(self):
+        """
+        Calculating the total amount of runtime taken by the mining process
+
+        :return: returning total amount of runtime taken by the mining process
+        :rtype: float
+        """
+
+        return self._endTime - self._startTime
+
+    def getPatternsAsDataFrame(self):
+        """
+        Storing the final association rules in a dataframe
+
+        :return: returning association rules in a dataframe
+        :rtype: pd.DataFrame
+        """
+        dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Lift'])
+        return dataFrame
+
+    def save(self, outFile: str) -> None:
+        """
+        Complete set of association rules will be written to an output file
+
+        :param outFile: name of the output file
+        :type outFile: str
+        :return: None
+        """
+        with open(outFile, 'w') as f:
+            for x, y in self._finalPatterns.items():
+                x = self._sep.join(x)
+                f.write(f"{x} : {y}\n")
+
+    def getPatterns(self):
+        """
+        Function to send the set of association rules after completion of the mining process
+
+        :return: returning association rules
+        :rtype: dict
+        """
+        return self._finalPatterns
+
+    def printResults(self):
+        """
+        Function to send the result after completion of the mining process
+        """
+        print("Total number of Association Rules:", len(self.getPatterns()))
+        print("Total Memory in USS:", self.getMemoryUSS())
+        print("Total Memory in RSS", self.getMemoryRSS())
+        print("Total ExecutionTime in seconds:", self.getRuntime())
+
+
+if __name__ == "__main__":
+    _ap = str()
+    if len(_ab._sys.argv) == 4 or len(_ab._sys.argv) == 5:
+        if len(_ab._sys.argv) == 5:
+            _ap = lift(_ab._sys.argv[1], float(_ab._sys.argv[3]), _ab._sys.argv[4])
+        if len(_ab._sys.argv) == 4:
+            _ap = lift(_ab._sys.argv[1], float(_ab._sys.argv[3]))
+        _ap.mine()
+        print("Total number of Association Rules:", len(_ap.getPatterns()))
+        _ap.save(_ab._sys.argv[2])
+        print("Total Memory in USS:", _ap.getMemoryUSS())
+        print("Total Memory in RSS", _ap.getMemoryRSS())
+        print("Total ExecutionTime in seconds:", _ap.getRuntime())
+    else:
+        print("Error! The number of input parameters does not match the expected number of parameters")

From 0aa81e4096291f13b3d17fb8db9ed12304bad902 Mon Sep 17 00:00:00 2001
From: Tarun Sreepada
Date: Thu, 2 May 2024 18:02:11 +0900
Subject: [PATCH 3/3] Create leverage.py

---
 PAMI/AssociationRules/basic/leverage.py | 347 ++++++++++++++++++++++++
 1 file changed, 347 insertions(+)
 create mode 100644 PAMI/AssociationRules/basic/leverage.py

diff --git a/PAMI/AssociationRules/basic/leverage.py b/PAMI/AssociationRules/basic/leverage.py
new file mode 100644
index 00000000..d7df8dd6
--- /dev/null
+++ b/PAMI/AssociationRules/basic/leverage.py
@@ -0,0 +1,347 @@
+# This code uses the "leverage" metric to extract association rules from given frequent patterns.
+#
+# **Importing this algorithm into a python program**
+# ----------------------------------------------------
+#
+#             from PAMI.AssociationRules.basic import leverage as alg
+#
+#             obj = alg.leverage(iFile, minLev)
+#
+#             obj.mine()
+#
+#             associationRules = obj.getPatterns()
+#
+#             print("Total number of Association Rules:", len(associationRules))
+#
+#             obj.save(oFile)
+#
+#             Df = obj.getPatternsAsDataFrame()
+#
+#             memUSS = obj.getMemoryUSS()
+#
+#             print("Total Memory in USS:", memUSS)
+#
+#             memRSS = obj.getMemoryRSS()
+#
+#             print("Total Memory in RSS", memRSS)
+#
+#             run = obj.getRuntime()
+#
+#             print("Total ExecutionTime in seconds:", run)
+#

+__copyright__ = """
+Copyright (C)  2021 Rage Uday Kiran
+
+     This program is free software: you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation, either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program.  If not, see <https://www.gnu.org/licenses/>.
+     Copyright (C)  2021 Rage Uday Kiran
+"""
+
+from PAMI.AssociationRules.basic import abstract as _ab
+from deprecated import deprecated
+# increase recursion depth
+import os
+import sys
+sys.setrecursionlimit(10**4)
+from itertools import combinations
+
+class leverage:
+    """
+    About this algorithm
+    ====================
+
+    :**Description**:   Association Rules are derived from frequent patterns using the "leverage" metric.
+
+    :**Reference**:
+
+    :**Parameters**:    - **iFile** (*str*) -- *Name of the Input file to mine complete set of association rules*
+                        - **oFile** (*str*) -- *Name of the Output file to write association rules*
+                        - **minLev** (*float*) -- *Minimum leverage to mine all the satisfying association rules.*
+                        - **sep** (*str*) -- *This variable is used to distinguish items from one another in a transaction. The default separator is the tab space. However, the user can override the default separator.*
+
+    :**Attributes**:    - **startTime** (*float*) -- *To record the start time of the mining process.*
+                        - **endTime** (*float*) -- *To record the completion time of the mining process.*
+                        - **finalPatterns** (*dict*) -- *Storing the complete set of patterns in a dictionary variable.*
+                        - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.*
+                        - **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.*
+
+
+    Execution methods
+    =================
+
+    **Terminal command**
+
+    .. code-block:: console
+
+      Format:
+
+      (.venv) $ python3 leverage.py <inputFile> <outputFile> <minLev> <sep>
+
+      Example Usage:
+
+      (.venv) $ python3 leverage.py sampleDB.txt patterns.txt 0.5 ' '
+
+    .. note:: minLev is a lower bound on leverage; probabilistic leverage always lies between -0.25 and 0.25.
+
+
+    **Calling from a python program**
+
+    .. code-block:: python
+
+            from PAMI.AssociationRules.basic import leverage as alg
+
+            obj = alg.leverage(iFile, minLev)
+
+            obj.mine()
+
+            associationRules = obj.getPatterns()
+
+            print("Total number of Association Rules:", len(associationRules))
+
+            obj.save(oFile)
+
+            Df = obj.getPatternsAsDataFrame()
+
+            memUSS = obj.getMemoryUSS()
+
+            print("Total Memory in USS:", memUSS)
+
+            memRSS = obj.getMemoryRSS()
+
+            print("Total Memory in RSS", memRSS)
+
+            run = obj.getRuntime()
+
+            print("Total ExecutionTime in seconds:", run)
+
+
+    Credits
+    =======
+
+    The complete program was written by P. Likhitha under the supervision of Professor Rage Uday Kiran.
+
+    """
+
+    _minLev = float()
+    _startTime = float()
+    _endTime = float()
+    _iFile = " "
+    _oFile = " "
+    _sep = " "
+    _memoryUSS = float()
+    _memoryRSS = float()
+    _frequentPatterns = {}
+
+    def __init__(self, iFile, minLev, sep='\t'):
+        """
+        :param iFile: input file name or path
+        :type iFile: str
+        :param minLev: minimum leverage
+        :type minLev: float
+        :param sep: delimiter of the input file; defaults to a tab
+        :type sep: str
+        """
+        self._iFile = iFile
+        self._minLev = minLev
+        self._finalPatterns = {}
+        self._sep = sep
+
+    def _readPatterns(self):
+        """
+        Read the input file and store every frequent pattern and its support in the _frequentPatterns dictionary.
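+
+        Each line of a pattern file holds the items of one pattern, separated
+        by the chosen separator, followed by a colon and the support count.
+        For example (illustrative), with the default tab separator the line
+        ``a\tb:3`` is stored as ``{('a', 'b'): 3}``.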
+        """
+        self._frequentPatterns = {}
+        if isinstance(self._iFile, _ab._pd.DataFrame):
+            pattern, support = [], []
+            if self._iFile.empty:
+                print("The input dataframe is empty.")
+            cols = self._iFile.columns.values.tolist()
+            for col in cols:
+                if 'pattern' in col.lower():
+                    pattern = self._iFile[col].tolist()
+                if 'support' in col.lower():
+                    support = self._iFile[col].tolist()
+            for i in range(len(pattern)):
+                if not isinstance(pattern[i], tuple):
+                    raise ValueError("Each pattern should be a tuple. PAMI is going through a major revision. \
+                        In the meanwhile, try saving the patterns to a file using (alg).save() and use the file as input. \
+                        If that does not work, please raise an issue in the GitHub repository with details of the input and the algorithm.")
+                s = tuple(sorted(pattern[i]))
+                self._frequentPatterns[s] = support[i]
+        if isinstance(self._iFile, str):
+            if _ab._validators.url(self._iFile):
+                f = _ab._urlopen(self._iFile)
+                for line in f:
+                    line = line.decode('utf-8')  # urlopen yields bytes
+                    line = line.strip()
+                    line = line.split(':')
+                    s = line[0].split(self._sep)
+                    s = tuple(sorted(s))
+                    self._frequentPatterns[s] = int(line[1])
+            else:
+                try:
+                    with open(self._iFile, 'r', encoding='utf-8') as f:
+                        for line in f:
+                            line = line.strip()
+                            line = line.split(':')
+                            s = line[0].split(self._sep)
+                            s = [x.strip() for x in s]
+                            s = tuple(sorted(s))
+                            self._frequentPatterns[s] = int(line[1])
+                except IOError:
+                    print("File Not Found")
+                    quit()
+
+    @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.")
+    def startMine(self):
+        """
+        Association rule mining process will start from here
+        """
+        self.mine()
+
+    def mine(self):
+        """
+        Association rule mining process will start from here
+        """
+        self._startTime = _ab._time.time()
+        self._readPatterns()
+
+        # every proper subset of a frequent pattern is itself frequent, so the
+        # lookups below assume the input holds the complete set of frequent patterns
+        for pattern, sup in self._frequentPatterns.items():
+            for idx in range(len(pattern) - 1, 0, -1):
+                for antecedent in combinations(pattern, r=idx):
+                    consequent = tuple(sorted(set(pattern) - set(antecedent)))
+                    # leverage on the count scale: support(X U Y) - support(X) * support(Y);
+                    # the probabilistic definition divides every support by the
+                    # database size, which a pattern file does not record
+                    lev = sup - (self._frequentPatterns[antecedent] * self._frequentPatterns[consequent])
+                    if lev >= self._minLev:
+                        self._finalPatterns[antecedent + ('->',) + consequent] = lev
+
+        self._endTime = _ab._time.time()
+        process = _ab._psutil.Process(_ab._os.getpid())
+        self._memoryUSS = process.memory_full_info().uss
+        self._memoryRSS = process.memory_info().rss
+        print("Association rules successfully generated from frequent patterns ")
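+    # Worked example (illustrative, not from the original module): given the
+    # patterns {('a',): 4, ('b',): 3, ('a', 'b'): 3}, the rule ('a',) -> ('b',)
+    # gets leverage 3 - 4 * 3 = -9 on the count scale; dividing every support
+    # by the number of transactions first yields the classical leverage.
+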
+    def getMemoryUSS(self):
+        """
+        Total amount of USS memory consumed by the mining process will be retrieved from this function
+
+        :return: returning USS memory consumed by the mining process
+        :rtype: float
+        """
+
+        return self._memoryUSS
+
+    def getMemoryRSS(self):
+        """
+        Total amount of RSS memory consumed by the mining process will be retrieved from this function
+
+        :return: returning RSS memory consumed by the mining process
+        :rtype: float
+        """
+
+        return self._memoryRSS
+
+    def getRuntime(self):
+        """
+        Calculating the total amount of runtime taken by the mining process
+
+        :return: returning total amount of runtime taken by the mining process
+        :rtype: float
+        """
+
+        return self._endTime - self._startTime
+
+    def getPatternsAsDataFrame(self):
+        """
+        Storing the final association rules in a dataframe
+
+        :return: returning association rules in a dataframe
+        :rtype: pd.DataFrame
+        """
+        dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Leverage'])
+        return dataFrame
+
+    def save(self, outFile: str) -> None:
+        """
+        Complete set of association rules will be written to an output file
+
+        :param outFile: name of the output file
+        :type outFile: str
+        :return: None
+        """
+        with open(outFile, 'w') as f:
+            for x, y in self._finalPatterns.items():
+                x = self._sep.join(x)
+                f.write(f"{x} : {y}\n")
+
+    def getPatterns(self):
+        """
+        Function to send the set of association rules after completion of the mining process
+
+        :return: returning association rules
+        :rtype: dict
+        """
+        return self._finalPatterns
+
+    def printResults(self):
+        """
+        Function to send the result after completion of the mining process
+        """
+        print("Total number of Association Rules:", len(self.getPatterns()))
+        print("Total Memory in USS:", self.getMemoryUSS())
+        print("Total Memory in RSS", self.getMemoryRSS())
+        print("Total ExecutionTime in seconds:", self.getRuntime())
+
+
+if __name__ == "__main__":
+    _ap = str()
+    if len(_ab._sys.argv) == 4 or len(_ab._sys.argv) == 5:
+        if len(_ab._sys.argv) == 5:
+            _ap = leverage(_ab._sys.argv[1], float(_ab._sys.argv[3]), _ab._sys.argv[4])
+        if len(_ab._sys.argv) == 4:
+            _ap = leverage(_ab._sys.argv[1], float(_ab._sys.argv[3]))
+        _ap.mine()
+        print("Total number of Association Rules:", len(_ap.getPatterns()))
+        _ap.save(_ab._sys.argv[2])
+        print("Total Memory in USS:", _ap.getMemoryUSS())
+        print("Total Memory in RSS", _ap.getMemoryRSS())
+        print("Total ExecutionTime in seconds:", _ap.getRuntime())
+    else:
+        print("Error! The number of input parameters does not match the expected number of parameters")
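For reference, a minimal usage sketch of the three rewritten rule miners follows. It is illustrative only: 'patterns.txt' is a placeholder for a pattern file in the items:support format produced by a PAMI frequent-pattern miner (for example via (alg).save()), the thresholds are arbitrary, and the lift and leverage thresholds are on the same count scale as the stored supports.

    from PAMI.AssociationRules.basic.confidence import confidence
    from PAMI.AssociationRules.basic.lift import lift
    from PAMI.AssociationRules.basic.leverage import leverage

    # 'patterns.txt' is a hypothetical input file; each line holds one frequent
    # pattern in the form "item1<sep>item2:support"
    conf_miner = confidence('patterns.txt', 0.6, '\t')   # minimum confidence 0.6
    conf_miner.mine()
    conf_miner.save('rules_confidence.txt')

    lift_miner = lift('patterns.txt', 1.2)               # minimum lift, count scale
    lift_miner.mine()
    lift_miner.printResults()

    lev_miner = leverage('patterns.txt', 2)              # minimum leverage, count scale
    lev_miner.mine()
    print(lev_miner.getPatternsAsDataFrame().head())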