From 2f9b1698b3f3d431bbab3c36546a35e8c4d3b49c Mon Sep 17 00:00:00 2001 From: Tarun Sreepada Date: Fri, 24 May 2024 20:32:59 +0900 Subject: [PATCH 1/2] CoMine Update 8s->5s on T10 minsup 0.001 minconf 0.1 on M2 Macbook Documentation may require some checking modified association rules to go back to old pami format. modified newer codes to use old pami format --- PAMI/AssociationRules/basic/confidence.py | 10 +- PAMI/AssociationRules/basic/leverage.py | 17 +- PAMI/AssociationRules/basic/lift.py | 18 +- PAMI/correlatedPattern/basic/CoMine.py | 430 ++++-------- PAMI/correlatedPattern/basic/_CoMine.py | 694 ++++++++++++++++++++ PAMI/frequentPattern/basic/Apriori.py | 18 +- PAMI/frequentPattern/basic/Aprioribitset.py | 18 +- PAMI/frequentPattern/basic/ECLAT.py | 15 +- PAMI/frequentPattern/basic/ECLATDiffset.py | 31 +- PAMI/frequentPattern/basic/ECLATbitset.py | 38 +- PAMI/frequentPattern/basic/FPGrowth.py | 18 +- PAMI/frequentPattern/closed/CHARM.py | 18 +- PAMI/frequentPattern/maximal/MaxFPGrowth.py | 24 +- PAMI/frequentPattern/topk/FAE.py | 28 +- 14 files changed, 970 insertions(+), 407 deletions(-) create mode 100644 PAMI/correlatedPattern/basic/_CoMine.py diff --git a/PAMI/AssociationRules/basic/confidence.py b/PAMI/AssociationRules/basic/confidence.py index 8c44587f..5b14e0b2 100644 --- a/PAMI/AssociationRules/basic/confidence.py +++ b/PAMI/AssociationRules/basic/confidence.py @@ -186,13 +186,15 @@ def _readPatterns(self): # print("Using column: ", col, "for support") for i in range(len(pattern)): # if pattern[i] != tuple(): exit() - if type(pattern[i]) != tuple: + if type(pattern[i]) != str: raise ValueError("Pattern should be a tuple. PAMI is going through a major revision.\ Please raise an issue in the github repository regarding this error and provide information regarding input and algorithm.\ In the meanwhile try saving the patterns to a file using (alg).save() and use the file as input. \ If that doesn't work, please raise an issue in the github repository.\ Got pattern: ", pattern[i], "at index: ", i, "in the dataframe, type: ", type(pattern[i])) - s = tuple(sorted(pattern[i])) + # s = tuple(sorted(pattern[i])) + s = pattern[i].split(self._sep) + s = tuple(sorted(s)) self._associationRules[s] = support[i] if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): @@ -301,7 +303,9 @@ def getAssociationRulesAsDataFrame(self): # # dataFrame = dataFrame.replace(r'\r+|\n+|\t+',' ', regex=True) # return dataFrame - dataFrame = _ab._pd.DataFrame(list(self._associationRules.items()), columns=['Patterns', 'Support']) + # dataFrame = _ab._pd.DataFrame(list(self._associationRules.items()), columns=['Patterns', 'Support']) + # dataFrame = _ab._pd.DataFrame(list([[" ".join(x), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) + dataFrame = _ab._pd.DataFrame(list([[" ".join(x), y] for x, y in self._associationRules.items()]), columns=['Patterns', 'Support']) return dataFrame def save(self, outFile: str) -> None: diff --git a/PAMI/AssociationRules/basic/leverage.py b/PAMI/AssociationRules/basic/leverage.py index dcd0956f..ced5aa0c 100644 --- a/PAMI/AssociationRules/basic/leverage.py +++ b/PAMI/AssociationRules/basic/leverage.py @@ -179,11 +179,17 @@ def _readPatterns(self): # print("Using column: ", col, "for support") for i in range(len(pattern)): # if pattern[i] != tuple(): exit() - if type(pattern[i]) != tuple: - raise ValueError("Pattern should be a tuple. PAMI is going through a major revision. 
Please raise an issue in the github repository regarding this error and provide information regarding input and algorithm.\ - In the meanwhile try saving the patterns to a file using (alg).save() and use the file as input. If that doesn't work, please raise an issue in the github repository.") - s = tuple(sorted(pattern[i])) + if type(pattern[i]) != str: + raise ValueError("Pattern should be a tuple. PAMI is going through a major revision.\ + Please raise an issue in the github repository regarding this error and provide information regarding input and algorithm.\ + In the meanwhile try saving the patterns to a file using (alg).save() and use the file as input. \ + If that doesn't work, please raise an issue in the github repository.\ + Got pattern: ", pattern[i], "at index: ", i, "in the dataframe, type: ", type(pattern[i])) + # s = tuple(sorted(pattern[i])) + s = pattern[i].split(self._sep) + s = tuple(sorted(s)) self._associationRules[s] = support[i] / self._maxTS + if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): f = _ab._urlopen(self._iFile) @@ -294,7 +300,8 @@ def getAssociationRulesAsDataFrame(self): # # dataFrame = dataFrame.replace(r'\r+|\n+|\t+',' ', regex=True) # return dataFrame - dataFrame = _ab._pd.DataFrame(list(self._associationRules.items()), columns=['Patterns', 'Support']) + # dataFrame = _ab._pd.DataFrame(list(self._associationRules.items()), columns=['Patterns', 'Support']) + dataFrame = _ab._pd.DataFrame(list([[" ".join(x), y] for x, y in self._associationRules.items()]), columns=['Patterns', 'Support']) return dataFrame def save(self, outFile: str) -> None: diff --git a/PAMI/AssociationRules/basic/lift.py b/PAMI/AssociationRules/basic/lift.py index 604fc5e6..aa811b4e 100644 --- a/PAMI/AssociationRules/basic/lift.py +++ b/PAMI/AssociationRules/basic/lift.py @@ -178,10 +178,15 @@ def _readPatterns(self): # print("Using column: ", col, "for support") for i in range(len(pattern)): # if pattern[i] != tuple(): exit() - if type(pattern[i]) != tuple: - raise ValueError("Pattern should be a tuple. PAMI is going through a major revision. Please raise an issue in the github repository regarding this error and provide information regarding input and algorithm.\ - In the meanwhile try saving the patterns to a file using (alg).save() and use the file as input. If that doesn't work, please raise an issue in the github repository.") - s = tuple(sorted(pattern[i])) + if type(pattern[i]) != str: + raise ValueError("Pattern should be a tuple. PAMI is going through a major revision.\ + Please raise an issue in the github repository regarding this error and provide information regarding input and algorithm.\ + In the meanwhile try saving the patterns to a file using (alg).save() and use the file as input. 
\ + If that doesn't work, please raise an issue in the github repository.\ + Got pattern: ", pattern[i], "at index: ", i, "in the dataframe, type: ", type(pattern[i])) + # s = tuple(sorted(pattern[i])) + s = pattern[i].split(self._sep) + s = tuple(sorted(s)) self._associationRules[s] = support[i] if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): @@ -276,7 +281,7 @@ def getRuntime(self): return self._endTime - self._startTime - def getPatternsAsDataFrame(self): + def getAssociationRulesAsDataFrame(self): """ Storing final frequent patterns in a dataframe @@ -292,7 +297,8 @@ def getPatternsAsDataFrame(self): # # dataFrame = dataFrame.replace(r'\r+|\n+|\t+',' ', regex=True) # return dataFrame - dataFrame = _ab._pd.DataFrame(list(self._associationRules.items()), columns=['Patterns', 'Support']) + # dataFrame = _ab._pd.DataFrame(list(self._associationRules.items()), columns=['Patterns', 'Support']) + dataFrame = _ab._pd.DataFrame(list([[" ".join(x), y] for x, y in self._associationRules.items()]), columns=['Patterns', 'Support']) return dataFrame def save(self, outFile: str) -> None: diff --git a/PAMI/correlatedPattern/basic/CoMine.py b/PAMI/correlatedPattern/basic/CoMine.py index b5913d06..0e5a140e 100644 --- a/PAMI/correlatedPattern/basic/CoMine.py +++ b/PAMI/correlatedPattern/basic/CoMine.py @@ -60,175 +60,59 @@ import pandas as _pd from typing import List, Dict, Tuple, Union from deprecated import deprecated +from collections import Counter -class _Node: - """ - A class used to represent the node of correlatedPatternTree - :**Attributes**: **itemId** (*int*) -- **storing item of a node** - **counter** (*int*) -- **To maintain the support of node** - **parent** (*node*) -- **To maintain the parent of every node** - **child** (*list*) -- **To maintain the children of node** - **nodeLink** (*node*) -- **Points to the node with same itemId** - - :**Methods**: - getChild(itemName) - returns the node with same itemName from correlatedPatternTree +class _Node: """ + A class used to represent the node of frequentPatternTree - def __init__(self) -> None: - self.itemId = -1 - self.counter = 1 - self.parent = None - self.child = [] - self.nodeLink = None + :**Attributes**: - **itemId** (*int*) -- *storing item of a node.* + - **counter** (*int*) -- *To maintain the support of node.* + - **parent** (*node*) -- *To maintain the parent of node.* + - **children** (*list*) -- *To maintain the children of node.* - def getChild(self, id1) -> Union[None, '_Node']: - """ - :param id1: give item id as input - :type id1: int - :return: the node with same itemId - :rtype: _Node - """ - for i in self.child: - if i.itemId == id1: - return i - return None - -class _Tree: - """ - A class used to represent the correlatedPatternGrowth tree structure - - :**Attributes**: **headerList** (*list*) -- **storing the list of items in tree sorted in ascending of their supports** - **mapItemNodes** (*dictionary*) -- **storing the nodes with same item name** - **mapItemLastNodes** (*dictionary*) -- **representing the map that indicates the last node for each item** - **root** (*Node*) -- **representing the root Node in a tree** - - :**Methods**: - - createHeaderList(items,minSup) - takes items only which are greater than minSup and sort the items in ascending order - addTransaction(transaction) - creating transaction as a branch in correlatedPatternTree - fixNodeLinks(item,newNode) - To create the link for nodes with same item - printTree(Node) - gives the details of node in correlatedPatternGrowth tree - 
addPrefixPath(prefix,port,minSup) - It takes the items in prefix pattern whose support is >=minSup and construct a subtree + :**Methods**: - **addChild(node)** -- *Updates the nodes children list and parent for the given node.* """ - def __init__(self) -> None: - self.headerList = [] - self.mapItemNodes = {} - self.mapItemLastNodes = {} - self.root = _Node() - - def addTransaction(self, transaction: List[int]) -> None: - """ - Adding transaction into tree - - :param transaction: it represents a single transaction in a database - :type transaction: list - :return: None - """ - - current = self.root - for i in transaction: - child = current.getChild(i) - if child is None: - newNode = _Node() - newNode.itemId = i - newNode.parent = current - current.child.append(newNode) - self.fixNodeLinks(i, newNode) - current = newNode - else: - child.counter += 1 - current = child + def __init__(self, item, count, parent) -> None: + self.item = item + self.count = count + self.parent = parent + self.children = {} - def fixNodeLinks(self, item: int, newNode: '_Node') -> None: + def addChild(self, item, count = 1): """ - Fixing node link for the newNode that inserted into correlatedPatternTree - :param item: it represents the item of newNode - :type item: int - :param newNode: it represents the newNode that inserted in correlatedPatternTree - :type newNode: Node - :return: None - """ - if item in self.mapItemLastNodes.keys(): - lastNode = self.mapItemLastNodes[item] - lastNode.nodeLink = newNode - self.mapItemLastNodes[item] = newNode - if item not in self.mapItemNodes.keys(): - self.mapItemNodes[item] = newNode - - def printTree(self, root: '_Node') -> None: - """ - This method is to find the details of parent, children, and support of a Node + Adds a child node to the current node with the specified item and count. - :param root: it represents the Node in correlatedPatternTree - :type root: Node - :return: None + :param item: The item associated with the child node. + :type item: List + :param count: The count or support of the item. Default is 1. + :type count: int + :return: The child node added. 
+ :rtype: List """ - - if root.child is None: - return + if item not in self.children: + self.children[item] = _Node(item, count, self) else: - for i in root.child: - print(i.itemId, i.counter, i.parent.itemId) - self.printTree(i) - - def createHeaderList(self, mapSupport: Dict[int, int], minSup: int) -> None: + self.children[item].count += count + return self.children[item] + + def traverse(self) -> Tuple[List[int], int]: """ - To create the headerList + Traversing the tree to get the transaction - :param mapSupport : it represents the items with their supports - :type mapSupport : dictionary - :param minSup : it represents the minSup - :param minSup : float - :return: None - """ - - t1 = [] - for x, y in mapSupport.items(): - if y >= minSup: - t1.append(x) - itemSetBuffer = [k for k, v in sorted(mapSupport.items(), key=lambda x: x[1], reverse=True)] - self.headerList = [i for i in t1 if i in itemSetBuffer] - - def addPrefixPath(self, prefix: List['_Node'], mapSupportBeta, minSup) -> None: + :return: transaction and count of each item in transaction + :rtype: Tuple, List and int """ - To construct the conditional tree with prefix paths of a node in correlatedPatternTree - - :param prefix : it represents the prefix items of a Node - :type prefix : list - :param mapSupportBeta : it represents the items with their supports - :param mapSupportBeta : dictionary - :param minSup : to check the item meets with minSup - :param minSup : float - :return: None - """ - pathCount = prefix[0].counter - current = self.root - prefix.reverse() - for i in range(0, len(prefix) - 1): - pathItem = prefix[i] - if mapSupportBeta.get(pathItem.itemId) >= minSup: - child = current.getChild(pathItem.itemId) - if child is None: - newNode = _Node() - newNode.itemId = pathItem.itemId - newNode.parent = current - newNode.counter = pathCount - current.child.append(newNode) - current = newNode - self.fixNodeLinks(pathItem.itemId, newNode) - else: - child.counter += pathCount - current = child - + transaction = [] + count = self.count + node = self.parent + while node.parent is not None: + transaction.append(node.item) + node = node.parent + return transaction[::-1], count class CoMine(_ab._correlatedPatterns): """ @@ -338,6 +222,7 @@ class CoMine(_ab._correlatedPatterns): _itemSetCount = 0 _maxPatternLength = 1000 _sep = "\t" + _counter = 0 def __init__(self, iFile: Union[str, _pd.DataFrame], minSup: Union[int, float, str], minAllConf: float, sep: str="\t") ->None: """ @@ -383,60 +268,6 @@ def _creatingItemSets(self) -> None: print("File Not Found") quit() - def _getRatio(self, prefix: List[int], prefixLength: int, s: int) -> float: - """ - A Function to get itemSet Ratio - - :param prefix:the path - :type prefix: list - :param prefixLength: length - :type prefixLength:int - :param s:current ratio - :type s:float - :return: minAllConf of prefix - :rtype: float - """ - maximums = 0 - for ele in range(prefixLength): - i = prefix[ele] - if maximums < self._mapSupport.get(i): - maximums = self._mapSupport.get(i) - return s / maximums - - def _correlatedOneItem(self) -> None: - """ - Generating One correlated item - """ - self._mapSupport = {} - for i in self._Database: - for j in i: - if j not in self._mapSupport: - self._mapSupport[j] = 1 - else: - self._mapSupport[j] += 1 - - def _saveItemSet(self, prefix, prefixLength, support) -> None: - """ - To save the correlated patterns mined form correlatedPatternTree - - :param prefix: the correlated pattern - :type prefix: list - :param prefixLength : the length of a correlated 
pattern - :type prefixLength : int - :param support: the support of a pattern - :type support : int - :return: None - - The correlated patterns were stored in a global variable finalPatterns - """ - all_conf = self._getRatio(prefix, prefixLength, support) - if all_conf < self._minAllConf: - return - l = [] - for i in range(prefixLength): - l.append(prefix[i]) - self._itemSetCount += 1 - self._finalPatterns[tuple(l)] = [support, all_conf] def _convert(self, value: Union[int, float, str]) -> None: """ @@ -458,98 +289,6 @@ def _convert(self, value: Union[int, float, str]) -> None: value = int(value) return value - def _saveAllCombinations(self, tempBuffer, s, position, prefix, prefixLength) -> None: - """ - Generating all the combinations for items in single branch in correlatedPatternTree - - :param tempBuffer: items in a single branch - :type tempBuffer: list - :param s: support at leaf node of a branch - :param position: the length of a tempBuffer - :type position: int - :param prefix: it represents the list of leaf node - :type prefix: list - :param prefixLength: the length of prefix - :type prefixLength: int - :return: None - """ - max1 = 1 << position - for i in range(1, max1): - newPrefixLength = prefixLength - for j in range(position): - isSet = i & (1 << j) - if isSet > 0: - prefix.insert(newPrefixLength, tempBuffer[j].itemId) - newPrefixLength += 1 - self._saveItemSet(prefix, newPrefixLength, s) - - def _correlatedPatternGrowthGenerate(self, correlatedPatternTree, prefix, prefixLength, mapSupport) -> None: - """ - Mining the fp tree - - :param correlatedPatternTree: it represents the correlatedPatternTree - :type correlatedPatternTree: class Tree - :param prefix: it represents an empty list and store the patterns that are mined - :type prefix: list - :param prefixLength: the length of prefix - :type prefixLength: int - :param mapSupport: it represents the support of item - :type mapSupport: dictionary - :return: None - """ - - singlePath = True - position = 0 - s = 0 - if len(correlatedPatternTree.root.child) > 1: - singlePath = False - else: - currentNode = correlatedPatternTree.root.child[0] - while True: - if len(currentNode.child) > 1: - singlePath = False - break - self._fpNodeTempBuffer.insert(position, currentNode) - s = currentNode.counter - position += 1 - if len(currentNode.child) == 0: - break - currentNode = currentNode.child[0] - if singlePath is True: - self._saveAllCombinations(self._fpNodeTempBuffer, s, position, prefix, prefixLength) - else: - for i in reversed(correlatedPatternTree.headerList): - item = i - support = mapSupport[i] - betaSupport = support - prefix.insert(prefixLength, item) - self._saveItemSet(prefix, prefixLength + 1, betaSupport) - if prefixLength + 1 < self._maxPatternLength: - prefixPaths = [] - path = correlatedPatternTree.mapItemNodes.get(item) - mapSupportBeta = {} - while path is not None: - if path.parent.itemId != -1: - prefixPath = [] - prefixPath.append(path) - pathCount = path.counter - parent1 = path.parent - while parent1.itemId != -1: - prefixPath.append(parent1) - if mapSupportBeta.get(parent1.itemId) is None: - mapSupportBeta[parent1.itemId] = pathCount - else: - mapSupportBeta[parent1.itemId] = mapSupportBeta[parent1.itemId] + pathCount - parent1 = parent1.parent - prefixPaths.append(prefixPath) - path = path.nodeLink - treeBeta = _Tree() - for k in prefixPaths: - treeBeta.addPrefixPath(k, mapSupportBeta, self._minSup) - if len(treeBeta.root.child) > 0: - treeBeta.createHeaderList(mapSupportBeta, self._minSup) - 
self._correlatedPatternGrowthGenerate(treeBeta, prefix, prefixLength + 1, mapSupportBeta) - @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.") def startMine(self) -> None: """ @@ -557,6 +296,56 @@ def startMine(self) -> None: """ self.mine() + def _maxSup(self, itemSet, item): + sups = [self._mapSupport[i] for i in itemSet] + [self._mapSupport[item]] + return max(sups) + + def _allConf(self, itemSet): + return self._finalPatterns[itemSet] / max([self._mapSupport[i] for i in itemSet]) + + def recursive(self, item, nodes, root): + + if root.item is None: + newRoot = _Node([item], 0, None) + else: + newRoot = _Node(root.item + [item], 0, None) + + itemCounts = {} + transactions = [] + for node in nodes: + transaction, count = node.traverse() + transactions.append([transaction, count]) + for item in transaction: + if item not in itemCounts: + itemCounts[item] = 0 + itemCounts[item] += count + + # print(newRoot.item, itemCounts.keys()) + itemCounts = {k:v for k, v in itemCounts.items() if v >= self._minSup} + if len(itemCounts) == 0: + return + + itemNodes = {} + for transaction, count in transactions: + transaction = [i for i in transaction if i in itemCounts] + transaction = sorted(transaction, key=lambda item: itemCounts[item], reverse=True) + node = newRoot + for item in transaction: + node = node.addChild(item, count) + if item not in itemNodes: + itemNodes[item] = [set(), 0] + itemNodes[item][0].add(node) + itemNodes[item][1] += count + + itemNodes = {k:v for k, v in sorted(itemNodes.items(), key=lambda x: x[1][1], reverse=True)} + + + for item in itemCounts: + conf = itemNodes[item][1] / self._maxSup(newRoot.item, item) + if conf >= self._minAllConf: + self._finalPatterns[tuple(newRoot.item + [item])] = [itemCounts[item], conf] + self.recursive(item, itemNodes[item][0], newRoot) + def mine(self) -> None: """ main method to start @@ -566,22 +355,33 @@ def mine(self) -> None: raise Exception("Please enter the file path or file name:") self._creatingItemSets() self._minSup = self._convert(self._minSup) - self._tree = _Tree() - self._finalPatterns = {} - self._correlatedOneItem() - self._mapSupport = {k: v for k, v in self._mapSupport.items() if v >= self._minSup} - _itemSetBuffer = [k for k, v in sorted(self._mapSupport.items(), key=lambda x: x[1], reverse=True)] - for i in self._Database: - _transaction = [] - for j in i: - if j in _itemSetBuffer: - _transaction.append(j) - _transaction.sort(key=lambda val: self._mapSupport[val], reverse=True) - self._tree.addTransaction(_transaction) - self._tree.createHeaderList(self._mapSupport, self._minSup) - if len(self._tree.headerList) > 0: - self._itemSetBuffer = [] - self._correlatedPatternGrowthGenerate(self._tree, self._itemSetBuffer, 0, self._mapSupport) + + itemCount = Counter() + for transaction in self._Database: + itemCount.update(transaction) + + self._mapSupport = {k: v for k, v in itemCount.items() if v >= self._minSup} + self._Database = [[item for item in transaction if item in self._mapSupport] for transaction in self._Database] + self._Database = [sorted(transaction, key=lambda item: self._mapSupport[item], reverse=True) for transaction in self._Database] + + root = _Node(None, 0, None) + itemNode = {} + # itemNode[item] = [node, count] + for transaction in self._Database: + node = root + for item in transaction: + node = node.addChild(item) + if item not in itemNode: + itemNode[item] = [set(), 0] + 
itemNode[item][0].add(node) + itemNode[item][1] += 1 + + itemNode = {k:v for k, v in sorted(itemNode.items(), key=lambda x: x[1][1], reverse=True)} + + for item in itemNode: + self._finalPatterns[tuple([item])] = [itemNode[item][1],1] + self.recursive(item, itemNode[item][0], root) + print("Correlated patterns were generated successfully using CoMine algorithm") self._endTime = _ab._time.time() self._memoryUSS = float() diff --git a/PAMI/correlatedPattern/basic/_CoMine.py b/PAMI/correlatedPattern/basic/_CoMine.py new file mode 100644 index 00000000..e26877bb --- /dev/null +++ b/PAMI/correlatedPattern/basic/_CoMine.py @@ -0,0 +1,694 @@ +# CoMine is one of the fundamental algorithm to discover correlated patterns in a transactional database. +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# +# from PAMI.correlatedPattern.basic import CoMine as alg +# +# iFile = 'sampleTDB.txt' +# +# minSup = 0.25 # can be specified between 0 and 1 +# +# minAllConf = 0.2 # can be specified between 0 and 1 +# +# obj = alg.CoMine(iFile, minSup, minAllConf, sep) +# +# obj.mine() +# +# Rules = obj.getPatterns() +# +# print("Total number of Patterns:", len(Patterns)) +# +# obj.save(oFile) +# +# Df = obj.getPatternsAsDataFrame() +# +# memUSS = obj.getMemoryUSS() +# +# print("Total Memory in USS:", memUSS) +# +# memRSS = obj.getMemoryRSS() +# +# print("Total Memory in RSS", memRSS) +# +# run = obj.getRuntime() +# +# print("Total ExecutionTime in seconds:", run) +# + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+ Copyright (C) 2021 Rage Uday Kiran + +""" + +from PAMI.correlatedPattern.basic import abstract as _ab +import pandas as _pd +from typing import List, Dict, Tuple, Union +from deprecated import deprecated + +class _Node: + """ + A class used to represent the node of correlatedPatternTree + + :**Attributes**: **itemId** (*int*) -- **storing item of a node** + **counter** (*int*) -- **To maintain the support of node** + **parent** (*node*) -- **To maintain the parent of every node** + **child** (*list*) -- **To maintain the children of node** + **nodeLink** (*node*) -- **Points to the node with same itemId** + + :**Methods**: + getChild(itemName) + returns the node with same itemName from correlatedPatternTree + """ + + def __init__(self) -> None: + self.itemId = -1 + self.counter = 1 + self.parent = None + self.child = [] + self.nodeLink = None + + def getChild(self, id1) -> Union[None, '_Node']: + """ + :param id1: give item id as input + :type id1: int + :return: the node with same itemId + :rtype: _Node + """ + for i in self.child: + if i.itemId == id1: + return i + return None + +class _Tree: + """ + A class used to represent the correlatedPatternGrowth tree structure + + :**Attributes**: **headerList** (*list*) -- **storing the list of items in tree sorted in ascending of their supports** + **mapItemNodes** (*dictionary*) -- **storing the nodes with same item name** + **mapItemLastNodes** (*dictionary*) -- **representing the map that indicates the last node for each item** + **root** (*Node*) -- **representing the root Node in a tree** + + :**Methods**: + + createHeaderList(items,minSup) + takes items only which are greater than minSup and sort the items in ascending order + addTransaction(transaction) + creating transaction as a branch in correlatedPatternTree + fixNodeLinks(item,newNode) + To create the link for nodes with same item + printTree(Node) + gives the details of node in correlatedPatternGrowth tree + addPrefixPath(prefix,port,minSup) + It takes the items in prefix pattern whose support is >=minSup and construct a subtree + """ + + def __init__(self) -> None: + self.headerList = [] + self.mapItemNodes = {} + self.mapItemLastNodes = {} + self.root = _Node() + + def addTransaction(self, transaction: List[int]) -> None: + """ + Adding transaction into tree + + :param transaction: it represents a single transaction in a database + :type transaction: list + :return: None + """ + + current = self.root + for i in transaction: + child = current.getChild(i) + if child is None: + newNode = _Node() + newNode.itemId = i + newNode.parent = current + current.child.append(newNode) + self.fixNodeLinks(i, newNode) + current = newNode + else: + child.counter += 1 + current = child + + def fixNodeLinks(self, item: int, newNode: '_Node') -> None: + """ + Fixing node link for the newNode that inserted into correlatedPatternTree + + :param item: it represents the item of newNode + :type item: int + :param newNode: it represents the newNode that inserted in correlatedPatternTree + :type newNode: Node + :return: None + """ + if item in self.mapItemLastNodes.keys(): + lastNode = self.mapItemLastNodes[item] + lastNode.nodeLink = newNode + self.mapItemLastNodes[item] = newNode + if item not in self.mapItemNodes.keys(): + self.mapItemNodes[item] = newNode + + def printTree(self, root: '_Node') -> None: + """ + This method is to find the details of parent, children, and support of a Node + + :param root: it represents the Node in correlatedPatternTree + :type root: Node + :return: None + """ + + if 
root.child is None: + return + else: + for i in root.child: + print(i.itemId, i.counter, i.parent.itemId) + self.printTree(i) + + def createHeaderList(self, mapSupport: Dict[int, int], minSup: int) -> None: + """ + To create the headerList + + :param mapSupport : it represents the items with their supports + :type mapSupport : dictionary + :param minSup : it represents the minSup + :param minSup : float + :return: None + """ + + t1 = [] + for x, y in mapSupport.items(): + if y >= minSup: + t1.append(x) + itemSetBuffer = [k for k, v in sorted(mapSupport.items(), key=lambda x: x[1], reverse=True)] + self.headerList = [i for i in t1 if i in itemSetBuffer] + + def addPrefixPath(self, prefix: List['_Node'], mapSupportBeta, minSup) -> None: + """ + To construct the conditional tree with prefix paths of a node in correlatedPatternTree + + :param prefix : it represents the prefix items of a Node + :type prefix : list + :param mapSupportBeta : it represents the items with their supports + :param mapSupportBeta : dictionary + :param minSup : to check the item meets with minSup + :param minSup : float + :return: None + """ + pathCount = prefix[0].counter + current = self.root + prefix.reverse() + for i in range(0, len(prefix) - 1): + pathItem = prefix[i] + if mapSupportBeta.get(pathItem.itemId) >= minSup: + child = current.getChild(pathItem.itemId) + if child is None: + newNode = _Node() + newNode.itemId = pathItem.itemId + newNode.parent = current + newNode.counter = pathCount + current.child.append(newNode) + current = newNode + self.fixNodeLinks(pathItem.itemId, newNode) + else: + child.counter += pathCount + current = child + + +class CoMine(_ab._correlatedPatterns): + """ + About this algorithm + ==================== + + :**Description**: CoMine is one of the fundamental algorithm to discover correlated patterns in a transactional database. It is based on the traditional FP-Growth algorithm. This algorithm uses depth-first search technique to find all correlated patterns in a transactional database. + + :**Reference**: Lee, Y.K., Kim, W.Y., Cao, D., Han, J. (2003). CoMine: efficient mining of correlated patterns. In ICDM (pp. 581–584). + + :**parameters**: **iFile** (*str*) -- **Name of the Input file to mine complete set of correlated patterns** + **oFile** (*str*) -- **Name of the output file to store complete set of correlated patterns** + **minSup** (*int or float or str*) -- **The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count.** + **minAllConf** (*float*) -- **The user can specify minAllConf values within the range (0, 1).** + **sep** (*str*) -- **This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. 
However, the users can override their default separator.** + + :**Attributes**: **memoryUSS** (*float*) -- **To store the total amount of USS memory consumed by the program** + **memoryRSS** (*float*) -- **To store the total amount of RSS memory consumed by the program** + **startTime** (*float*) -- **To record the start time of the mining process** + **endTime** (*float*) -- **To record the completion time of the mining process** + **minSup** (*int*) -- **The user given minSup** + **minAllConf** (*float*) -- **The user given minimum all confidence Ratio(should be in range of 0 to 1)** + **Database** (*list*) -- **To store the transactions of a database in list** + **mapSupport** (*Dictionary*) -- **To maintain the information of item and their frequency** + **lno** (*int*) -- **it represents the total no of transactions** + **tree** (*class*) -- **it represents the Tree class** + **itemSetCount** (*int*) -- **it represents the total no of patterns** + **finalPatterns** (*dict*) -- **it represents to store the patterns** + **itemSetBuffer** (*list*) -- **it represents the store the items in mining** + **maxPatternLength** (*int*) -- **it represents the constraint for pattern length** + + Execution methods + ================= + + **Terminal command** + + .. code-block:: console + + Format: + + (.venv) $ python3 CoMine.py + + Example Usage: + + (.venv) $ python3 CoMine.py sampleTDB.txt output.txt 0.25 0.2 + + .. note:: minSup can be specified in support count or a value between 0 and 1. + + **Calling from a python program** + + .. code-block:: python + + from PAMI.correlatedPattern.basic import CoMine as alg + + iFile = 'sampleTDB.txt' + + minSup = 0.25 # can be specified between 0 and 1 + + minAllConf = 0.2 # can be specified between 0 and 1 + + obj = alg.CoMine(iFile, minSup, minAllConf,sep) + + obj.mine() + + patterns = obj.getPatterns() + + print("Total number of Patterns:", len(patterns)) + + obj.savePatterns(oFile) + + df = obj.getPatternsAsDataFrame() + + memUSS = obj.getMemoryUSS() + + print("Total Memory in USS:", memUSS) + + memRSS = obj.getMemoryRSS() + + print("Total Memory in RSS", memRSS) + + run = obj.getRuntime() + + print("Total ExecutionTime in seconds:", run) + + Credits + ======= + + The complete program was written by B.Sai Chitra under the supervision of Professor Rage Uday Kiran. 
+ + """ + + _startTime = float() + _endTime = float() + _minSup = float() + _finalPatterns = {} + _iFile = " " + _oFile = " " + _memoryUSS = float() + _memoryRSS = float() + _minAllConf = 0.0 + _Database = [] + _mapSupport = {} + _lno = 0 + _tree = str() + _itemSetBuffer = None + _fpNodeTempBuffer = [] + _itemSetCount = 0 + _maxPatternLength = 1000 + _sep = "\t" + + def __init__(self, iFile: Union[str, _pd.DataFrame], minSup: Union[int, float, str], minAllConf: float, sep: str="\t") ->None: + """ + param iFile: give the input file + type iFile: str or DataFrame or url + param minSup: minimum support + type minSup: int or float + param sep: Delimiter of input file + type sep: str + """ + + super().__init__(iFile, minSup, minAllConf, sep) + + def _creatingItemSets(self) -> None: + """ + Storing the complete transactions of the database/input file in a database variable + """ + self._Database = [] + if isinstance(self._iFile, _ab._pd.DataFrame): + if self._iFile.empty: + print("its empty..") + i = self._iFile.columns.values.tolist() + if 'Transactions' in i: + self._Database = self._iFile['Transactions'].tolist() + if isinstance(self._iFile, str): + if _ab._validators.url(self._iFile): + data = _ab._urlopen(self._iFile) + for line in data: + line.strip() + line = line.decode("utf-8") + temp = [i.rstrip() for i in line.split(self._sep)] + temp = [x for x in temp if x] + self._Database.append(temp) + else: + try: + with open(self._iFile, 'r', encoding='utf-8') as f: + for line in f: + line.strip() + temp = [i.rstrip() for i in line.split(self._sep)] + temp = [x for x in temp if x] + self._Database.append(temp) + except IOError: + print("File Not Found") + quit() + + def _getRatio(self, prefix: List[int], prefixLength: int, s: int) -> float: + """ + A Function to get itemSet Ratio + + :param prefix:the path + :type prefix: list + :param prefixLength: length + :type prefixLength:int + :param s:current ratio + :type s:float + :return: minAllConf of prefix + :rtype: float + """ + maximums = 0 + for ele in range(prefixLength): + i = prefix[ele] + if maximums < self._mapSupport.get(i): + maximums = self._mapSupport.get(i) + return s / maximums + + def _correlatedOneItem(self) -> None: + """ + Generating One correlated item + """ + self._mapSupport = {} + for i in self._Database: + for j in i: + if j not in self._mapSupport: + self._mapSupport[j] = 1 + else: + self._mapSupport[j] += 1 + + def _saveItemSet(self, prefix, prefixLength, support) -> None: + """ + To save the correlated patterns mined form correlatedPatternTree + + :param prefix: the correlated pattern + :type prefix: list + :param prefixLength : the length of a correlated pattern + :type prefixLength : int + :param support: the support of a pattern + :type support : int + :return: None + + The correlated patterns were stored in a global variable finalPatterns + """ + all_conf = self._getRatio(prefix, prefixLength, support) + if all_conf < self._minAllConf: + return + l = [] + for i in range(prefixLength): + l.append(prefix[i]) + self._itemSetCount += 1 + self._finalPatterns[tuple(l)] = [support, all_conf] + + def _convert(self, value: Union[int, float, str]) -> None: + """ + To convert the type of user specified minSup value + + :param value: user specified minSup value + :type value: int or float or str + :return: None + """ + if type(value) is int: + value = int(value) + if type(value) is float: + value = (len(self._Database) * value) + if type(value) is str: + if '.' 
in value: + value = float(value) + value = (len(self._Database) * value) + else: + value = int(value) + return value + + def _saveAllCombinations(self, tempBuffer, s, position, prefix, prefixLength) -> None: + """ + Generating all the combinations for items in single branch in correlatedPatternTree + + :param tempBuffer: items in a single branch + :type tempBuffer: list + :param s: support at leaf node of a branch + :param position: the length of a tempBuffer + :type position: int + :param prefix: it represents the list of leaf node + :type prefix: list + :param prefixLength: the length of prefix + :type prefixLength: int + :return: None + """ + max1 = 1 << position + for i in range(1, max1): + newPrefixLength = prefixLength + for j in range(position): + isSet = i & (1 << j) + if isSet > 0: + prefix.insert(newPrefixLength, tempBuffer[j].itemId) + newPrefixLength += 1 + self._saveItemSet(prefix, newPrefixLength, s) + + def _correlatedPatternGrowthGenerate(self, correlatedPatternTree, prefix, prefixLength, mapSupport) -> None: + """ + Mining the fp tree + + :param correlatedPatternTree: it represents the correlatedPatternTree + :type correlatedPatternTree: class Tree + :param prefix: it represents an empty list and store the patterns that are mined + :type prefix: list + :param prefixLength: the length of prefix + :type prefixLength: int + :param mapSupport: it represents the support of item + :type mapSupport: dictionary + :return: None + """ + + singlePath = True + position = 0 + s = 0 + if len(correlatedPatternTree.root.child) > 1: + singlePath = False + else: + currentNode = correlatedPatternTree.root.child[0] + while True: + if len(currentNode.child) > 1: + singlePath = False + break + self._fpNodeTempBuffer.insert(position, currentNode) + s = currentNode.counter + position += 1 + if len(currentNode.child) == 0: + break + currentNode = currentNode.child[0] + if singlePath is True: + self._saveAllCombinations(self._fpNodeTempBuffer, s, position, prefix, prefixLength) + else: + for i in reversed(correlatedPatternTree.headerList): + item = i + support = mapSupport[i] + betaSupport = support + prefix.insert(prefixLength, item) + self._saveItemSet(prefix, prefixLength + 1, betaSupport) + if prefixLength + 1 < self._maxPatternLength: + prefixPaths = [] + path = correlatedPatternTree.mapItemNodes.get(item) + mapSupportBeta = {} + while path is not None: + if path.parent.itemId != -1: + prefixPath = [] + prefixPath.append(path) + pathCount = path.counter + parent1 = path.parent + while parent1.itemId != -1: + prefixPath.append(parent1) + if mapSupportBeta.get(parent1.itemId) is None: + mapSupportBeta[parent1.itemId] = pathCount + else: + mapSupportBeta[parent1.itemId] = mapSupportBeta[parent1.itemId] + pathCount + parent1 = parent1.parent + prefixPaths.append(prefixPath) + path = path.nodeLink + treeBeta = _Tree() + for k in prefixPaths: + treeBeta.addPrefixPath(k, mapSupportBeta, self._minSup) + if len(treeBeta.root.child) > 0: + treeBeta.createHeaderList(mapSupportBeta, self._minSup) + self._correlatedPatternGrowthGenerate(treeBeta, prefix, prefixLength + 1, mapSupportBeta) + + @deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. 
Starting from January 2025, 'startMine()' will be completely terminated.") + def startMine(self) -> None: + """ + main method to start + """ + self.mine() + + def mine(self) -> None: + """ + main method to start + """ + self._startTime = _ab._time.time() + if self._iFile is None: + raise Exception("Please enter the file path or file name:") + self._creatingItemSets() + self._minSup = self._convert(self._minSup) + self._tree = _Tree() + self._finalPatterns = {} + self._correlatedOneItem() + self._mapSupport = {k: v for k, v in self._mapSupport.items() if v >= self._minSup} + _itemSetBuffer = [k for k, v in sorted(self._mapSupport.items(), key=lambda x: x[1], reverse=True)] + for i in self._Database: + _transaction = [] + for j in i: + if j in _itemSetBuffer: + _transaction.append(j) + _transaction.sort(key=lambda val: self._mapSupport[val], reverse=True) + self._tree.addTransaction(_transaction) + self._tree.createHeaderList(self._mapSupport, self._minSup) + if len(self._tree.headerList) > 0: + self._itemSetBuffer = [] + self._correlatedPatternGrowthGenerate(self._tree, self._itemSetBuffer, 0, self._mapSupport) + print("Correlated patterns were generated successfully using CoMine algorithm") + self._endTime = _ab._time.time() + self._memoryUSS = float() + self._memoryRSS = float() + process = _ab._psutil.Process(_ab._os.getpid()) + self._memoryUSS = process.memory_full_info().uss + self._memoryRSS = process.memory_info().rss + + def getMemoryUSS(self) -> float: + """ + Total amount of USS memory consumed by the mining process will be retrieved from this function + + :return: returning USS memory consumed by the mining process + :rtype: float + """ + + return self._memoryUSS + + def getMemoryRSS(self) -> float: + """ + Total amount of RSS memory consumed by the mining process will be retrieved from this function + + :return: returning RSS memory consumed by the mining process + :rtype: float + """ + + return self._memoryRSS + + def getRuntime(self) -> float: + """ + Calculating the total amount of runtime taken by the mining process + + :return: returning total amount of runtime taken by the mining process + :rtype: float + """ + + return self._endTime - self._startTime + + def getPatternsAsDataFrame(self) -> _pd.DataFrame: + """ + Storing final correlated patterns in a dataframe + + :return: returning correlated patterns in a dataframe + :rtype: pd.DataFrame + """ + + dataframe = {} + data = [] + for a, b in self._finalPatterns.items(): + pat = " " + for i in a: + pat += str(i) + " " + data.append([pat, b[0], b[1]]) + dataframe = _ab._pd.DataFrame(data, columns=['Patterns', 'Support', 'Confidence']) + return dataframe + + def save(self, outFile) -> None: + """ + Complete set of correlated patterns will be saved into an output file + + :param outFile: name of the outputfile + :type outFile: file + :return: None + """ + self._oFile = outFile + writer = open(self._oFile, 'w+') + for x, y in self._finalPatterns.items(): + pat = "" + for i in x: + pat += str(i) + "\t" + patternsAndSupport = pat.strip() + ":" + str(y[0]) + ":" + str(y[1]) + writer.write("%s \n" % patternsAndSupport) + + def getPatterns(self) -> Dict[Tuple[int], List[Union[int, float]]]: + """ + Function to send the set of correlated patterns after completion of the mining process + + :return: returning correlated patterns + :rtype: dict + """ + return self._finalPatterns + + def printResults(self) -> None: + """ + function to print the result after completing the process + + :return: None + """ + print("Total number of Correlated 
Patterns:", len(self.getPatterns())) + print("Total Memory in USS:", self.getMemoryUSS()) + print("Total Memory in RSS", self.getMemoryRSS()) + print("Total ExecutionTime in ms:", self.getRuntime()) + +if __name__ == "__main__": + _ap = str() + if len(_ab._sys.argv) == 5 or len(_ab._sys.argv) == 6: + if len(_ab._sys.argv) == 6: + _ap = CoMine(_ab._sys.argv[1], _ab._sys.argv[3], float(_ab._sys.argv[4]), _ab._sys.argv[5]) + if len(_ab._sys.argv) == 5: + _ap = CoMine(_ab._sys.argv[1], _ab._sys.argv[3], float(_ab._sys.argv[4])) + _ap.startMine() + _ap.mine() + print("Total number of Correlated-Frequent Patterns:", len(_ap.getPatterns())) + _ap.save(_ab._sys.argv[2]) + print("Total Memory in USS:", _ap.getMemoryUSS()) + print("Total Memory in RSS", _ap.getMemoryRSS()) + print("Total ExecutionTime in seconds:", _ap.getRuntime()) + else: + print("Error! The number of input parameters do not match the total number of parameters provided") + diff --git a/PAMI/frequentPattern/basic/Apriori.py b/PAMI/frequentPattern/basic/Apriori.py index 66c692d1..70a8688c 100644 --- a/PAMI/frequentPattern/basic/Apriori.py +++ b/PAMI/frequentPattern/basic/Apriori.py @@ -328,26 +328,28 @@ def getPatternsAsDataFrame(self) -> _ab._pd.DataFrame: # dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) # print("Time taken to convert the frequent patterns into DataFrame is: ", _ab._time.time() - time) - - dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) + dataFrame = _ab._pd.DataFrame(list([[" ".join(x), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) return dataFrame - def save(self, outFile: str) -> None: + def save(self, outFile: str, seperator = "\t" ) -> None: """ Complete set of frequent patterns will be loaded in to an output file :param outFile: name of the output file - :type outFile: csvfile - :return: None - """ + + # self._oFile = outFile + # writer = open(self._oFile, 'w+') + # for x, y in self._finalPatterns.items(): + # patternsAndSupport = x.strip() + ":" + str(y[0]) + # writer.write("%s \n" % patternsAndSupport) with open(outFile, 'w') as f: for x, y in self._finalPatterns.items(): - x = self._sep.join(x) + x = seperator.join(x) f.write(f"{x}:{y}\n") def getPatterns(self) -> Dict[str, int]: @@ -386,3 +388,5 @@ def printResults(self) -> None: print("Total ExecutionTime in ms:", _ap.getRuntime()) else: print("Error! 
The number of input parameters do not match the total number of parameters provided") + + \ No newline at end of file diff --git a/PAMI/frequentPattern/basic/Aprioribitset.py b/PAMI/frequentPattern/basic/Aprioribitset.py index 71c5505f..1a530de7 100644 --- a/PAMI/frequentPattern/basic/Aprioribitset.py +++ b/PAMI/frequentPattern/basic/Aprioribitset.py @@ -348,26 +348,29 @@ def getPatternsAsDataFrame(self) -> _ab._pd.DataFrame: # dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) # print("Time taken to convert the frequent patterns into DataFrame is: ", _ab._time.time() - time) - - dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) + dataFrame = _ab._pd.DataFrame(list([[" ".join(x), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) + # dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) return dataFrame - def save(self, outFile: str) -> None: + def save(self, outFile: str, seperator = "\t" ) -> None: """ Complete set of frequent patterns will be loaded in to an output file :param outFile: name of the output file - :type outFile: csvfile - :return: None - """ + + # self._oFile = outFile + # writer = open(self._oFile, 'w+') + # for x, y in self._finalPatterns.items(): + # patternsAndSupport = x.strip() + ":" + str(y[0]) + # writer.write("%s \n" % patternsAndSupport) with open(outFile, 'w') as f: for x, y in self._finalPatterns.items(): - x = self._sep.join(x) + x = seperator.join(x) f.write(f"{x}:{y}\n") def getPatterns(self): @@ -405,4 +408,3 @@ def printResults(self): else: print("Error! The number of input parameters do not match the total number of parameters provided") - diff --git a/PAMI/frequentPattern/basic/ECLAT.py b/PAMI/frequentPattern/basic/ECLAT.py index d7cafe5c..c926a137 100644 --- a/PAMI/frequentPattern/basic/ECLAT.py +++ b/PAMI/frequentPattern/basic/ECLAT.py @@ -328,12 +328,12 @@ def getPatternsAsDataFrame(self) -> _ab._pd.DataFrame: # dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) # print("Time taken to convert the frequent patterns into DataFrame is: ", _ab._time.time() - time) - - dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) + dataFrame = _ab._pd.DataFrame(list([[" ".join(x), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) + # dataFrame = _ab._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) return dataFrame - def save(self, outFile: str) -> None: + def save(self, outFile: str, seperator = "\t" ) -> None: """ Complete set of frequent patterns will be loaded in to an output file @@ -342,9 +342,15 @@ def save(self, outFile: str) -> None: :type outFile: csvfile :return: None """ + + # self._oFile = outFile + # writer = open(self._oFile, 'w+') + # for x, y in self._finalPatterns.items(): + # patternsAndSupport = x.strip() + ":" + str(y[0]) + # writer.write("%s \n" % patternsAndSupport) with open(outFile, 'w') as f: for x, y in self._finalPatterns.items(): - x = self._sep.join(x) + x = seperator.join(x) f.write(f"{x}:{y}\n") def getPatterns(self) -> dict: @@ -383,3 +389,4 @@ def printResults(self) -> None: print("Total ExecutionTime in ms:", _ap.getRuntime()) else: print("Error! 
The number of input parameters do not match the total number of parameters provided") + diff --git a/PAMI/frequentPattern/basic/ECLATDiffset.py b/PAMI/frequentPattern/basic/ECLATDiffset.py index 8e69a9f9..ef978d33 100644 --- a/PAMI/frequentPattern/basic/ECLATDiffset.py +++ b/PAMI/frequentPattern/basic/ECLATDiffset.py @@ -329,26 +329,35 @@ def getPatternsAsDataFrame(self): :rtype: pd.DataFrame """ - dataFrame = {} - data = [] - for a, b in self._finalPatterns.items(): - data.append([a.replace('\t', ' '), b[0]]) - dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + # dataFrame = {} + # data = [] + # for a, b in self._finalPatterns.items(): + # data.append([a.replace('\t', ' '), b[0]]) + # dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + + dataFrame = _ab._pd.DataFrame(list([[" ".join(x), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) + return dataFrame - def save(self, outFile): + def save(self, outFile: str, seperator = "\t" ) -> None: """ Complete set of frequent patterns will be loaded in to an output file :param outFile: name of the output file :type outFile: csvfile + :return: None """ - self._oFile = outFile - writer = open(self._oFile, 'w+') - for x, y in self._finalPatterns.items(): - patternsAndSupport = x.strip() + ":" + str(y[0]) - writer.write("%s \n" % patternsAndSupport) + + # self._oFile = outFile + # writer = open(self._oFile, 'w+') + # for x, y in self._finalPatterns.items(): + # patternsAndSupport = x.strip() + ":" + str(y[0]) + # writer.write("%s \n" % patternsAndSupport) + with open(outFile, 'w') as f: + for x, y in self._finalPatterns.items(): + x = seperator.join(x) + f.write(f"{x}:{y}\n") def getPatterns(self): """ diff --git a/PAMI/frequentPattern/basic/ECLATbitset.py b/PAMI/frequentPattern/basic/ECLATbitset.py index e5b2d9d9..b35311da 100644 --- a/PAMI/frequentPattern/basic/ECLATbitset.py +++ b/PAMI/frequentPattern/basic/ECLATbitset.py @@ -284,7 +284,7 @@ def mine(self) -> None: self._memoryRSS = float() self._memoryUSS = process.memory_full_info().uss self._memoryRSS = process.memory_info().rss - print("Frequent patterns were generated successfully using Apriori algorithm ") + print("Frequent patterns were generated successfully using ECLAT algorithm ") def getMemoryUSS(self): """ @@ -328,26 +328,34 @@ def getPatternsAsDataFrame(self): :rtype: pd.DataFrame """ - dataFrame = {} - data = [] - for a, b in self._finalPatterns.items(): - data.append([a.replace('\t', ' '), b]) - dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + # dataFrame = {} + # data = [] + # for a, b in self._finalPatterns.items(): + # data.append([a.replace('\t', ' '), b]) + # dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + + dataFrame = _ab._pd.DataFrame(list([[x.replace("\t", " "), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) return dataFrame - def save(self, outFile): + def save(self, outFile: str, seperator = "\t" ) -> None: """ Complete set of frequent patterns will be loaded in to an output file - :param outFile: name of the outputfile - :type outFile: file + :param outFile: name of the output file + :type outFile: csvfile + :return: None """ - self._oFile = outFile - writer = open(self._oFile, 'w+') - for x, y in self._finalPatterns.items(): - patternsAndSupport = x.strip() + ":" + str(y) - writer.write("%s \n" % patternsAndSupport) + + # self._oFile = outFile + # writer = open(self._oFile, 'w+') + # for x, y in self._finalPatterns.items(): + # 
patternsAndSupport = x.strip() + ":" + str(y[0]) + # writer.write("%s \n" % patternsAndSupport) + with open(outFile, 'w') as f: + for x, y in self._finalPatterns.items(): + x = seperator.join(x) + f.write(f"{x}:{y}\n") def getPatterns(self): """ @@ -384,4 +392,4 @@ def printResults(self): print("Total Memory in RSS", _ap.getMemoryRSS()) print("Total ExecutionTime in ms:", _ap.getRuntime()) else: - print("Error! The number of input parameters do not match the total number of parameters provided") \ No newline at end of file + print("Error! The number of input parameters do not match the total number of parameters provided") diff --git a/PAMI/frequentPattern/basic/FPGrowth.py b/PAMI/frequentPattern/basic/FPGrowth.py index 9fd1dc79..7bd55bf8 100644 --- a/PAMI/frequentPattern/basic/FPGrowth.py +++ b/PAMI/frequentPattern/basic/FPGrowth.py @@ -478,15 +478,17 @@ def getPatternsAsDataFrame(self) -> _fp._pd.DataFrame: :rtype: pd.DataFrame """ - # dataframe = {} - # data = [] - # for a, b in self.__finalPatterns.items(): - # data.append([a.replace('\t', ' '), b]) - # dataframe = _fp._pd.DataFrame(data, columns=['Patterns', 'Support']) - dataFrame = _fp._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) + # # dataframe = {} + # # data = [] + # # for a, b in self.__finalPatterns.items(): + # # data.append([a.replace('\t', ' '), b]) + # # dataframe = _fp._pd.DataFrame(data, columns=['Patterns', 'Support']) + # dataFrame = _fp._pd.DataFrame(list(self._finalPatterns.items()), columns=['Patterns', 'Support']) + dataFrame = _fp._pd.DataFrame(list([[" ".join(x), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) + return dataFrame - def save(self, outFile: str) -> None: + def save(self, outFile: str, seperator = "\t" ) -> None: """ Complete set of frequent patterns will be loaded in to an output file @@ -497,7 +499,7 @@ def save(self, outFile: str) -> None: """ with open(outFile, 'w') as f: for x, y in self._finalPatterns.items(): - x = self._sep.join(x) + x = seperator.join(x) f.write(f"{x}:{y}\n") def getPatterns(self) -> Dict[str, int]: diff --git a/PAMI/frequentPattern/closed/CHARM.py b/PAMI/frequentPattern/closed/CHARM.py index d995f1db..4695cde8 100644 --- a/PAMI/frequentPattern/closed/CHARM.py +++ b/PAMI/frequentPattern/closed/CHARM.py @@ -470,11 +470,14 @@ def getPatternsAsDataFrame(self): :rtype: pd.DataFrame """ - dataframe = {} - data = [] - for a, b in self._finalPatterns.items(): - data.append([a.replace('\t', ' '), b]) - dataframe = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + # dataframe = {} + # data = [] + # for a, b in self._finalPatterns.items(): + # data.append([a.replace('\t', ' '), b]) + # dataframe = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + + dataframe = _ab._pd.DataFrame(list([[x.replace('\t', ' '), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) + return dataframe def save(self, outFile): @@ -529,3 +532,8 @@ def printResults(self): print("Total ExecutionTime in ms:", _ap.getRuntime()) else: print("Error! 
The number of input parameters do not match the total number of parameters provided") + + obj = CHARM("/Users/tarunsreepada/Downloads/Transactional_T10I4D100K.csv", 0.01) + obj.mine() + print(obj.getPatternsAsDataFrame()) + print(obj.printResults()) \ No newline at end of file diff --git a/PAMI/frequentPattern/maximal/MaxFPGrowth.py b/PAMI/frequentPattern/maximal/MaxFPGrowth.py index 3a807d84..ffc1c254 100644 --- a/PAMI/frequentPattern/maximal/MaxFPGrowth.py +++ b/PAMI/frequentPattern/maximal/MaxFPGrowth.py @@ -718,21 +718,28 @@ def getRuntime(self): def getPatternsAsDataFrame(self): """ + Storing final frequent patterns in a dataframe + :return: returning frequent patterns in a dataframe :rtype: pd.DataFrame """ - dataFrame = {} - data = [] - for a, b in self._finalPatterns.items(): - data.append([a.replace('\t', ' '), b]) - dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) - return dataFrame + # dataframe = {} + # data = [] + # for a, b in self._finalPatterns.items(): + # data.append([a.replace('\t', ' '), b]) + # dataframe = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + + dataframe = _ab._pd.DataFrame(list([[x.replace('\t', ' '), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) + + return dataframe def save(self, outFile): """ - Complete set of frequent patterns will be loaded in to a output file + + Complete set of frequent patterns will be loaded in to an output file + :param outFile: name of the output file :type outFile: csvfile """ @@ -754,7 +761,7 @@ def printResults(self): """ This functon is used to print the results """ - print('Total number of Maximal Frequent Patterns: ' + str(self.getPatterns())) + print('Total number of Maximal Frequent Patterns: ' + str(len(self.getPatterns()))) print('Runtime: ' + str(self.getRuntime())) print('Memory (RSS): ' + str(self.getMemoryRSS())) print('Memory (USS): ' + str(self.getMemoryUSS())) @@ -776,3 +783,4 @@ def printResults(self): print("Total ExecutionTime in ms:", _ap.getRuntime()) else: print("Error! 
The number of input parameters do not match the total number of parameters provided") + diff --git a/PAMI/frequentPattern/topk/FAE.py b/PAMI/frequentPattern/topk/FAE.py index d4991f90..a51b3d1c 100644 --- a/PAMI/frequentPattern/topk/FAE.py +++ b/PAMI/frequentPattern/topk/FAE.py @@ -394,33 +394,36 @@ def getRuntime(self): def getPatternsAsDataFrame(self): """ + Storing final frequent patterns in a dataframe :return: returning frequent patterns in a dataframe - :rtype: pd.DataFrame """ - dataFrame = {} - data = [] - for a, b in self._finalPatterns.items(): - data.append([a.replace('\t', ' '), b]) - dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) - return dataFrame + # dataframe = {} + # data = [] + # for a, b in self._finalPatterns.items(): + # data.append([a.replace('\t', ' '), b]) + # dataframe = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + + dataframe = _ab._pd.DataFrame(list([[x.replace('\t', ' '), y] for x,y in self._finalPatterns.items()]), columns=['Patterns', 'Support']) + + return dataframe def save(self, outFile): """ + Complete set of frequent patterns will be loaded in to an output file :param outFile: name of the output file - - :type outFile: file + :type outFile: csvfile """ self._oFile = outFile writer = open(self._oFile, 'w+') for x, y in self._finalPatterns.items(): - patternsAndSupport = x.strip() + ":" + str(y) - writer.write("%s \n" % patternsAndSupport) + s1 = x.strip() + ":" + str(y) + writer.write("%s \n" % s1) def getPatterns(self): """ @@ -432,7 +435,7 @@ def getPatterns(self): """ return self._finalPatterns - def printTOPK(self): + def printResults(self): """ This function is used to print the results """ @@ -458,3 +461,4 @@ def printTOPK(self): print("Total ExecutionTime in ms:", _ap.getRuntime()) else: print("Error! The number of input parameters do not match the total number of parameters provided") + From 7824c90581c6db2636223cea55326166baca48c2 Mon Sep 17 00:00:00 2001 From: Tarun Sreepada Date: Fri, 24 May 2024 20:58:32 +0900 Subject: [PATCH 2/2] Update CoMine.py --- PAMI/correlatedPattern/basic/CoMine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PAMI/correlatedPattern/basic/CoMine.py b/PAMI/correlatedPattern/basic/CoMine.py index 0e5a140e..cb7c5d7e 100644 --- a/PAMI/correlatedPattern/basic/CoMine.py +++ b/PAMI/correlatedPattern/basic/CoMine.py @@ -321,7 +321,7 @@ def recursive(self, item, nodes, root): itemCounts[item] += count # print(newRoot.item, itemCounts.keys()) - itemCounts = {k:v for k, v in itemCounts.items() if v >= self._minSup} + itemCounts = {k:v for k, v in itemCounts.items() if v >= self._minSup and v/self._maxSup(newRoot.item, k) >= self._minAllConf} if len(itemCounts) == 0: return
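
Note on the pruning rule this series converges on: the rewritten CoMine keeps a candidate item only when both its support and its all-confidence clear the user thresholds, where all-conf(X) = sup(X) / max single-item support within X. That ratio is what `_maxSup()` computes in the new code, and PATCH 2/2 moves the check into the `itemCounts` filter so failing items are dropped before the recursion descends into them (the archived `_CoMine.py` only applied the check when saving an itemset). Below is a minimal, self-contained sketch of the measure on a toy database; the database, the item names, and the `all_confidence` helper are illustrative assumptions for this note and are not part of PAMI.

from collections import Counter

def all_confidence(itemset, support, item_supports):
    # all-conf(X) = sup(X) / max_{i in X} sup(i)
    return support / max(item_supports[i] for i in itemset)

# Toy transactional database (assumption for the example).
database = [["a", "b", "c"], ["a", "b"], ["a", "c"], ["b", "c"], ["a", "b", "c"]]

# Single-item supports, analogous to self._mapSupport in CoMine.
item_supports = Counter(item for transaction in database for item in transaction)

# Support of the candidate {a, b}: number of transactions containing both items.
sup_ab = sum(1 for transaction in database if {"a", "b"} <= set(transaction))  # 3

print(all_confidence(("a", "b"), sup_ab, item_supports))  # 3 / 4 = 0.75

# With, e.g., minSup = 2 and minAllConf = 0.5, {a, b} passes both checks and
# would survive the combined filter added in PATCH 2/2.

Because adding an item to a pattern can only lower its support and can only raise the maximum single-item support, all-confidence never increases as patterns grow, so filtering on it inside `recursive()` is a sound way to cut branches early rather than discarding them at save time.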