From 58ab44cbbece8c26329c70d2434e26dd19ad25a3 Mon Sep 17 00:00:00 2001 From: likhitha Date: Thu, 30 Dec 2021 18:34:41 +0530 Subject: [PATCH 1/3] Updating Eclat Signed-off-by: kundai kwangwari --- PAMI/extras/DF2DB/createTDB.py | 59 +++ PAMI/frequentPattern/basic/ECLAT.py | 4 +- PAMI/frequentPattern/basic/ECLATbitset.py | 328 ++++--------- .../frequentPattern/basic/dump/ECLATbitset.py | 432 ++++++++++++++++++ PAMI/fuzzyFrequentPatterns/basic/FFIMiner.py | 6 +- .../basic/PFPGrowth.py | 11 + 6 files changed, 608 insertions(+), 232 deletions(-) create mode 100644 PAMI/extras/DF2DB/createTDB.py create mode 100644 PAMI/frequentPattern/basic/dump/ECLATbitset.py diff --git a/PAMI/extras/DF2DB/createTDB.py b/PAMI/extras/DF2DB/createTDB.py new file mode 100644 index 00000000..8c8aeaca --- /dev/null +++ b/PAMI/extras/DF2DB/createTDB.py @@ -0,0 +1,59 @@ +import pandas as pd +from PAMI.frequentPattern.basic import FPGrowth as fp + + +class createTDB: + + def __init__(self, df, threshold): + self._df = df + self._threshold = int(threshold) + self._items = [] + self._updatedItems = [] + + def createTDB(self): + """ + Create transactional data base + + :returning a transactional database as DataFrame + """ + i = self._df.columns.values.tolist() + if 'sid' in i: + self._items = self._df['sid'].tolist() + for i in self._items: + i = i.split() + self._updatedItems.append([j for j in i if int(j) > self._threshold]) + + def savePatterns(self, outFile): + """ + Complete set of frequent patterns will be loaded in to a output file + + :param outFile: name of the output file + + :type outFile: file + """ + self._oFile = outFile + writer = open(self._oFile, 'w+') + for x in self._updatedItems: + s = str() + for j in x: + s = s + j + " " + writer.write("%s \n" % s) + + +if __name__ == '__main__': + a = createTDB('DataFrame', "1204150") + a.createTDB() + a.savePatterns('output.txt') + ap = fp.FPGrowth('output.txt', 500, ' ') + ap.startMine() + Patterns = ap.getPatterns() + print("Total number of Frequent Patterns:", len(Patterns)) + ap.savePatterns('fpoutput.txt') + memUSS = ap.getMemoryUSS() + print("Total Memory in USS:", memUSS) + memRSS = ap.getMemoryRSS() + print("Total Memory in RSS", memRSS) + run = ap.getRuntime() + print("Total ExecutionTime in ms:", run) + + diff --git a/PAMI/frequentPattern/basic/ECLAT.py b/PAMI/frequentPattern/basic/ECLAT.py index 5568bf3c..ee798115 100644 --- a/PAMI/frequentPattern/basic/ECLAT.py +++ b/PAMI/frequentPattern/basic/ECLAT.py @@ -217,8 +217,8 @@ def _generateFrequentPatterns(self, candidateFrequent): self._finalPatterns[newKey] = [interSet] new_freqList.append(newKey) - if len(new_freqList) > 0: - self._generateFrequentPatterns(new_freqList) + if len(new_freqList) > 0: + self._generateFrequentPatterns(new_freqList) def _convert(self, value): """ diff --git a/PAMI/frequentPattern/basic/ECLATbitset.py b/PAMI/frequentPattern/basic/ECLATbitset.py index 6aad3330..1d400b3c 100644 --- a/PAMI/frequentPattern/basic/ECLATbitset.py +++ b/PAMI/frequentPattern/basic/ECLATbitset.py @@ -1,32 +1,8 @@ -# Copyright (C) 2021 Rage Uday Kiran -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# Copyright (C) 2021 Rage Uday Kiran - -from PAMI.frequentPattern.basic import abstract as _ab - +import abstract as _ab class ECLATbitset(_ab._frequentPatterns): """ ECLATbitset is one of the fundamental algorithm to discover frequent patterns in a transactional database. - This program implemented following the eclat bitset algorithm. - - Reference: - ---------- - Zaki, M.J., Gouda, K.: Fast vertical mining using diffsets. Technical Report 01-1, Computer Science - Dept., Rensselaer Polytechnic Institute (March 2001), https://doi.org/10.1145/956750.956788 - Attributes: ----------- self.iFile : str @@ -53,77 +29,32 @@ class ECLATbitset(_ab._frequentPatterns): To store the total amount of RSS memory consumed by the program self.Database : list To store the complete set of transactions available in the input database/file - Methods: ------- - startMine() - Mining process will start from here - getPatterns() - Complete set of patterns will be retrieved with this function - savePatterns(oFile) - Complete set of frequent patterns will be loaded in to a output file - getPatternsAsDataFrame() - Complete set of frequent patterns will be loaded in to a dataframe - getMemoryUSS() - Total amount of USS memory consumed by the mining process will be retrieved from this function - getMemoryRSS() - Total amount of RSS memory consumed by the mining process will be retrieved from this function - getRuntime() - Total amount of runtime taken by the mining process will be retrieved from this function - creatingItemSets(iFileName) - Storing the complete transactions of the database/input file in a database variable - generationOfAllItems() - It will generate the combinations of frequent items - startMine() - the main function to mine the patterns - - Executing the code on terminal: - ------------------------------- - - Format: - ------- - python3 ECLATbitset.py - - Examples: - --------- - python3 ECLATbitset.py sampleDB.txt patterns.txt 10.0 (minSup will be considered in percentage of database transactions) - - python3 ECLATbitset.py sampleDB.txt patterns.txt 10 (minSup will be considered in support count or frequency) - - Sample run of the importing code: - --------------------------------- - - import PAMI.frequentPattern.basic.ECLATbitset as alg - - obj = alg.ECLATbitset(iFile, minSup) - - obj.startMine() - - frequentPatterns = obj.getPatterns() - - print("Total number of Frequent Patterns:", len(frequentPatterns)) - - obj.savePatterns(oFile) - - Df = obj.getPatternInDataFrame() - - memUSS = obj.getMemoryUSS() - - print("Total Memory in USS:", memUSS) - - memRSS = obj.getMemoryRSS() - - print("Total Memory in RSS", memRSS) - - run = obj.getRuntime() - - print("Total ExecutionTime in seconds:", run) - - Credits: - -------- - The complete program was written by P.Likhitha under the supervision of Professor Rage Uday Kiran. + startMine() + Mining process will start from here + getPatterns() + Complete set of patterns will be retrieved with this function + savePatterns(oFile) + Complete set of frequent patterns will be loaded in to a output file + getPatternsAsDataFrame() + Complete set of frequent patterns will be loaded in to a dataframe + getMemoryUSS() + Total amount of USS memory consumed by the mining process will be retrieved from this function + getMemoryRSS() + Total amount of RSS memory consumed by the mining process will be retrieved from this function + getRuntime() + Total amount of runtime taken by the mining process will be retrieved from this function + createFrequentItems() + Generate frequent items + tidToBitset(itemset) + Convert tid list to bit set + genPatterns(prefix, tidData) + Generate frequent patterns + genAllFrequentPatterns(frequentItems) + Generate all frequent patterns + """ - """ _startTime = float() _endTime = float() _finalPatterns = {} @@ -137,12 +68,11 @@ class ECLATbitset(_ab._frequentPatterns): _mapSupport = {} _lno = 0 + def _convert(self, value): """ To convert the user specified minSup value - :param value: user specified minSup value - :return: converted type """ if type(value) is int: @@ -160,7 +90,6 @@ def _convert(self, value): def _creatingItemSets(self): """ Storing the complete transactions of the database/input file in a database variable - """ self._Database = [] self._mapSupport = {} @@ -191,137 +120,95 @@ def _creatingItemSets(self): except IOError: print("File Not Found") self._minSup = self._convert(self._minSup) - - def _OneFrequentItems(self): - items = [] - p = {} - for i in self._Database: - for j in i: - if j not in items: - items.append(j) - for temp in self._Database: - for j in items: - count = 0 - if j in temp: - count = 1 - if j not in p: - p[j] = [count] - else: - p[j].append(count) - for x, y in p.items(): - if self._countSupport(y) >= self._minSup: - self._mapSupport[x] = y - pList = [key for key, value in sorted(self._mapSupport.items(), key=lambda x: (len(x[1])), reverse=True)] - return pList - - @staticmethod - def _countSupport(tids): - """To count support of 1's in tids - - :param tids: bitset representation of itemSets - :return: count + def creatingFrequentItems(self): """ - count = 0 - for i in tids: - if i == 1: - count += 1 - return count - - def _save(self, prefix, suffix, tidSetX): - """To save the patterns satisfying the minSup condition - - :param prefix: prefix item of itemSet - - :param suffix: suffix item of itemSet - - :param tidSetX: bitset representation of itemSet - - :return: saving the itemSet in to finalPatterns + This function creates frequent items from _database. + :return: frequentTidData that stores frequent items and their tid list. """ - if prefix is None: - prefix = suffix - else: - prefix = prefix + suffix - count = self._countSupport(tidSetX) - sample = str() - for i in prefix: - sample = sample + i + " " - self._finalPatterns[sample] = count - - def _generationOfAll(self, prefix, itemSets, tidSets): - """It will generate the combinations of frequent items with prefix and list of items - - :param prefix: it represents the prefix item to form the combinations - - :type prefix: list - - :param itemSets: it represents the suffix items of prefix - - :type itemSets: list - - :param tidSets: represents the tidLists of itemSets + tidData = {} + self._lno = 0 + for transaction in self._Database: + self._lno = self._lno + 1 + for item in transaction: + if item not in tidData: + tidData[item] = [self._lno] + else: + tidData[item].append(self._lno) + frequentTidData = {k: v for k, v in tidData.items() if len(v) >= self._minSup} + frequentTidData = dict(sorted(frequentTidData.items(), reverse=True, key=lambda x: len(x[1]))) + return frequentTidData - :type tidSets: 2d list + def tidToBitset(self,itemset): + """ + This function converts tid list to bitset. + :param itemset: + :return: + """ + bitset = {} + + for k,v in itemset.items(): + bitset[k] = 0b1 + bitset[k] = (bitset[k] << int(v[0])) | 0b1 + for i in range(1,len(v)): + diff = int(v[i]) - int(v[i-1]) + bitset[k] = (bitset[k] << diff) | 0b1 + bitset[k] = (bitset[k] << (self._lno - int(v[i]))) + return bitset + + def genPatterns(self,prefix,tidData): + """ + This function generate frequent pattern about prefix. + :param prefix: String + :param tidData: list + :return: + """ + # variables to store frequent item set and + itemset = prefix[0] + + # Get the length of tidData + length = len(tidData) + + for i in range(length): + #tid = prefix[1].intersection(tidData[i][1]) + tid = prefix[1] & tidData[i][1] + count = bin(tid).count("1") - 1 + #tidLength = len(tid) + if count >= self._minSup: + frequentItemset = itemset + ' ' + tidData[i][0] + self._finalPatterns[frequentItemset] = count + self.genPatterns((frequentItemset,tid),tidData[i+1:length]) + + def genAllFrequentPatterns(self,frequentItems): """ - if len(itemSets) == 1: - i = itemSets[0] - tidI = tidSets[0] - self._save(prefix, [i], tidI) - return - for i in range(len(itemSets)): - itemI = itemSets[i] - if itemI is None: - continue - tidSetX = tidSets[i] - classItemSets = [] - classTidSets = [] - itemSetx = [itemI] - for j in range(i + 1, len(itemSets)): - itemJ = itemSets[j] - tidSetJ = tidSets[j] - y = [k & l for k, l in zip(tidSetX, tidSetJ)] - support = self._countSupport(y) - if support >= self._minSup: - classItemSets.append(itemJ) - classTidSets.append(y) - newprefix = list(set(itemSetx)) + prefix - self._generationOfAll(newprefix, classItemSets, classTidSets) - del classItemSets, classTidSets - self._save(prefix, list(set(itemSetx)), tidSetX) - #raise Exception("end of time") + This function generates all frequent patterns. + :param frequentItems: frequent items + :return: + """ + tidData = list(frequentItems.items()) + length = len(tidData) + for i in range(length): + #print(i,tidData[i][0]) + self.genPatterns(tidData[i],tidData[i+1:length]) def startMine(self): """Frequent pattern mining process will start from here - We start with the scanning the itemSets and store the bitsets respectively. - We form the combinations of single items and check with minSup condition to check the frequency of patterns - """ + We start with the scanning the itemSets and store the bitsets respectively. + We form the combinations of single items and check with minSup condition to check the frequency of patterns + """ self._startTime = _ab._time.time() if self._iFile is None: raise Exception("Please enter the file path or file name:") if self._minSup is None: raise Exception("Please enter the Minimum Support") + self._creatingItemSets() - plist = self._OneFrequentItems() - self._finalPatterns = {} - for i in range(len(plist)): - itemI = plist[i] - tidSetX = self._mapSupport[itemI] - itemSetx = [itemI] - itemSets = [] - tidSets = [] - for j in range(i + 1, len(plist)): - itemJ = plist[j] - tidSetJ = self._mapSupport[itemJ] - y1 = [k & l for k, l in zip(tidSetX, tidSetJ)] - support = self._countSupport(y1) - if support >= self._minSup: - itemSets.append(itemJ) - tidSets.append(y1) - self._generationOfAll(itemSetx, itemSets, tidSets) - del itemSets, tidSets - self._save(None, itemSetx, tidSetX) + frequentItems = self.creatingFrequentItems() + self._finalPatterns = {k: len(v) for k, v in frequentItems.items()} + frequentItemsBitset = self.tidToBitset(frequentItems) + self.genAllFrequentPatterns(frequentItemsBitset) + self.savePatterns('output.txt') self._endTime = _ab._time.time() process = _ab._psutil.Process(_ab._os.getpid()) self._memoryUSS = float() @@ -332,9 +219,7 @@ def startMine(self): def getMemoryUSS(self): """Total amount of USS memory consumed by the mining process will be retrieved from this function - :return: returning USS memory consumed by the mining process - :rtype: float """ @@ -342,9 +227,7 @@ def getMemoryUSS(self): def getMemoryRSS(self): """Total amount of RSS memory consumed by the mining process will be retrieved from this function - :return: returning RSS memory consumed by the mining process - :rtype: float """ @@ -352,9 +235,7 @@ def getMemoryRSS(self): def getRuntime(self): """Calculating the total amount of runtime taken by the mining process - :return: returning total amount of runtime taken by the mining process - :rtype: float """ @@ -362,9 +243,7 @@ def getRuntime(self): def getPatternsAsDataFrame(self): """Storing final frequent patterns in a dataframe - :return: returning frequent patterns in a dataframe - :rtype: pd.DataFrame """ @@ -377,9 +256,7 @@ def getPatternsAsDataFrame(self): def savePatterns(self, outFile): """Complete set of frequent patterns will be loaded in to a output file - :param outFile: name of the output file - :type outFile: file """ self._oFile = outFile @@ -390,15 +267,12 @@ def savePatterns(self, outFile): def getPatterns(self): """ Function to send the set of frequent patterns after completion of the mining process - :return: returning frequent patterns - :rtype: dict """ return self._finalPatterns - -if __name__ == "__main__": +if __name__=="__main__": _ap = str() if len(_ab._sys.argv) == 4 or len(_ab._sys.argv) == 5: if len(_ab._sys.argv) == 5: @@ -416,7 +290,7 @@ def getPatterns(self): _run = _ap.getRuntime() print("Total ExecutionTime in ms:", _run) else: - '''l = [6000] + '''l = [2000] for i in l: ap = ECLATbitset('/Users/Likhitha/Downloads/mushrooms.txt', i, ' ') ap.startMine() diff --git a/PAMI/frequentPattern/basic/dump/ECLATbitset.py b/PAMI/frequentPattern/basic/dump/ECLATbitset.py new file mode 100644 index 00000000..3ad6cdf6 --- /dev/null +++ b/PAMI/frequentPattern/basic/dump/ECLATbitset.py @@ -0,0 +1,432 @@ +# Copyright (C) 2021 Rage Uday Kiran +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# Copyright (C) 2021 Rage Uday Kiran + +from PAMI.frequentPattern.basic import abstract as _ab + + +class ECLATbitset(_ab._frequentPatterns): + """ + ECLATbitset is one of the fundamental algorithm to discover frequent patterns in a transactional database. + This program implemented following the eclat bitset algorithm. + + Reference: + ---------- + Zaki, M.J., Gouda, K.: Fast vertical mining using diffsets. Technical Report 01-1, Computer Science + Dept., Rensselaer Polytechnic Institute (March 2001), https://doi.org/10.1145/956750.956788 + + Attributes: + ----------- + self.iFile : str + Input file name or path of the input file + minSup: float or int or str + The user can specify minSup either in count or proportion of database size. + If the program detects the data type of minSup is integer, then it treats minSup is expressed in count. + Otherwise, it will be treated as float. + Example: minSup=10 will be treated as integer, while minSup=10.0 will be treated as float + sep : str + This variable is used to distinguish items from one another in a transaction. The default separator is tab space or \t. + However, the users can override their default separator. + self.oFile : str + Name of the output file or path of the output file + self.startTime:float + To record the start time of the mining process + self.endTime:float + To record the completion time of the mining process + self.finalPatterns: dict + Storing the complete set of patterns in a dictionary variable + self.memoryUSS : float + To store the total amount of USS memory consumed by the program + self.memoryRSS : float + To store the total amount of RSS memory consumed by the program + self.Database : list + To store the complete set of transactions available in the input database/file + + Methods: + ------- + startMine() + Mining process will start from here + getPatterns() + Complete set of patterns will be retrieved with this function + savePatterns(oFile) + Complete set of frequent patterns will be loaded in to a output file + getPatternsAsDataFrame() + Complete set of frequent patterns will be loaded in to a dataframe + getMemoryUSS() + Total amount of USS memory consumed by the mining process will be retrieved from this function + getMemoryRSS() + Total amount of RSS memory consumed by the mining process will be retrieved from this function + getRuntime() + Total amount of runtime taken by the mining process will be retrieved from this function + creatingItemSets(iFileName) + Storing the complete transactions of the database/input file in a database variable + generationOfAllItems() + It will generate the combinations of frequent items + startMine() + the main function to mine the patterns + + Executing the code on terminal: + ------------------------------- + + Format: + ------- + python3 ECLATbitset.py + + Examples: + --------- + python3 ECLATbitset.py sampleDB.txt patterns.txt 10.0 (minSup will be considered in percentage of database transactions) + + python3 ECLATbitset.py sampleDB.txt patterns.txt 10 (minSup will be considered in support count or frequency) + + Sample run of the importing code: + --------------------------------- + + import PAMI.frequentPattern.basic.ECLATbitset as alg + + obj = alg.ECLATbitset(iFile, minSup) + + obj.startMine() + + frequentPatterns = obj.getPatterns() + + print("Total number of Frequent Patterns:", len(frequentPatterns)) + + obj.savePatterns(oFile) + + Df = obj.getPatternInDataFrame() + + memUSS = obj.getMemoryUSS() + + print("Total Memory in USS:", memUSS) + + memRSS = obj.getMemoryRSS() + + print("Total Memory in RSS", memRSS) + + run = obj.getRuntime() + + print("Total ExecutionTime in seconds:", run) + + Credits: + -------- + The complete program was written by P.Likhitha under the supervision of Professor Rage Uday Kiran. + + """ + _startTime = float() + _endTime = float() + _finalPatterns = {} + _iFile = " " + _oFile = " " + _sep = " " + _minSup = str() + _memoryUSS = float() + _memoryRSS = float() + _Database = [] + _mapSupport = {} + _lno = 0 + + def _convert(self, value): + """ + To convert the user specified minSup value + + :param value: user specified minSup value + + :return: converted type + """ + if type(value) is int: + value = int(value) + if type(value) is float: + value = (len(self._Database) * value) + if type(value) is str: + if '.' in value: + value = float(value) + value = (len(self._Database) * value) + else: + value = int(value) + return value + + def _creatingItemSets(self): + """ + Storing the complete transactions of the database/input file in a database variable + + """ + self._Database = [] + self._mapSupport = {} + if isinstance(self._iFile, _ab._pd.DataFrame): + if self._iFile.empty: + print("its empty..") + i = self._iFile.columns.values.tolist() + if 'Transactions' in i: + self._Database = self._iFile['Transactions'].tolist() + + if isinstance(self._iFile, str): + if _ab._validators.url(self._iFile): + data = _ab._urlopen(self._iFile) + for line in data: + line.strip() + line = line.decode("utf-8") + temp = [i.rstrip() for i in line.split(self._sep)] + temp = [x for x in temp if x] + self._Database.append(temp) + else: + try: + with open(self._iFile, 'r') as f: + for line in f: + self._lno += 1 + splitter = [i.rstrip() for i in line.split(self._sep)] + splitter = [x for x in splitter if x] + self._Database.append(splitter) + except IOError: + print("File Not Found") + self._minSup = self._convert(self._minSup) + + def _OneFrequentItems(self): + items = [] + p = {} + for i in self._Database: + for j in i: + if j not in items: + items.append(j) + for temp in self._Database: + for j in items: + count = 0 + if j in temp: + count = 1 + if j not in p: + p[j] = [count] + else: + p[j].append(count) + for x, y in p.items(): + if self._countSupport(y) >= self._minSup: + self._mapSupport[x] = y + pList = [key for key, value in sorted(self._mapSupport.items(), key=lambda x: (len(x[1])), reverse=True)] + return pList + + @staticmethod + def _countSupport(tids): + """To count support of 1's in tids + + :param tids: bitset representation of itemSets + + :return: count + """ + count = 0 + for i in tids: + if i == 1: + count += 1 + return count + + def _save(self, prefix, suffix, tidSetX): + """To save the patterns satisfying the minSup condition + + :param prefix: prefix item of itemSet + + :param suffix: suffix item of itemSet + + :param tidSetX: bitset representation of itemSet + + :return: saving the itemSet in to finalPatterns + """ + if prefix is None: + prefix = suffix + else: + prefix = prefix + suffix + count = self._countSupport(tidSetX) + sample = str() + for i in prefix: + sample = sample + i + " " + self._finalPatterns[sample] = count + + def _generationOfAll(self, prefix, itemSets, tidSets): + """It will generate the combinations of frequent items with prefix and list of items + + :param prefix: it represents the prefix item to form the combinations + + :type prefix: list + + :param itemSets: it represents the suffix items of prefix + + :type itemSets: list + + :param tidSets: represents the tidLists of itemSets + + :type tidSets: 2d list + """ + if len(itemSets) == 1: + i = itemSets[0] + tidI = tidSets[0] + self._save(prefix, [i], tidI) + return + for i in range(len(itemSets)): + itemI = itemSets[i] + if itemI is None: + continue + tidSetX = tidSets[i] + classItemSets = [] + classTidSets = [] + itemSetx = [itemI] + for j in range(i + 1, len(itemSets)): + itemJ = itemSets[j] + tidSetJ = tidSets[j] + y = [k & l for k, l in zip(tidSetX, tidSetJ)] + support = self._countSupport(y) + if support >= self._minSup: + classItemSets.append(itemJ) + classTidSets.append(y) + newprefix = list(set(itemSetx)) + prefix + self._generationOfAll(newprefix, classItemSets, classTidSets) + del classItemSets, classTidSets + self._save(prefix, list(set(itemSetx)), tidSetX) + # raise Exception("end of time") + + def startMine(self): + """Frequent pattern mining process will start from here + We start with the scanning the itemSets and store the bitsets respectively. + We form the combinations of single items and check with minSup condition to check the frequency of patterns + """ + + self._startTime = _ab._time.time() + if self._iFile is None: + raise Exception("Please enter the file path or file name:") + if self._minSup is None: + raise Exception("Please enter the Minimum Support") + self._creatingItemSets() + plist = self._OneFrequentItems() + self._finalPatterns = {} + for i in range(len(plist)): + itemI = plist[i] + tidSetX = self._mapSupport[itemI] + itemSetx = [itemI] + itemSets = [] + tidSets = [] + for j in range(i + 1, len(plist)): + itemJ = plist[j] + tidSetJ = self._mapSupport[itemJ] + y1 = [k & l for k, l in zip(tidSetX, tidSetJ)] + support = self._countSupport(y1) + if support >= self._minSup: + itemSets.append(itemJ) + tidSets.append(y1) + self._generationOfAll(itemSetx, itemSets, tidSets) + del itemSets, tidSets + self._save(None, itemSetx, tidSetX) + self._endTime = _ab._time.time() + process = _ab._psutil.Process(_ab._os.getpid()) + self._memoryUSS = float() + self._memoryRSS = float() + self._memoryUSS = process.memory_full_info().uss + self._memoryRSS = process.memory_info().rss + print("Frequent patterns were generated successfully using Eclat_bitset algorithm") + + def getMemoryUSS(self): + """Total amount of USS memory consumed by the mining process will be retrieved from this function + + :return: returning USS memory consumed by the mining process + + :rtype: float + """ + + return self._memoryUSS + + def getMemoryRSS(self): + """Total amount of RSS memory consumed by the mining process will be retrieved from this function + + :return: returning RSS memory consumed by the mining process + + :rtype: float + """ + + return self._memoryRSS + + def getRuntime(self): + """Calculating the total amount of runtime taken by the mining process + + :return: returning total amount of runtime taken by the mining process + + :rtype: float + """ + + return self._endTime - self._startTime + + def getPatternsAsDataFrame(self): + """Storing final frequent patterns in a dataframe + + :return: returning frequent patterns in a dataframe + + :rtype: pd.DataFrame + """ + + dataFrame = {} + data = [] + for a, b in self._finalPatterns.items(): + data.append([a, b]) + dataFrame = _ab._pd.DataFrame(data, columns=['Patterns', 'Support']) + return dataFrame + + def savePatterns(self, outFile): + """Complete set of frequent patterns will be loaded in to a output file + + :param outFile: name of the output file + + :type outFile: file + """ + self._oFile = outFile + writer = open(self._oFile, 'w+') + for x, y in self._finalPatterns.items(): + patternsAndSupport = x + ":" + str(y) + writer.write("%s \n" % patternsAndSupport) + + def getPatterns(self): + """ Function to send the set of frequent patterns after completion of the mining process + + :return: returning frequent patterns + + :rtype: dict + """ + return self._finalPatterns + + +if __name__ == "__main__": + _ap = str() + if len(_ab._sys.argv) == 4 or len(_ab._sys.argv) == 5: + if len(_ab._sys.argv) == 5: + _ap = ECLATbitset(_ab._sys.argv[1], _ab._sys.argv[3], _ab._sys.argv[4]) + if len(_ab._sys.argv) == 4: + _ap = ECLATbitset(_ab._sys.argv[1], _ab._sys.argv[3]) + _ap.startMine() + _Patterns = _ap.getPatterns() + print("Total number of Frequent Patterns:", len(_Patterns)) + _ap.savePatterns(_ab._sys.argv[2]) + _memUSS = _ap.getMemoryUSS() + print("Total Memory in USS:", _memUSS) + _memRSS = _ap.getMemoryRSS() + print("Total Memory in RSS", _memRSS) + _run = _ap.getRuntime() + print("Total ExecutionTime in ms:", _run) + else: + '''l = [2000] + for i in l: + ap = ECLATbitset('/Users/Likhitha/Downloads/mushrooms.txt', i, ' ') + ap.startMine() + Patterns = ap.getPatterns() + print("Total number of Frequent Patterns:", len(Patterns)) + ap.savePatterns('/Users/Likhitha/Downloads/output') + memUSS = ap.getMemoryUSS() + print("Total Memory in USS:", memUSS) + memRSS = ap.getMemoryRSS() + print("Total Memory in RSS", memRSS) + run = ap.getRuntime() + print("Total ExecutionTime in ms:", run)''' + print("Error! The number of input parameters do not match the total number of parameters provided") \ No newline at end of file diff --git a/PAMI/fuzzyFrequentPatterns/basic/FFIMiner.py b/PAMI/fuzzyFrequentPatterns/basic/FFIMiner.py index 12bca8d2..4074f667 100644 --- a/PAMI/fuzzyFrequentPatterns/basic/FFIMiner.py +++ b/PAMI/fuzzyFrequentPatterns/basic/FFIMiner.py @@ -259,7 +259,7 @@ class FFIMiner(_ab._fuzzyFrequentPattenrs): The complete program was written by B.Sai Chitra under the supervision of Professor Rage Uday Kiran. """ _startTime = float() - -endTime = float() + _endTime = float() _minSup = str() _maxPer = float() _finalPatterns = {} @@ -622,7 +622,7 @@ def savePatterns(self, outFile): _run = _ap.getRuntime() print("Total ExecutionTime in seconds:", _run) else: - l = [1800, 1900, 2000, 2200, 2400] + '''l = [1800, 1900, 2000, 2200, 2400] for i in l: ap = FFIMiner('/Users/Likhitha/Downloads/mushroom_utility_spmf.txt', i, ' ') ap.startMine() @@ -634,5 +634,5 @@ def savePatterns(self, outFile): memRSS = ap.getMemoryRSS() print("Total Memory in RSS", memRSS) run = ap.getRuntime() - print("Total ExecutionTime in seconds:", run) + print("Total ExecutionTime in seconds:", run)''' print("Error! The number of input parameters do not match the total number of parameters provided") diff --git a/PAMI/periodicFrequentPattern/basic/PFPGrowth.py b/PAMI/periodicFrequentPattern/basic/PFPGrowth.py index 34b7ce4c..7047caf6 100644 --- a/PAMI/periodicFrequentPattern/basic/PFPGrowth.py +++ b/PAMI/periodicFrequentPattern/basic/PFPGrowth.py @@ -646,4 +646,15 @@ def getPatterns(self): _run = _ap.getRuntime() print("Total ExecutionTime in ms:", _run) else: + '''ap = PFPGrowth('/Users/Likhitha/Downloads/temporal', 100, 1000, ' ') + ap.startMine() + Patterns = ap.getPatterns() + print("Total number of Frequent Patterns:", len(Patterns)) + ap.savePatterns('/Users/Likhitha/Downloads/output') + memUSS = ap.getMemoryUSS() + print("Total Memory in USS:", memUSS) + memRSS = ap.getMemoryRSS() + print("Total Memory in RSS", memRSS) + run = ap.getRuntime() + print("Total ExecutionTime in ms:", run)''' print("Error! The number of input parameters do not match the total number of parameters provided") From 09e9f51fc339f57043b928521a2afd5abf95baa1 Mon Sep 17 00:00:00 2001 From: kundai kwangwari Date: Thu, 30 Dec 2021 22:38:52 +0900 Subject: [PATCH 2/3] Updating Eclat Signed-off-by: kundai kwangwari --- PAMI/frequentPattern/basic/ECLAT.py | 1 + 1 file changed, 1 insertion(+) diff --git a/PAMI/frequentPattern/basic/ECLAT.py b/PAMI/frequentPattern/basic/ECLAT.py index ee798115..659e8012 100644 --- a/PAMI/frequentPattern/basic/ECLAT.py +++ b/PAMI/frequentPattern/basic/ECLAT.py @@ -217,6 +217,7 @@ def _generateFrequentPatterns(self, candidateFrequent): self._finalPatterns[newKey] = [interSet] new_freqList.append(newKey) + if len(new_freqList) > 0: self._generateFrequentPatterns(new_freqList) From acfae0eeac520cc3c5badb29dd756ed181408da5 Mon Sep 17 00:00:00 2001 From: kundai kwangwari Date: Thu, 30 Dec 2021 22:40:28 +0900 Subject: [PATCH 3/3] Updating Eclat Signed-off-by: kundai kwangwari --- PAMI/frequentPattern/basic/ECLAT.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PAMI/frequentPattern/basic/ECLAT.py b/PAMI/frequentPattern/basic/ECLAT.py index 659e8012..9a4ab1ef 100644 --- a/PAMI/frequentPattern/basic/ECLAT.py +++ b/PAMI/frequentPattern/basic/ECLAT.py @@ -216,7 +216,7 @@ def _generateFrequentPatterns(self, candidateFrequent): newKey = item1 + " " + i2_list[-1] self._finalPatterns[newKey] = [interSet] new_freqList.append(newKey) - + else: break if len(new_freqList) > 0: self._generateFrequentPatterns(new_freqList)