diff --git a/PAMI/extras/convert/DF2DB.py b/PAMI/extras/convert/DF2DB.py index c3abb2b1..83769d91 100644 --- a/PAMI/extras/convert/DF2DB.py +++ b/PAMI/extras/convert/DF2DB.py @@ -38,6 +38,16 @@ import PAMI.extras.convert.sparseDF2DB as sparse import sys,psutil,os,time from typing import Union +import operator + +condition_operator = { + '<': operator.lt, + '>': operator.gt, + '<=': operator.le, + '>=': operator.ge, + '==': operator.eq, + '!=': operator.ne +} class DF2DB: """ @@ -93,8 +103,12 @@ def __init__(self, inputDF, DFtype='dense') -> None: self._endTime = float() self._memoryUSS = float() self._memoryRSS = float() + self.tids = [] + self.items = [] + self.items = list(self.inputDF.columns.values) + self.tids = list(self.inputDF.index) - def convert2TransactionalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str: + def convert2TransactionalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> None: """ create transactional database and return oFileName :param oFile: file name or path to store database @@ -103,14 +117,28 @@ def convert2TransactionalDatabase(self, oFile: str, condition: str, thresholdVal :rtype: str """ self._startTime = time.time() - self.DF2DB.convert2TransactionalDatabase(oFile,condition,thresholdValue) + with open(oFile, 'w') as f: + if condition not in condition_operator: + print('Condition error') + else: + for tid in self.tids: + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + if len(transaction) > 1: + f.write(f'{transaction[0]}') + for item in transaction[1:]: + f.write(f'\t{item}') + elif len(transaction) == 1: + f.write(f'{transaction[0]}') + else: + continue + f.write('\n') process = psutil.Process(os.getpid()) self._memoryUSS = process.memory_full_info().uss self._memoryRSS = process.memory_info().rss self._endTime = time.time() - return self.DF2DB.getFileName() - def 
convert2TemporalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str: + def convert2TemporalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> None: """ create temporal database and return oFile name :param oFile: file name or path to store database @@ -119,14 +147,29 @@ def convert2TemporalDatabase(self, oFile: str, condition: str, thresholdValue: U :rtype: str """ self._startTime = time.time() - self.DF2DB.convert2TemporalDatabase(oFile,condition,thresholdValue) + with open(oFile, 'w') as f: + if condition not in condition_operator: + print('Condition error') + else: + for tid in self.tids: + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + if len(transaction) > 1: + f.write(f'{tid + 1}') + for item in transaction: + f.write(f'\t{item}') + elif len(transaction) == 1: + f.write(f'{tid + 1}') + f.write(f'\t{transaction[0]}') + else: + continue + f.write('\n') process = psutil.Process(os.getpid()) self._memoryUSS = process.memory_full_info().uss self._memoryRSS = process.memory_info().rss self._endTime = time.time() - return self.DF2DB.getFileName() - def convert2UtilityDatabase(self, oFile: str) -> str: + def convert2UtilityDatabase(self, oFile: str) ->None: """ create utility database and return oFile name :param oFile: file name or path to store database @@ -135,12 +178,22 @@ def convert2UtilityDatabase(self, oFile: str) -> str: :rtype: str """ self._startTime = time.time() - self.DF2DB.convert2UtilityDatabase(oFile) + with open(oFile, 'w') as f: + for tid in self.tids: + df = self.inputDF.loc[tid].dropna() + f.write(f'{df.index[0]}') + for item in df.index[1:]: + f.write(f'\t{item}') + f.write(f':{df.sum()}:') + f.write(f'{df.at[df.index[0]]}') + + for item in df.index[1:]: + f.write(f'\t{df.at[item]}') + f.write('\n') process = psutil.Process(os.getpid()) self._memoryUSS = process.memory_full_info().uss self._memoryRSS = 
process.memory_info().rss self._endTime = time.time() - return self.DF2DB.getFileName() def convert2geoReferencedTransactionalDatabase(self, oFile: str, condition: str, thresholdValue: Union[int, float]) -> str: """ @@ -151,7 +204,22 @@ def convert2geoReferencedTransactionalDatabase(self, oFile: str, condition: str, :rtype: str """ self._startTime = time.time() - self.DF2DB.convert2TransactionalDatabase(oFile,condition,thresholdValue) + with open(oFile, 'w') as f: + if condition not in condition_operator: + print('Condition error') + else: + for tid in self.tids: + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + if len(transaction) > 1: + f.write(f'{transaction[0]}') + for item in transaction[1:]: + f.write(f'\t{item}') + elif len(transaction) == 1: + f.write(f'{transaction[0]}') + else: + continue + f.write('\n') process = psutil.Process(os.getpid()) self._memoryUSS = process.memory_full_info().uss self._memoryRSS = process.memory_info().rss @@ -167,13 +235,116 @@ def convert2geoReferencedTemporalDatabase(self, oFile: str, condition: str, thre :rtype: str """ self._startTime = time.time() - self.DF2DB.convert2TemporalDatabase(oFile,condition,thresholdValue) + with open(oFile, 'w') as f: + if condition not in condition_operator: + print('Condition error') + else: + for tid in self.tids: + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + if len(transaction) > 1: + f.write(f'{tid + 1}') + for item in transaction: + f.write(f'\t{item}') + elif len(transaction) == 1: + f.write(f'{tid + 1}') + f.write(f'\t{transaction[0]}') + else: + continue + f.write('\n') process = psutil.Process(os.getpid()) self._memoryUSS = process.memory_full_info().uss self._memoryRSS = process.memory_info().rss self._endTime = time.time() return self.DF2DB.getFileName() + + def convert2MultipleTimeSeries(self, oFile: str, condition: str, + 
thresholdValue: Union[int, float], interval: int) -> None: + """ + :Description: Create the multiple time series database. + + :param oFile: Write multiple time series database into oFile. + + :type oFile: str + + :param interval: Breaks the given timeseries into intervals. + + :type interval: int + + :param condition: Condition used to judge the values in the dataframe + + :param thresholdValue: User defined value. + + :type thresholdValue: int or float + """ + self._startTime = time.time() + writer = open(oFile, 'w+') + # with open(self.outputFile, 'w+') as f: + count = 0 + tids = [] + items = [] + values = [] + for tid in self.tids: + count += 1 + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + for i in transaction: + tids.append(count) + items.append(i) + values.append(self.inputDF.at[tid, i]) + if count == interval: + s1, s, ss = str(), str(), str() + if len(values) > 0: + + for j in range(len(tids)): + s1 = s1 + str(tids[j]) + '\t' + for j in range(len(items)): + s = s + items[j] + '\t' + for j in range(len(values)): + ss = ss + str(values[j]) + '\t' + + s2 = s1 + ':' + s + ':' + ss + writer.write("%s\n" % s2) + tids, items, values = [], [], [] + count = 0 + process = psutil.Process(os.getpid()) + self._memoryUSS = process.memory_full_info().uss + self._memoryRSS = process.memory_info().rss + self._endTime = time.time() + + + + def convert2UncertainTransactionalDatabase(self, oFile: str, condition: str, + thresholdValue: Union[int, float]) -> None: + with open(oFile, 'w') as f: + if condition not in condition_operator: + print('Condition error') + else: + for tid in self.tids: + transaction = [item for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + uncertain = [self.inputDF.at[tid, item] for item in self.items if + condition_operator[condition](self.inputDF.at[tid, item], thresholdValue)] + if len(transaction) > 1: + 
f.write(f'{transaction[0]}') + for item in transaction[1:]: + f.write(f'\t{item}') + f.write(f':') + for value in uncertain: + tt = 0.1 + 0.036 * abs(25 - value) + tt = round(tt, 2) + f.write(f'\t{tt}') + elif len(transaction) == 1: + f.write(f'{transaction[0]}') + tt = 0.1 + 0.036 * abs(25 - uncertain[0]) + tt = round(tt, 2) + f.write(f':{tt}') + else: + continue + f.write('\n') + + def getMemoryUSS(self) -> float: """ Total amount of USS memory consumed by the mining process will be retrieved from this function diff --git a/PAMI/extras/syntheticDataGenerator/geoReferentialTransactionalDatabase.py b/PAMI/extras/syntheticDataGenerator/GeoReferentialTransactionalDatabase.py similarity index 91% rename from PAMI/extras/syntheticDataGenerator/geoReferentialTransactionalDatabase.py rename to PAMI/extras/syntheticDataGenerator/GeoReferentialTransactionalDatabase.py index 2a6b5b27..52b6b3e7 100644 --- a/PAMI/extras/syntheticDataGenerator/geoReferentialTransactionalDatabase.py +++ b/PAMI/extras/syntheticDataGenerator/GeoReferentialTransactionalDatabase.py @@ -37,7 +37,7 @@ import sys -class geoReferentialTransactionalDatabase: +class GeoReferentialTransactionalDatabase: """ :Description Generate a transactional database with the given number of lines, average number of items per line, and total number of items @@ -196,7 +196,7 @@ def create(self) -> None: self.db.append(nline) # self.db.append(line) - def save(self, sep, filename) -> None: + def save(self,filename, sep='\t') -> None: """ Save the transactional database to a file @@ -204,6 +204,10 @@ def save(self, sep, filename) -> None: :type filename: str + :param sep: separator for the items + + :type sep: str + :return: None """ @@ -225,15 +229,15 @@ def getTransactions(self) -> pd.DataFrame: return df -if __name__ == "__main__": - # test the class - db = generateSpatioTransactional(10, 5, 10, 1, 5, 5, 10) - db.create() - db.save('\t', '2.txt') - print(db.getTransactions()) - - obj = 
generateSpatioTransactional(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], - sys.argv[7]) - obj.create() - obj.save(sys.argv[8]) - # print(obj.getTransactions()) +# if __name__ == "__main__": +# # test the class +# # db = GenerateSpatioTransactional(10, 5, 10, 1, 5, 5, 10) +# # db.create() +# # db.save('\t', '2.txt') +# # print(db.getTransactions()) +# +# obj = GeoreferentialTransactionalDatabase(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], +# sys.argv[7]) +# obj.create() +# obj.save(sys.argv[8]) +# # print(obj.getTransactions()) diff --git a/PAMI/extras/syntheticDataGenerator/createSyntheticGeoreferentialTemporal.py b/PAMI/extras/syntheticDataGenerator/createSyntheticGeoreferentialTemporal.py index 16f3b02b..a46f76e4 100644 --- a/PAMI/extras/syntheticDataGenerator/createSyntheticGeoreferentialTemporal.py +++ b/PAMI/extras/syntheticDataGenerator/createSyntheticGeoreferentialTemporal.py @@ -61,9 +61,9 @@ def createGeoreferentialTemporalDatabase(self, outputFile: str) -> None: writer.write("%s \n" % st) count += 1 -if __name__ == "__main__": - _ap = str() - _ap = createSyntheticGeoreferentialTemporal(100000, 870, 10) - _ap.createGeoreferentialTemporalDatabase("T10_geo_temp.txt") -else: - print("Error! The number of input parameters do not match the total number of parameters provided") +# if __name__ == "__main__": +# _ap = str() +# _ap = createSyntheticGeoreferentialTemporal(100000, 870, 10) +# _ap.createGeoreferentialTemporalDatabase("T10_geo_temp.txt") +# else: +# print("Error! The number of input parameters do not match the total number of parameters provided")