diff --git a/PAMI/extras/syntheticDataGenerator/geoReferentialTransactionalDatabase.py b/PAMI/extras/syntheticDataGenerator/geoReferentialTransactionalDatabase.py new file mode 100644 index 00000000..2a6b5b27 --- /dev/null +++ b/PAMI/extras/syntheticDataGenerator/geoReferentialTransactionalDatabase.py @@ -0,0 +1,239 @@ +# generateTransactionalDatabase is a code used to convert the database into Temporal database. +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# from PAMI.extras.generateDatabase import generateTransactionalDatabase as db +# obj = db(10, 5, 10) +# obj.create() +# obj.save('db.txt') +# print(obj.getTransactions()) to get the transactional database as a pandas dataframe + +# **Running the code from the command line** +# -------------------------------------------------------- +# python generateDatabase.py 10 5 10 db.txt +# cat db.txt +# + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" + +import numpy as np +import pandas as pd +import sys + + +class geoReferentialTransactionalDatabase: + """ + :Description Generate a transactional database with the given number of lines, average number of items per line, and total number of items + + :Attributes: + numLines: int + - number of lines + avgItemsPerTransaction: int + - average number of items per line + numItems: int + - total number of items + + :Methods: + create: + Generate the transactional database + save: + Save the transactional database to a file + getTransactions: + Get the transactional database + + + + + """ + + def getPoint(self, x1, y1, x2, y2): + + return (np.random.randint(x1, x2), np.random.randint(y1, y2)) + + def __init__(self, databaseSize, avgItemsPerTransaction, numItems, x1, y1, x2, y2, sep='\t') -> None: + """ + Initialize the transactional database with the given parameters + + Parameters: + databaseSize: int - number of lines + avgItemsPerTransaction: int - average number of items per line + numItems: int - total number of items + """ + + self.databaseSize = databaseSize + self.avgItemsPerTransaction = avgItemsPerTransaction + self.numItems = numItems + self.db = [] + self.x1 = x1 + self.y1 = y1 + self.x2 = x2 + self.y2 = y2 + self.seperator = sep + + numPoints = (x2 - x1) * (y2 - y1) + if numItems > numPoints: + raise ValueError("Number of points is less than the number of lines * average items per line") + + self.itemPoint = {} + usedPoints = set() + + for i in range(1, numItems + 1): + # self.itemPoint[i] = (np.random.randint(x1, x2), np.random.randint(y1, y2)) + point = self.getPoint(x1, y1, x2, y2) + while point in usedPoints: + point = self.getPoint(x1, y1, x2, y2) + self.itemPoint[i] = point + + def tuning(self, array, sumRes) -> list: + """ + Tune the array so that the sum of the values is equal to sumRes + + :param array: list of values + + :type array: list + + :param sumRes: the sum of the values in the array to be tuned + + :type sumRes: int + + :return: list of values with the tuned values and the sum of the values in the array to be tuned and sumRes is equal to sumRes + + :rtype: list + """ + + while np.sum(array) != sumRes: + # get index of largest value + randIndex = np.random.randint(0, len(array)) + # if sum is too large, decrease the largest value + if np.sum(array) > sumRes: + array[randIndex] -= 1 + # if sum is too small, increase the smallest value + else: + minIndex = np.argmin(array) + array[randIndex] += 1 + return array + + def generateArray(self, nums, avg, maxItems) -> list: + """ + Generate a random array of length n whose values average to m + + :param nums: number of values + + :type nums: list + + :param avg: average value + + :type avg: float + + :param maxItems: maximum value + + :type maxItems: int + + :return: random array + + :rtype: list + """ + + # generate n random values + values = np.random.randint(1, maxItems, nums) + + sumRes = nums * avg + + self.tuning(values, sumRes) + + # if any value is less than 1, increase it and tune the array again + while np.any(values < 1): + for i in range(nums): + if values[i] < 1: + values[i] += 1 + self.tuning(values, sumRes) + + while np.any(values > maxItems): + for i in range(nums): + if values[i] > maxItems: + values[i] -= 1 + self.tuning(values, sumRes) + + # if all values are same then randomly increase one value and decrease another + while np.all(values == values[0]): + values[np.random.randint(0, nums)] += 1 + self.tuning(values, sumRes) + + return values + + def create(self) -> None: + """ + Generate the transactional database + :return: None + """ + db = set() + + values = self.generateArray(self.databaseSize, self.avgItemsPerTransaction, self.numItems) + + for value in values: + line = np.random.choice(range(1, self.numItems + 1), value, replace=False) + nline = [self.itemPoint[i] for i in line] + # print(line, nline) + # for i in range(len(line)): + # print(line[i], self.itemPoint[line[i]]) + # line[i] = self.itemPoint[line[i]] + self.db.append(nline) + # self.db.append(line) + + def save(self, sep, filename) -> None: + """ + Save the transactional database to a file + + :param filename: name of the file + + :type filename: str + + :return: None + """ + + with open(filename, 'w') as f: + for line in self.db: + # f.write(','.join(map(str, line)) + '\n') + line = list(map(str, line)) + f.write(sep.join(line) + '\n') + + def getTransactions(self) -> pd.DataFrame: + """ + Get the transactional database + + :return: the transactional database + + :rtype: pd.DataFrame + """ + df = pd.DataFrame(['\t'.join(map(str, line)) for line in self.db], columns=['Transactions']) + return df + + +if __name__ == "__main__": + # test the class + db = generateSpatioTransactional(10, 5, 10, 1, 5, 5, 10) + db.create() + db.save('\t', '2.txt') + print(db.getTransactions()) + + obj = generateSpatioTransactional(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], + sys.argv[7]) + obj.create() + obj.save(sys.argv[8]) + # print(obj.getTransactions()) diff --git a/setup.py b/setup.py index 266a28e7..6d0f3187 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name='pami', - version='2024.11.09.2', + version='2024.11.09.3', author='Rage Uday Kiran', author_email='uday.rage@gmail.com', description='This software is being developed at the University of Aizu, Aizu-Wakamatsu, Fukushima, Japan',