From 0710c82880916a54a4552bee54590bfb009aae51 Mon Sep 17 00:00:00 2001 From: Tarun Sreepada Date: Wed, 29 May 2024 18:21:39 +0900 Subject: [PATCH] standard format Dataframe has one column where each transaction is a string connected by a separator. --- PAMI/correlatedPattern/basic/CoMine.py | 3 +++ PAMI/correlatedPattern/basic/CoMinePlus.py | 3 +++ .../syntheticDataGenerator/TransactionalDatabase.py | 5 ++--- PAMI/frequentPattern/basic/Apriori.py | 8 ++++---- PAMI/frequentPattern/basic/Aprioribitset.py | 3 +++ PAMI/frequentPattern/basic/ECLAT.py | 3 +++ PAMI/frequentPattern/basic/ECLATDiffset.py | 3 +++ PAMI/frequentPattern/basic/ECLATbitset.py | 3 +++ PAMI/frequentPattern/basic/FPGrowth.py | 4 ++++ PAMI/frequentPattern/closed/CHARM.py | 4 ++++ PAMI/frequentPattern/maximal/MaxFPGrowth.py | 4 ++++ PAMI/frequentPattern/topk/FAE.py | 3 +++ 12 files changed, 39 insertions(+), 7 deletions(-) diff --git a/PAMI/correlatedPattern/basic/CoMine.py b/PAMI/correlatedPattern/basic/CoMine.py index 0e5a140e..c28e7c02 100644 --- a/PAMI/correlatedPattern/basic/CoMine.py +++ b/PAMI/correlatedPattern/basic/CoMine.py @@ -247,6 +247,9 @@ def _creatingItemSets(self) -> None: i = self._iFile.columns.values.tolist() if 'Transactions' in i: self._Database = self._iFile['Transactions'].tolist() + self._Database = [x.split(self._sep) for x in self._Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the user") if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): data = _ab._urlopen(self._iFile) diff --git a/PAMI/correlatedPattern/basic/CoMinePlus.py b/PAMI/correlatedPattern/basic/CoMinePlus.py index 73bb188b..ef5bb10d 100644 --- a/PAMI/correlatedPattern/basic/CoMinePlus.py +++ b/PAMI/correlatedPattern/basic/CoMinePlus.py @@ -247,6 +247,9 @@ def _creatingItemSets(self) -> None: i = self._iFile.columns.values.tolist() if 'Transactions' in i: self._Database = 
self._iFile['Transactions'].tolist() + self._Database = [x.split(self._sep) for x in self._Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the user") if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): data = _ab._urlopen(self._iFile) diff --git a/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py b/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py index 9f8aa7c8..817b8c46 100644 --- a/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py +++ b/PAMI/extras/syntheticDataGenerator/TransactionalDatabase.py @@ -173,7 +173,7 @@ def save(self, filename) -> None: for line in self.db: f.write(str(self.seperator).join(map(str, line)) + '\n') - def getTransactions(self) -> pd.DataFrame: + def getTransactions(self, sep = "\t") -> pd.DataFrame: """ Get the transactional database in dataFrame format @@ -182,8 +182,7 @@ def getTransactions(self) -> pd.DataFrame: """ column = "Transactions" db = pd.DataFrame(columns=[column]) - temp = ["\t".join([str(a) for a in x]) for x in self.db] - db[column] = temp + db[column] = [sep.join(map(str, line)) for line in self.db] return db diff --git a/PAMI/frequentPattern/basic/Apriori.py b/PAMI/frequentPattern/basic/Apriori.py index b0496990..1ea2accc 100644 --- a/PAMI/frequentPattern/basic/Apriori.py +++ b/PAMI/frequentPattern/basic/Apriori.py @@ -161,10 +161,10 @@ def _creatingItemSets(self) -> None: print("its empty..") i = self._iFile.columns.values.tolist() if 'Transactions' in i: - temp = self._iFile['Transactions'].tolist() - - for k in temp: - self._Database.append(set(k)) + self._Database = self._iFile['Transactions'].tolist() + self._Database = [x.split(self._sep) for x in self._Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the user") if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): 
data = _ab._urlopen(self._iFile) diff --git a/PAMI/frequentPattern/basic/Aprioribitset.py b/PAMI/frequentPattern/basic/Aprioribitset.py index 1a530de7..b4836e70 100644 --- a/PAMI/frequentPattern/basic/Aprioribitset.py +++ b/PAMI/frequentPattern/basic/Aprioribitset.py @@ -191,6 +191,9 @@ def _creatingItemSets(self): i = self._iFile.columns.values.tolist() if 'Transactions' in i: self._Database = self._iFile['Transactions'].tolist() + self._Database = [x.split(self._sep) for x in self._Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the user") if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): diff --git a/PAMI/frequentPattern/basic/ECLAT.py b/PAMI/frequentPattern/basic/ECLAT.py index c926a137..d8a029e7 100644 --- a/PAMI/frequentPattern/basic/ECLAT.py +++ b/PAMI/frequentPattern/basic/ECLAT.py @@ -162,6 +162,9 @@ def _creatingItemSets(self) -> float: i = self._iFile.columns.values.tolist() if 'Transactions' in i: self._Database = self._iFile['Transactions'].tolist() + self._Database = [x.split(self._sep) for x in self._Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the user") if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): data = _ab._urlopen(self._iFile) diff --git a/PAMI/frequentPattern/basic/ECLATDiffset.py b/PAMI/frequentPattern/basic/ECLATDiffset.py index f535ae50..c36ded2c 100644 --- a/PAMI/frequentPattern/basic/ECLATDiffset.py +++ b/PAMI/frequentPattern/basic/ECLATDiffset.py @@ -161,6 +161,9 @@ def _creatingItemSets(self): i = self._iFile.columns.values.tolist() if 'Transactions' in i: self._Database = self._iFile['Transactions'].tolist() + self._Database = [x.split(self._sep) for x in self._Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the 
user") if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): data = _ab._urlopen(self._iFile) diff --git a/PAMI/frequentPattern/basic/ECLATbitset.py b/PAMI/frequentPattern/basic/ECLATbitset.py index b35311da..979a7cc6 100644 --- a/PAMI/frequentPattern/basic/ECLATbitset.py +++ b/PAMI/frequentPattern/basic/ECLATbitset.py @@ -178,6 +178,9 @@ def _creatingItemSets(self): i = self._iFile.columns.values.tolist() if 'Transactions' in i: self._Database = self._iFile['Transactions'].tolist() + self._Database = [x.split(self._sep) for x in self._Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the user") if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): diff --git a/PAMI/frequentPattern/basic/FPGrowth.py b/PAMI/frequentPattern/basic/FPGrowth.py index 7bd55bf8..6c28b4e6 100644 --- a/PAMI/frequentPattern/basic/FPGrowth.py +++ b/PAMI/frequentPattern/basic/FPGrowth.py @@ -227,6 +227,10 @@ def __creatingItemSets(self) -> None: i = self._iFile.columns.values.tolist() if 'Transactions' in i: self.__Database = self._iFile['Transactions'].tolist() + self.__Database = [x.split(self._sep) for x in self.__Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the user") + #print(self.Database) if isinstance(self._iFile, str): diff --git a/PAMI/frequentPattern/closed/CHARM.py b/PAMI/frequentPattern/closed/CHARM.py index 4695cde8..ee36ff89 100644 --- a/PAMI/frequentPattern/closed/CHARM.py +++ b/PAMI/frequentPattern/closed/CHARM.py @@ -193,6 +193,10 @@ def _creatingItemsets(self): i = self._iFile.columns.values.tolist() if 'Transactions' in i: self._Database = self._iFile['Transactions'].tolist() + self._Database = [i.split(self._sep) for i in self._Database] + + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator 
specified by the user") for i in self._Database: self._lno += 1 for j in i: diff --git a/PAMI/frequentPattern/maximal/MaxFPGrowth.py b/PAMI/frequentPattern/maximal/MaxFPGrowth.py index ffc1c254..0632c7de 100644 --- a/PAMI/frequentPattern/maximal/MaxFPGrowth.py +++ b/PAMI/frequentPattern/maximal/MaxFPGrowth.py @@ -533,6 +533,10 @@ def _creatingItemSets(self): i = self._iFile.columns.values.tolist() if 'Transactions' in i: self._Database = self._iFile['Transactions'].tolist() + self._Database = [x.split(self._sep) for x in self._Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the user") + if isinstance(self._iFile, str): if _ab._validators.url(self._iFile): data = _ab._urlopen(self._iFile) diff --git a/PAMI/frequentPattern/topk/FAE.py b/PAMI/frequentPattern/topk/FAE.py index 2dba7edd..81098326 100644 --- a/PAMI/frequentPattern/topk/FAE.py +++ b/PAMI/frequentPattern/topk/FAE.py @@ -160,6 +160,9 @@ def _creatingItemSets(self): i = self._iFile.columns.values.tolist() if 'Transactions' in i: self._Database = self._iFile['Transactions'].tolist() + self._Database = [x.split(self._sep) for x in self._Database] + else: + print("The column name should be Transactions and each line should be separated by tab space or a seperator specified by the user") # print(self.Database) if isinstance(self._iFile, str):