Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#2 updated the readtheDocs and CoMinePlus.py documentation #431

Merged
merged 1 commit into from
Jun 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions PAMI/correlatedPattern/basic/CoMine.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,13 +300,43 @@ def startMine(self) -> None:
self.mine()

def _maxSup(self, itemSet, item):
    """
    Return the largest single-item support among the members of ``itemSet`` and ``item``.

    Every member of ``itemSet``, and ``item`` itself, is looked up in
    ``self._mapSupport``; the maximum of those looked-up values is returned.

    :param itemSet: Items whose supports are compared. Each member must be a key of ``self._mapSupport``. May be empty.
    :type itemSet: iterable
    :param item: One additional item included in the comparison. Must be a key of ``self._mapSupport``.
    :type item: Any
    :return: The maximum support value found for the members of ``itemSet`` and for ``item``.
    :rtype: float or int
    """
    # Members of itemSet are looked up first, the extra item last, so the
    # result is well defined even when itemSet is empty.
    supports = [self._mapSupport[member] for member in itemSet]
    supports.append(self._mapSupport[item])
    return max(supports)

def _allConf(self, itemSet):
    """
    Calculate the all-confidence value for a pattern stored in ``self._finalPatterns``.

    The value is ``self._finalPatterns[itemSet]`` divided by the largest
    ``self._mapSupport`` value among the members of ``itemSet``.

    :param itemSet: The pattern. It is used directly as a key of ``self._finalPatterns``, so it must be hashable (e.g. a tuple; a list or plain set will not work). Each member must be a key of ``self._mapSupport``.
    :type itemSet: tuple or other hashable iterable
    :return: The all-confidence value for the itemSet.
    :rtype: float
    """
    # A generator is enough here; max() does not need a materialized list.
    return self._finalPatterns[itemSet] / max(self._mapSupport[member] for member in itemSet)

def recursive(self, item, nodes, root):
"""
Recursively build the tree structure for itemsets and find patterns that meet
the minimum support and all-confidence thresholds.

:param item: The current item being processed.
:type item: Any
:param nodes: The list of nodes to be processed.
:type nodes: list of _Node
:param root: The root node of the current tree.
:type root: _Node
:return: None
"""

if root.item is None:
newRoot = _Node([item], 0, None)
Expand All @@ -327,7 +357,7 @@ def recursive(self, item, nodes, root):
itemCounts = {k:v for k, v in itemCounts.items() if v >= self._minSup}
if len(itemCounts) == 0:
return

itemNodes = {}
for transaction, count in transactions:
transaction = [i for i in transaction if i in itemCounts]
Expand All @@ -340,8 +370,8 @@ def recursive(self, item, nodes, root):
itemNodes[item][0].add(node)
itemNodes[item][1] += count

itemNodes = {k:v for k, v in sorted(itemNodes.items(), key=lambda x: x[1][1], reverse=True)}
itemNodes = {k:v for k, v in sorted(itemNodes.items(), key=lambda x: x[1][1], reverse=True)}


for item in itemCounts:
conf = itemNodes[item][1] / self._maxSup(newRoot.item, item)
Expand Down
101 changes: 63 additions & 38 deletions PAMI/correlatedPattern/basic/CoMinePlus.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
# CoMine is one of the fundamental algorithm to discover correlated patterns in a transactional database.
# CoMinePlus is one of the fundamental algorithms to discover correlated patterns in a transactional database.
#
# **Importing this algorithm into a python program**
# --------------------------------------------------------
#
# from PAMI.correlatedPattern.basic import CoMine as alg
# from PAMI.correlatedPattern.basic import CoMinePlus as alg
#
# iFile = 'sampleTDB.txt'
#
# minSup = 0.25 # can be specified between 0 and 1
#
# minAllConf = 0.2 # can be specified between 0 and 1
#
# obj = alg.CoMine(iFile, minSup, minAllConf, sep)
# obj = alg.CoMinePlus(iFile, minSup, minAllConf, sep)
#
# obj.mine()
#
# Rules = obj.getPatterns()
# frequentPatterns = obj.getPatterns()
#
# print("Total number of Patterns:", len(Patterns))
# print("Total number of Patterns:", len(frequentPatterns))
#
# obj.save(oFile)
#
Expand Down Expand Up @@ -119,30 +118,30 @@ class CoMine(_ab._correlatedPatterns):
About this algorithm
====================

:**Description**: CoMine is one of the fundamental algorithm to discover correlated patterns in a transactional database. It is based on the traditional FP-Growth algorithm. This algorithm uses depth-first search technique to find all correlated patterns in a transactional database.
:**Description**: CoMinePlus is one of the fundamental algorithms to discover correlated patterns in a transactional database. It is based on the traditional FP-Growth algorithm. This algorithm uses a depth-first search technique to find all correlated patterns in a transactional database.

:**Reference**: Lee, Y.K., Kim, W.Y., Cao, D., Han, J. (2003). CoMine: efficient mining of correlated patterns. In ICDM (pp. 581–584).

:**parameters**: **iFile** (*str*) -- **Name of the Input file to mine complete set of correlated patterns**
**oFile** (*str*) -- **Name of the output file to store complete set of correlated patterns**
**minSup** (*int or float or str*) -- **The user can specify minSup either in count or proportion of database size. If the program detects the data type of minSup is integer, then it treats minSup is expressed in count.**
**minAllConf** (*float*) -- **The user can specify minAllConf values within the range (0, 1).**
**sep** (*str*) -- **This variable is used to distinguish items from one another in a transaction. The default seperator is tab space. However, the users can override their default separator.**

:**Attributes**: **memoryUSS** (*float*) -- **To store the total amount of USS memory consumed by the program**
**memoryRSS** (*float*) -- **To store the total amount of RSS memory consumed by the program**
**startTime** (*float*) -- **To record the start time of the mining process**
**endTime** (*float*) -- **To record the completion time of the mining process**
**minSup** (*int*) -- **The user given minSup**
**minAllConf** (*float*) -- **The user given minimum all confidence Ratio(should be in range of 0 to 1)**
**Database** (*list*) -- **To store the transactions of a database in list**
**mapSupport** (*Dictionary*) -- **To maintain the information of item and their frequency**
**lno** (*int*) -- **it represents the total no of transactions**
**tree** (*class*) -- **it represents the Tree class**
**itemSetCount** (*int*) -- **it represents the total no of patterns**
**finalPatterns** (*dict*) -- **it represents to store the patterns**
**itemSetBuffer** (*list*) -- **it represents the store the items in mining**
**maxPatternLength** (*int*) -- **it represents the constraint for pattern length**
:**parameters**: - **iFile** (*str*) -- *Name of the Input file to mine complete set of correlated patterns.*
- **oFile** (*str*) -- *Name of the output file to store complete set of correlated patterns.*
- **minSup** (*int or float or str*) -- *The user can specify minSup either as a count or as a proportion of the database size. If the program detects that the data type of minSup is integer, it treats minSup as a count.*
- **minAllConf** (*float*) -- *The user can specify minAllConf values within the range (0, 1).*
- **sep** (*str*) -- *This variable is used to distinguish items from one another in a transaction. The default separator is the tab character. However, users can override the default separator.*

:**Attributes**: - **memoryUSS** (*float*) -- *To store the total amount of USS memory consumed by the program.*
- **memoryRSS** (*float*) -- *To store the total amount of RSS memory consumed by the program.*
- **startTime** (*float*) -- *To record the start time of the mining process.*
- **endTime** (*float*) -- *To record the completion time of the mining process.*
- **minSup** (*int*) -- *The user given minSup.*
- **minAllConf** (*float*) -- *The user given minimum all-confidence ratio (should be in the range of 0 to 1).*
- **Database** (*list*) -- *To store the transactions of a database in list.*
- **mapSupport** (*Dictionary*) -- *To maintain the information of item and their frequency.*
- **lno** (*int*) -- *It represents the total number of transactions.*
- **tree** (*class*) -- *It represents the Tree class.*
- **itemSetCount** (*int*) -- *It represents the total number of patterns.*
- **finalPatterns** (*dict*) -- *It stores the patterns.*
- **itemSetBuffer** (*list*) -- *It stores the items during mining.*
- **maxPatternLength** (*int*) -- *it represents the constraint for pattern length.*

Execution methods
=================
Expand All @@ -153,33 +152,33 @@ class CoMine(_ab._correlatedPatterns):

Format:

(.venv) $ python3 CoMine.py <inputFile> <outputFile> <minSup> <minAllConf> <sep>
(.venv) $ python3 CoMinePlus.py <inputFile> <outputFile> <minSup> <minAllConf> <sep>

Example Usage:

(.venv) $ python3 CoMine.py sampleTDB.txt output.txt 0.25 0.2
(.venv) $ python3 CoMinePlus.py sampleTDB.txt output.txt 0.25 0.2

.. note:: minSup can be specified in support count or a value between 0 and 1.

**Calling from a python program**

.. code-block:: python

from PAMI.correlatedPattern.basic import CoMine as alg
from PAMI.correlatedPattern.basic import CoMinePlus as alg

iFile = 'sampleTDB.txt'

minSup = 0.25 # can be specified between 0 and 1

minAllConf = 0.2 # can be specified between 0 and 1

obj = alg.CoMine(iFile, minSup, minAllConf,sep)
obj = alg.CoMinePlus(iFile, minSup, minAllConf,sep)

obj.mine()

patterns = obj.getPatterns()
frequentPatterns = obj.getPatterns()

print("Total number of Patterns:", len(patterns))
print("Total number of Patterns:", len(frequentPatterns))

obj.save(oFile)

Expand All @@ -200,7 +199,7 @@ class CoMine(_ab._correlatedPatterns):
Credits
=======

The complete program was written by B.Sai Chitra under the supervision of Professor Rage Uday Kiran.
The complete program was written by B.Sai Chitra and revised by Tarun Sreepads under the supervision of Professor Rage Uday Kiran.

"""

Expand Down Expand Up @@ -294,20 +293,46 @@ def _convert(self, value: Union[int, float, str]) -> None:

@deprecated("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated.")
def startMine(self) -> None:
    """
    Deprecated entry point for the mining process.

    This method only delegates to ``mine()``. It is marked deprecated in
    favour of calling ``mine()`` directly.

    :return: None
    """
    self.mine()

def _maxSup(self, itemSet, item):
    """
    Return the largest single-item support among the members of ``itemSet`` and ``item``.

    Each member of ``itemSet`` and then ``item`` is looked up in
    ``self._mapSupport``; the maximum of the looked-up values is returned.

    :param itemSet: Items whose supports are compared. Each member must be a key of ``self._mapSupport``. May be empty.
    :type itemSet: iterable
    :param item: One additional item included in the comparison. Must be a key of ``self._mapSupport``.
    :type item: Any
    :return: The maximum support value found for the members of ``itemSet`` and for ``item``.
    :rtype: float or int
    """
    # Appending the extra item last keeps the lookup order itemSet-then-item
    # and guarantees max() always receives at least one value.
    supports = [self._mapSupport[member] for member in itemSet]
    supports.append(self._mapSupport[item])
    return max(supports)

def _allConf(self, itemSet):
    """
    Calculate the all-confidence value for a pattern stored in ``self._finalPatterns``.

    The value is ``self._finalPatterns[itemSet]`` divided by the largest
    ``self._mapSupport`` value among the members of ``itemSet``.

    :param itemSet: The pattern. It is used directly as a key of ``self._finalPatterns``, so it must be hashable (e.g. a tuple; a list or plain set will not work). Each member must be a key of ``self._mapSupport``.
    :type itemSet: tuple or other hashable iterable
    :return: The all-confidence value for the itemSet.
    :rtype: float
    """
    # A generator is enough here; max() does not need a materialized list.
    return self._finalPatterns[itemSet] / max(self._mapSupport[member] for member in itemSet)

def recursive(self, item, nodes, root):

"""
Recursively build the tree structure for itemsets and find patterns that meet
the minimum support and all-confidence thresholds.

:param item: The current item being processed.
:type item: Any
:param nodes: The list of nodes to be processed.
:type nodes: list of _Node
:param root: The root node of the current tree.
:type root: _Node
:return: None
"""

newRoot = _Node(root.item + [item], 0, None)

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified finalSphinxDocs/_build/doctrees/environment.pickle
Binary file not shown.
Loading