Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made changes to gSpan and implemented method to get flat transactions #421

Merged
merged 4 commits into from
May 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 26 additions & 9 deletions PAMI/subgraphMining/basic/gspan.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def save(self, oFile):
The `save` function writes information about frequent subgraphs to a specified
output file in a specific format.

:param outputPath: The `save` method is used to write the results of frequent
:param oFile: The `save` method is used to write the results of frequent
subgraphs to a file specified by the `oFile` parameter. The method iterates over each
frequent subgraph in `self.frequentSubgraphs` and writes the subgraph information to the file
"""
Expand Down Expand Up @@ -231,7 +231,7 @@ def subgraphIsomorphisms(self, c: _ab.DFSCode, g: _ab.Graph):
for mappedV2 in g.getAllNeighbors(mappedV1):
if (v2Label == mappedV2.getLabel() and
mappedV2.getId() not in mappedVertices and
eLabel == g.getEdgeLabel(mappedV1, mappedV2.getId())):
eLabel == g.getEdgeLabel(mappedV1, mappedV2.getId())):

tempM = iso.copy()
tempM[v2] = mappedV2.getId()
Expand Down Expand Up @@ -445,7 +445,7 @@ def isCanonical(self, c: _ab.DFSCode):
"""
canC = _ab.DFSCode()
for i in range(c.size):
extensions = self.rightMostPathExtensionsFromSingle(canC, _ab.Graph(c))
extensions = self.rightMostPathExtensionsFromSingle(canC, _ab.Graph(-1, None, c))
minEe = None
for ee in extensions.keys():
if minEe is None or ee.smallerThan(minEe):
Expand All @@ -459,18 +459,18 @@ def isCanonical(self, c: _ab.DFSCode):
return True


def gSpan(self, graphDb, outputFrequentVertices):
def gSpan(self, graphDb, outputSingleVertices):
"""
The gSpan function in Python processes a graph database by precalculating vertex lists, removing
infrequent vertex pairs, and performing a depth-first search algorithm.

:param graphDb: The `graphDb` parameter refers to a graph database that the algorithm is
operating on.
:param outputFrequentVertices: The `outputFrequentVertices` parameter is a boolean flag that
determines whether the frequent vertices should be output or not.
:param outputSingleVertices: The `outputSingleVertices` parameter is a boolean flag that
determines whether single vertices should be output or not.
"""
if outputFrequentVertices or GSpan.eliminate_infrequent_vertices:
self.findAllOnlyOneVertex(graphDb, outputFrequentVertices)
if outputSingleVertices or GSpan.eliminate_infrequent_vertices:
self.findAllOnlyOneVertex(graphDb, outputSingleVertices)

for g in graphDb:
g.precalculateVertexList()
Expand Down Expand Up @@ -521,7 +521,7 @@ def findAllOnlyOneVertex(self, graphDb, outputFrequentVertices):
:param graphDb: The `graphDb` parameter refers to a graph database that the algorithm is
operating on.
:param outputFrequentVertices: The `outputFrequentVertices` parameter is a boolean flag that
determines whether the frequent vertices should be included in the output or not.
determines whether single vertices should be included in the output or not.
"""
self.frequentVertexLabels = []
labelM = {}
Expand Down Expand Up @@ -663,3 +663,20 @@ def getSubgraphGraphMapping(self):
mappings.append(mapping)
return mappings

def saveSubgraphsByGraphId(self, oFile):
    """
    Save the frequent subgraphs as flat transactions, one row per graph.

    Each row lists, separated by single spaces, the indices (positions in
    `self.frequentSubgraphs`) of the subgraphs whose `setOfGraphsIds` contains
    that graph. Rows are written in ascending order of graph ID.

    Note that the graph ID itself is NOT written to the file, and a graph that
    does not appear in any subgraph's `setOfGraphsIds` produces no row, so the
    row number is not guaranteed to equal the graph ID.

    :param oFile: Path of the output file. It is opened in write mode, so any
    existing content is overwritten.
    """
    graphToSubgraphs = {}

    # Invert the subgraph -> graphs relation into graph -> [subgraph indices].
    # Indices are appended in increasing order because enumerate() walks
    # self.frequentSubgraphs front to back.
    for subgraphId, subgraph in enumerate(self.frequentSubgraphs):
        for graphId in subgraph.setOfGraphsIds:
            graphToSubgraphs.setdefault(graphId, []).append(subgraphId)

    # Sort before opening the file so that a failure while ordering the IDs
    # does not leave a truncated output file behind.
    orderedGraphIds = sorted(graphToSubgraphs)

    with open(oFile, 'w') as f:
        f.writelines(
            ' '.join(map(str, graphToSubgraphs[graphId])) + '\n'
            for graphId in orderedGraphIds
        )