Skip to content

Commit

Permalink
a little cleaner, but still need to choose better variable names. the…
Browse files Browse the repository at this point in the history
… ones from the paper aren't going to cut it.
  • Loading branch information
Alex Olivas committed Jun 25, 2020
1 parent d0c1f76 commit bc363ae
Showing 1 changed file with 26 additions and 21 deletions.
47 changes: 26 additions & 21 deletions voka/lof.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@

# https://www.dbs.ifi.lmu.de/Publikationen/Papers/LOF.pdf

'''
Module that contains the function that calculates a Local Outlier Factor.
https://www.dbs.ifi.lmu.de/Publikationen/Papers/LOF.pdf
'''
import numpy

def distance(vector1, vector2):
    '''
    Return the Euclidean distance between two vectors.

    Both inputs are converted to floating-point arrays before they are
    subtracted, so unsigned-integer inputs cannot wrap around and boolean
    inputs are accepted.

    Parameters
    ----------
    vector1, vector2 : array_like
        Coordinates of the two points.

    Returns
    -------
    float
        The norm of ``vector1 - vector2`` as computed by numpy.linalg.norm.
    '''
    array1 = numpy.asarray(vector1, dtype=float)
    array2 = numpy.asarray(vector2, dtype=float)
    return numpy.linalg.norm(array1 - array2)

def reach(p, k, o, D):
    '''
    Return the reachability distance of point 'p' with respect to 'o'.

    The result is the larger of the k-distance of 'p' within 'D' and the
    direct distance d(p,o).

    The k-distance of an object p is defined as the distance d(p,o) between
    p and an object o in D such that:
    i) For at least k objects o' in D/{p} d(p,o') <= d(p,o)
    ii) For at most k-1 objects o' in D/{p} d(p,o') < d(p,o)

    Here, for positive 'k', it is taken as the largest of the k smallest
    non-zero distances from 'p' to the points in 'D' (fewer than k are used
    when fewer exist), and 0. when no point of 'D' lies at a non-zero
    distance from 'p'.

    NOTE(review): the LOF paper appears to define reach-dist_k(p,o) with the
    k-distance of 'o'; this uses the k-distance of 'p' -- confirm intended.
    '''
    # Zero distances are dropped so that 'p' itself (and any exact
    # duplicate of it in 'D') is never counted among its own neighbours.
    distances = sorted(
        d for d in (distance(p, op) for op in D) if d > 0
    )
    nearest = distances[:k]
    kdistance = max(nearest) if nearest else 0.
    return max(kdistance, distance(p, o))

def local_reachability_density(p, k, D):
    '''
    Local Reachability Density of point 'p' relative to the points in 'D'.

    Computed as len(D) divided by the sum of reach(p, k, op, D) over every
    point 'op' in 'D', i.e. the inverse of the mean reachability distance.
    Returns 0. when that sum is zero, which includes the case of empty 'D'.
    '''
    denominator = sum(reach(p, k, op, D) for op in D)
    return len(D)/denominator if denominator else 0.

def LOF(p, k, D):
    '''
    Return the LocalOutlierFactor for point 'p' compared
    to collection of reference points in 'D', using k-distance 'k'.

    The factor is the mean, over every point 'op' in 'D', of
    local_reachability_density(op, k, D) divided by
    local_reachability_density(p, k, D).
    Returns 0. when the density of 'p' is zero or 'D' is empty.
    '''
    # The density of 'p' does not depend on 'op', so compute it once
    # instead of once per reference point.
    density_p = local_reachability_density(p, k, D)
    if not density_p:
        return 0.

    ratios = [local_reachability_density(op, k, D)/density_p for op in D]
    return sum(ratios)/float(len(ratios)) if ratios else 0.

0 comments on commit bc363ae

Please sign in to comment.