Skip to content

Commit

Permalink
Standardize imports for edist.py (ocropus-archive#176)
Browse files: browse the repository at this point in the history
  • Loading branch information
zuphilip committed Dec 19, 2017
1 parent 8cfce57 commit 5a71411
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions ocrolib/edist.py
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
from scipy.ndimage import filters
from pylab import *
import re

import numpy as np
from scipy.ndimage import filters

def levenshtein(a,b):
"""Calculates the Levenshtein distance between a and b.
(Clever compact Pythonic implementation from hetland.org)"""
Expand All @@ -23,10 +24,10 @@ def xlevenshtein(a,b,context=1):
n, m = len(a), len(b)
assert m>0 # xlevenshtein should only be called with non-empty b string (ground truth)
if a == b: return 0,[] # speed up for the easy case
sources = empty((m+1,n+1),object)
sources = np.empty((m+1,n+1),object)
sources[:,:] = None
dists = 99999*ones((m+1,n+1))
dists[0,:] = arange(n+1)
dists = np.full((m+1,n+1),99999)
dists[0,:] = np.arange(n+1)
for i in range(1,m+1):
previous = dists[i-1,:]
current = dists[i,:]
Expand Down Expand Up @@ -70,7 +71,7 @@ def xlevenshtein(a,b,context=1):
al = " "*context+al+" "*context
bl = " "*context+bl+" "*context
assert "~" not in al and "~" not in bl
same = array([al[i]==bl[i] for i in range(len(al))],'i')
same = np.array([al[i]==bl[i] for i in range(len(al))],'i')
same = filters.minimum_filter(same,1+2*context)
als = "".join([al[i] if not same[i] else "~" for i in range(len(al))])
bls = "".join([bl[i] if not same[i] else "~" for i in range(len(bl))])
Expand Down

0 comments on commit 5a71411

Please sign in to comment.