Merge pull request #206 from zuphilip/imports

Standardize Imports I, #176
ocropus-archive · Sep 29, 2017 · d823ba4 · d823ba4
2 parents 5027794 + 8f354ad
commit d823ba4
Show file tree

Hide file tree

Showing 14 changed files with 384 additions and 288 deletions.
diff --git a/ocropus-dewarp b/ocropus-dewarp
@@ -2,20 +2,17 @@
 
 from __future__ import print_function
 
-import random as pyrandom
 import re
-from pylab import *
-import os.path
-import ocrolib
 import argparse
-import matplotlib
-import numpy
+import sys
+
+import numpy as np
+from scipy.misc import imsave
+
+import ocrolib
 from ocrolib import lineest
-import ocrolib.lstm as lstm
-import traceback
-import scipy
 
-numpy.seterr(divide='raise',over='raise',invalid='raise',under='ignore')
+np.seterr(divide='raise',over='raise',invalid='raise',under='ignore')
 
 parser = argparse.ArgumentParser("dewarp lines as used for the recognizer")
 
@@ -36,9 +33,9 @@ if len(inputs)==0:
 print("# inputs", len(inputs))
 
 if args.lineest=="center":
-  lnorm = lineest.CenterNormalizer()
+    lnorm = lineest.CenterNormalizer()
 else:
-  raise Exception(args.lineest+": unknown line normalizer")
+    raise Exception(args.lineest+": unknown line normalizer")
 
 lnorm.setHeight(args.height)
 
@@ -47,9 +44,9 @@ for fname in inputs:
     base,_ = ocrolib.allsplitext(fname)
     try:
         line = ocrolib.read_image_gray(fname)
-        lnorm.measure(amax(line)-line)
-        line = lnorm.normalize(line,cval=amax(line))
-        scipy.misc.imsave(base+".dew.png",line)
+        lnorm.measure(np.amax(line)-line)
+        line = lnorm.normalize(line,cval=np.amax(line))
+        imsave(base+".dew.png",line)
     except Exception as e:
         print("ERROR", e)
         continue
diff --git a/ocropus-econf b/ocropus-econf
@@ -3,14 +3,21 @@
 
 from __future__ import print_function
 
-import warnings,numpy,argparse,sys,os,os.path,multiprocessing,codecs
+import warnings
+import argparse
+import sys
+import os.path
+import multiprocessing
+import codecs
 from collections import Counter
+
+import numpy as np
+
 import ocrolib
-from pylab import *
 from ocrolib import edist
 
 # disable rank warnings from polyfit
-warnings.simplefilter('ignore',numpy.RankWarning)
+warnings.simplefilter('ignore',np.RankWarning)
 
 parser = argparse.ArgumentParser(description = """
 Compute the edit distances between ground truth and recognizer output.

diff --git a/ocropus-errs b/ocropus-errs
@@ -3,7 +3,12 @@
 
 from __future__ import print_function
 
-import argparse,sys,os,os.path,multiprocessing
+import argparse
+import sys
+import os
+import os.path
+import multiprocessing
+
 import ocrolib
 from ocrolib import edist
 
@@ -27,6 +32,7 @@ args.files = ocrolib.glob_all(args.files)
 if not ".gt." in args.files[0]:
     sys.stderr.write("warning: compare on .gt.txt files, not .txt files\n")
 
+
 def process1(fname):
     # fgt = ocrolib.allsplitext(fname)[0]+args.gtextension
     gt = ocrolib.project_text(ocrolib.read_text(fname),kind=args.kind)

diff --git a/ocropus-gpageseg b/ocropus-gpageseg
@@ -13,13 +13,19 @@
 
 from __future__ import print_function
 
-from pylab import *
-import argparse,glob,os,os.path
+import argparse
+import glob
+import os
+import os.path
+import sys
 import traceback
+from multiprocessing import Pool
+
+import numpy as np
 from scipy.ndimage import measurements
 from scipy.misc import imsave
 from scipy.ndimage.filters import gaussian_filter,uniform_filter,maximum_filter
-from multiprocessing import Pool
+
 import ocrolib
 from ocrolib import psegutils,morph,sl
 from ocrolib.exceptions import OcropusException
@@ -100,18 +106,21 @@ parser.add_argument('files',nargs='+')
 args = parser.parse_args()
 args.files = ocrolib.glob_all(args.files)
 
+
 def norm_max(v):
-    return v/amax(v)
+    return v/np.amax(v)
+
+
 def check_page(image):
     if len(image.shape)==3: return "input image is color image %s"%(image.shape,)
-    if mean(image)<median(image): return "image may be inverted"
+    if np.mean(image)<np.median(image): return "image may be inverted"
     h,w = image.shape
     if h<600: return "image not tall enough for a page image %s"%(image.shape,)
     if h>10000: return "image too tall for a page image %s"%(image.shape,)
     if w<600: return "image too narrow for a page image %s"%(image.shape,)
     if w>10000: return "line too wide for a page image %s"%(image.shape,)
     slots = int(w*h*1.0/(30*30))
-    _,ncomps = measurements.label(image>mean(image))
+    _,ncomps = measurements.label(image>np.mean(image))
     if ncomps<10: return "too few connected components for a page image (got %d)"%(ncomps,)
     if ncomps>slots: return "too many connnected components for a page image (%d > %d)"%(ncomps,slots)
     return None
@@ -120,6 +129,7 @@ def check_page(image):
 def print_info(*objs):
     print("INFO: ", *objs, file=sys.stdout)
 
+
 def print_error(*objs):
     print("ERROR: ", *objs, file=sys.stderr)
 
@@ -135,21 +145,22 @@ print_info("")
 if args.parallel>1:
     args.quiet = 1
 
+
 def B(a):
-    if a.dtype==dtype('B'): return a
-    return array(a,'B')
+    if a.dtype==np.dtype('B'): return a
+    return np.array(a,'B')
+
 
 def DSAVE(title,image):
     if not args.debug: return
     if type(image)==list:
         assert len(image)==3
-        image = transpose(array(image),[1,2,0])
+        image = np.transpose(np.array(image),[1,2,0])
     fname = "_"+title+".png"
     print_info("debug " + fname)
     imsave(fname,image)
 
 
-
 ################################################################
 ### Column finding.
 ###
@@ -170,62 +181,66 @@ def compute_separators_morph(binary,scale):
     vert = morph.select_regions(vert,sl.dim0,min=20*scale,nbest=args.maxseps)
     return vert
 
+
 def compute_colseps_morph(binary,scale,maxseps=3,minheight=20,maxwidth=5):
     """Finds extended vertical whitespace corresponding to column separators
     using morphological operations."""
     boxmap = psegutils.compute_boxmap(binary,scale,dtype='B')
     bounds = morph.rb_closing(B(boxmap),(int(5*scale),int(5*scale)))
-    bounds = maximum(B(1-bounds),B(boxmap))
+    bounds = np.maximum(B(1-bounds),B(boxmap))
     cols = 1-morph.rb_closing(boxmap,(int(20*scale),int(scale)))
     cols = morph.select_regions(cols,sl.aspect,min=args.csminaspect)
     cols = morph.select_regions(cols,sl.dim0,min=args.csminheight*scale,nbest=args.maxcolseps)
     cols = morph.r_erosion(cols,(int(0.5+scale),0))
     cols = morph.r_dilation(cols,(int(0.5+scale),0),origin=(int(scale/2)-1,0))
     return cols
 
+
 def compute_colseps_mconv(binary,scale=1.0):
     """Find column separators using a combination of morphological
     operations and convolution."""
     h,w = binary.shape
     smoothed = gaussian_filter(1.0*binary,(scale,scale*0.5))
     smoothed = uniform_filter(smoothed,(5.0*scale,1))
-    thresh = (smoothed<amax(smoothed)*0.1)
+    thresh = (smoothed<np.amax(smoothed)*0.1)
     DSAVE("1thresh",thresh)
     blocks = morph.rb_closing(binary,(int(4*scale),int(4*scale)))
     DSAVE("2blocks",blocks)
-    seps = minimum(blocks,thresh)
+    seps = np.minimum(blocks,thresh)
     seps = morph.select_regions(seps,sl.dim0,min=args.csminheight*scale,nbest=args.maxcolseps)
     DSAVE("3seps",seps)
     blocks = morph.r_dilation(blocks,(5,5))
     DSAVE("4blocks",blocks)
-    seps = maximum(seps,1-blocks)
+    seps = np.maximum(seps,1-blocks)
     DSAVE("5combo",seps)
     return seps
 
+
 def compute_colseps_conv(binary,scale=1.0):
     """Find column separators by convolution and
     thresholding."""
     h,w = binary.shape
     # find vertical whitespace by thresholding
     smoothed = gaussian_filter(1.0*binary,(scale,scale*0.5))
     smoothed = uniform_filter(smoothed,(5.0*scale,1))
-    thresh = (smoothed<amax(smoothed)*0.1)
+    thresh = (smoothed<np.amax(smoothed)*0.1)
     DSAVE("1thresh",thresh)
     # find column edges by filtering
     grad = gaussian_filter(1.0*binary,(scale,scale*0.5),order=(0,1))
     grad = uniform_filter(grad,(10.0*scale,1))
     # grad = abs(grad) # use this for finding both edges
-    grad = (grad>0.5*amax(grad))
+    grad = (grad>0.5*np.amax(grad))
     DSAVE("2grad",grad)
     # combine edges and whitespace
-    seps = minimum(thresh,maximum_filter(grad,(int(scale),int(5*scale))))
+    seps = np.minimum(thresh,maximum_filter(grad,(int(scale),int(5*scale))))
     seps = maximum_filter(seps,(int(2*scale),1))
     DSAVE("3seps",seps)
     # select only the biggest column separators
     seps = morph.select_regions(seps,sl.dim0,min=args.csminheight*scale,nbest=args.maxcolseps)
     DSAVE("4seps",seps)
     return seps
 
+
 def compute_colseps(binary,scale):
     """Computes column separators either from vertical black lines or whitespace."""
     print_info("considering at most %g whitespace column separators" % args.maxcolseps)
@@ -241,12 +256,11 @@ def compute_colseps(binary,scale):
         seps = compute_separators_morph(binary,scale)
         DSAVE("colseps",0.7*seps+0.3*binary)
         #colseps = compute_colseps_morph(binary,scale)
-        colseps = maximum(colseps,seps)
-        binary = minimum(binary,1-seps)
+        colseps = np.maximum(colseps,seps)
+        binary = np.minimum(binary,1-seps)
     return colseps,binary
 
 
-
 ################################################################
 ### Text Line Finding.
 ###
@@ -273,21 +287,22 @@ def compute_gradmaps(binary,scale):
     top = ocrolib.norm_max((grad>0)*grad)
     return bottom,top,boxmap
 
+
 def compute_line_seeds(binary,bottom,top,colseps,scale):
     """Based on gradient maps, computes candidates for baselines
     and xheights.  Then, it marks the regions between the two
     as a line seed."""
     t = args.threshold
     vrange = int(args.vscale*scale)
     bmarked = maximum_filter(bottom==maximum_filter(bottom,(vrange,0)),(2,2))
-    bmarked = bmarked*(bottom>t*amax(bottom)*t)*(1-colseps)
+    bmarked = bmarked*(bottom>t*np.amax(bottom)*t)*(1-colseps)
     tmarked = maximum_filter(top==maximum_filter(top,(vrange,0)),(2,2))
-    tmarked = tmarked*(top>t*amax(top)*t/2)*(1-colseps)
+    tmarked = tmarked*(top>t*np.amax(top)*t/2)*(1-colseps)
     tmarked = maximum_filter(tmarked,(1,20))
-    seeds = zeros(binary.shape,'i')
+    seeds = np.zeros(binary.shape,'i')
     delta = max(3,int(scale/2))
     for x in range(bmarked.shape[1]):
-        transitions = sorted([(y,1) for y in find(bmarked[:,x])]+[(y,0) for y in find(tmarked[:,x])])[::-1]
+        transitions = sorted([(y,1) for y in np.where(bmarked[:,x])[0]]+[(y,0) for y in np.where(tmarked[:,x])[0]])[::-1]
         transitions += [(0,0)]
         for l in range(len(transitions)-1):
             y0,s0 = transitions[l]
@@ -302,7 +317,6 @@ def compute_line_seeds(binary,bottom,top,colseps,scale):
     return seeds
 
 
-
 ################################################################
 ### The complete line segmentation process.
 ################################################################
@@ -313,12 +327,13 @@ def remove_hlines(binary,scale,maxsize=10):
     for i,b in enumerate(objects):
         if sl.width(b)>maxsize*scale:
             labels[b][labels[b]==i+1] = 0
-    return array(labels!=0,'B')
+    return np.array(labels!=0,'B')
+
 
 def compute_segmentation(binary,scale):
     """Given a binary image, compute a complete segmentation into
     lines, computing both columns and text lines."""
-    binary = array(binary,'B')
+    binary = np.array(binary,'B')
 
     # start by removing horizontal black lines, which only
     # interfere with the rest of the page segmentation
@@ -340,12 +355,11 @@ def compute_segmentation(binary,scale):
     llabels = morph.propagate_labels(boxmap,seeds,conflict=0)
     if not args.quiet: print_info("spreading labels")
     spread = morph.spread_labels(seeds,maxdist=scale)
-    llabels = where(llabels>0,llabels,spread*binary)
+    llabels = np.where(llabels>0,llabels,spread*binary)
     segmentation = llabels*binary
     return segmentation
 
 
-
 ################################################################
 ### Processing each file.
 ################################################################
@@ -369,7 +383,7 @@ def process1(job):
     checktype(binary,ABINARY2)
 
     if not args.nocheck:
-        check = check_page(amax(binary)-binary)
+        check = check_page(np.amax(binary)-binary)
         if check is not None:
             print_error("%s SKIPPED %s (use -n to disable this check)" % (fname, check))
             return
@@ -386,7 +400,7 @@ def process1(job):
     else:
         scale = args.scale
     print_info("scale %f" % (scale))
-    if isnan(scale) or scale>1000.0:
+    if np.isnan(scale) or scale>1000.0:
         print_error("%s: bad scale (%g); skipping\n" % (fname, scale))
         return
     if scale<args.minscale:
@@ -397,10 +411,10 @@ def process1(job):
 
     if not args.quiet: print_info("computing segmentation")
     segmentation = compute_segmentation(binary,scale)
-    if amax(segmentation)>args.maxlines:
-        print_error("%s: too many lines %g" % (fname, amax(segmentation)))
+    if np.amax(segmentation)>args.maxlines:
+        print_error("%s: too many lines %g" % (fname, np.amax(segmentation)))
         return
-    if not args.quiet: print_info("number of lines %g" % amax(segmentation))
+    if not args.quiet: print_info("number of lines %g" % np.amax(segmentation))
 
     # compute the reading order
 
@@ -411,8 +425,8 @@ def process1(job):
 
     # renumber the labels so that they conform to the specs
 
-    nlabels = amax(segmentation)+1
-    renumber = zeros(nlabels,'i')
+    nlabels = np.amax(segmentation)+1
+    renumber = np.zeros(nlabels,'i')
     for i,v in enumerate(lsort): renumber[lines[v].label] = 0x010000+(i+1)
     segmentation = renumber[segmentation]
 
@@ -437,6 +451,7 @@ if len(args.files)==1 and os.path.isdir(args.files[0]):
 else:
     files = args.files
 
+
 def safe_process1(job):
     fname,i = job
     try: