# elev.py

# Elevation transformation -- OpenCL and numpy both
from __future__ import division
import numpy as np
from time import time
from utils import chunks
from itertools import product
#
import gzip
import cPickle as pickle

try:
    import pyopencl as cl
    import pyopencl.array as cla
    hasCL = True
except ImportError:
    hasCL = False


class Elev(object):
    """Rescale an elevation array with OpenCL when available, numpy otherwise.

    The numpy path computes ``(elevation - trim) / vscale + sealevel``.  The
    OpenCL path runs the ``elev`` kernel built from ``elev.cl``; ``test()``
    checks that both paths agree.
    """

    def __init__(self, elarray, wantCL=True, platform_num=None):
        """
        Take the elevation array as generated by GDAL.

        Keyword arguments:
        elarray -- array of elevation values
        wantCL -- try to use OpenCL; numpy is used when this is False or
                  when pyopencl is missing or fails to initialise
        platform_num -- index into cl.get_platforms(); when it cannot be
                        used as an index (e.g. None) pyopencl chooses the
                        context itself

        Raises IndexError when platform_num is out of range.

        """

        self.elarray = elarray
        # Both code paths work on a flat float32 copy of the input.
        self.elflat = elarray.flatten().astype(np.float32)

        self.wantCL = wantCL
        self.canCL = False

        if self.wantCL and hasCL:
            try:
                self._init_cl(platform_num)
                self.canCL = True
            except cl.RuntimeError:
                print('warning: unable to use pyopencl, defaulting to numpy')

    def _init_cl(self, platform_num):
        """Create the OpenCL context, queue, program and per-device sizes."""
        platforms = cl.get_platforms()
        try:
            # Only the lookup is guarded: an IndexError (bad platform number)
            # must still reach the caller.
            self.platform = platforms[platform_num]
        except TypeError:
            # No usable platform index was given.
            # The user may be asked to select a platform.
            self.platform = None
            self.context = cl.create_some_context()
            self.devices = self.context.devices
        else:
            self.devices = self.platform.get_devices()
            self.context = cl.Context(self.devices)

        self.queue = cl.CommandQueue(self.context)
        # The kernel source is looked up relative to the working directory.
        with open('elev.cl', 'r') as source:
            filestr = source.read()
        self.program = cl.Program(self.context, filestr).build(devices=self.devices)
        for device in self.devices:
            buildlog = self.program.get_build_info(device, cl.program_build_info.LOG)
            if len(buildlog) > 1:
                print('Build log for device %s :\n%s' % (device, buildlog))
        # Only the first kernel is used.
        self.kernel = self.program.all_kernels()[0]

        # Local and global sizes are device-dependent.
        self.local_size = {}
        self.global_size = {}
        # Groups should be overcommitted.
        # For now, use 3 (48 cores / 16 cores per halfwarp) * 2
        for device in self.devices:
            work_group_size = self.kernel.get_work_group_info(
                cl.kernel_work_group_info.WORK_GROUP_SIZE, device)
            num_groups_for_1d = device.max_compute_units * 3 * 2
            self.local_size[device] = (work_group_size,)
            self.global_size[device] = (num_groups_for_1d * work_group_size,)

    def __call__(self, trim, vscale, sealevel, pickle_name=None):
        """
        Shoehorn the array into the range required by Minecraft.

        trim -- range between sea level and minimum elevation
                to be removed
        vscale -- vertical scale
        sealevel -- Minecraft level corresponding to zero elevation
        pickle_name -- when not None, the inputs are pickled to
                       'elev-<pickle_name>.pkl.gz' for later use by test()

        Returns an array with the same shape as the input elevation array.

        """

        if self.canCL and self.wantCL:
            results = self._run_cl(trim, vscale, sealevel)
        else:
            results = ((self.elflat - trim)/vscale)+sealevel
        if pickle_name is not None:
            self._pickle_inputs(pickle_name, trim, vscale, sealevel)
        return np.asarray(results).reshape(self.elarray.shape)

    def _run_cl(self, trim, vscale, sealevel):
        """Run the kernel over the flat array in device-sized chunks."""
        # These values do not change from run to run.
        trim_arg = np.float32(trim)
        vscale_arg = np.float32(vscale)
        sealevel_arg = np.float32(sealevel)
        # Calculate how many retval elements can be evaluated per run.
        static_data = trim_arg.nbytes + vscale_arg.nbytes + sealevel_arg.nbytes
        # Each base element is one float32 (4 bytes).
        bpe_single = 4
        # Each retval element is one float32 (4 bytes).
        bpe_total = bpe_single + 4
        # Check both single and total limits on elems-per-slice.
        eps_single = [int(0.95*device.max_mem_alloc_size/bpe_single)
                      for device in self.devices]
        # The scalar arguments are subtracted from the byte budget *before*
        # converting bytes to elements.
        eps_total = [int((0.95*device.global_mem_size - static_data)/bpe_total)
                     for device in self.devices]
        elem_limits = [min(single, total)
                       for single, total in zip(eps_single, eps_total)]
        # NB: Only supporting one device for now.
        best_device = int(np.argmax(elem_limits))
        chosen = self.devices[best_device]
        global_size = self.global_size[chosen]
        local_size = self.local_size[chosen]
        # NOTE(review): the queue is created without naming a device, so it
        # is not guaranteed to execute on `chosen` -- confirm.
        pieces = []
        for chunk in chunks(self.elflat, elem_limits[best_device]):
            lenchunk = len(chunk)
            retvals_buf = cla.to_device(self.queue, np.empty(lenchunk, dtype=np.float32))
            chunk_buf = cla.to_device(self.queue, chunk)
            lenchunk_arg = np.uint32(lenchunk)
            event = self.program.elev(self.queue, global_size, local_size,
                                      retvals_buf.data, chunk_buf.data,
                                      lenchunk_arg, trim_arg, vscale_arg,
                                      sealevel_arg)
            event.wait()
            # Keep each result as an array; round-tripping through Python
            # lists is slow and memory-hungry for large rasters.
            pieces.append(retvals_buf.get())
        if not pieces:
            return np.empty(0, dtype=np.float32)
        return np.concatenate(pieces)

    def _pickle_inputs(self, pickle_name, trim, vscale, sealevel):
        """Save the input array and parameters for testing purposes."""
        picklefilename = 'elev-%s.pkl.gz' % pickle_name
        print('Pickling to %s...' % picklefilename)
        # The with-block closes the file so the gzip stream is complete.
        with gzip.open(picklefilename, 'wb') as f:
            pickle.dump(self.elarray, f, -1)
            pickle.dump(trim, f, -1)
            pickle.dump(vscale, f, -1)
            pickle.dump(sealevel, f, -1)

    @staticmethod
    def test(fileobj, image=False):
        """
        Compare the OpenCL and numpy results for one pickled data set.

        fileobj -- open file object holding a gzip-compressed pickle stream
                   with elarray, trim, vscale and sealevel, in that order
        image -- when True, save an image of the differences next to the
                 input file instead of counting mismatches

        Raises AssertionError when OpenCL cannot be used, or when more than
        1% of the elements differ by more than 0.0001.

        """
        # NOTE: unpickling executes code embedded in the file; only use
        # files from a trusted source.
        jar = gzip.GzipFile(fileobj=fileobj)
        try:
            elarray = pickle.load(jar)
            trim = pickle.load(jar)
            vscale = pickle.load(jar)
            sealevel = pickle.load(jar)
        finally:
            jar.close()
        lenelarray = elarray.size

        print('Generating results with OpenCL')
        atime1 = time()
        gpu_elev = Elev(elarray, wantCL=True)
        if not gpu_elev.canCL:
            raise AssertionError('Cannot run test without working OpenCL')
        gpu_results = gpu_elev(trim, vscale, sealevel)
        adelta = time() - atime1
        print('... finished in  %s seconds!' % adelta)

        print('Generating results with numpy')
        btime1 = time()
        cpu_elev = Elev(elarray, wantCL=False)
        cpu_results = cpu_elev(trim, vscale, sealevel)
        bdelta = time() - btime1
        print('... finished in  %s seconds!' % bdelta)

        # Compare the results.
        allowed_error_percentage = 1
        maxnomatch = int(allowed_error_percentage*0.01*lenelarray)
        cpu64 = np.asarray(cpu_results, dtype=np.float64)
        gpu64 = np.asarray(gpu_results, dtype=np.float64)
        if image:
            print('Generating image of differences')
            import re
            try:
                from PIL import Image
            except ImportError:
                # Fall back to the legacy top-level PIL module.
                import Image
            imagefile, replaced = re.subn(r'pkl\.gz$', 'png', fileobj.name)
            if not replaced:
                # Never write the image over the input file itself.
                imagefile = fileobj.name + '.png'
            # 128 is the "no difference" level; values are truncated to int.
            diffarr = (128 + cpu64 - gpu64).astype(np.int32)
            Image.fromarray(diffarr).save(imagefile)
        else:
            mismatch = np.abs(cpu64 - gpu64) > 0.0001
            nomatch = int(mismatch.sum())
            percent = 100*nomatch/lenelarray if lenelarray else 0
            nomatchmsg = '%d of %d (%d%%) failed to match' % (nomatch, lenelarray, percent)
            if nomatch > maxnomatch:
                # Show only the first few offenders.
                for index in np.argwhere(mismatch)[:9]:
                    where = tuple(index)
                    print('no match at  %s' % ' '.join(str(i) for i in where))
                    print(' CPU:  %s' % cpu_results[where])
                    print(' GPU:  %s' % gpu_results[where])
                raise AssertionError(nomatchmsg)
            else:
                print(nomatchmsg)


def main(argv=None):
    """Test routine to confirm module consistency.

    argv -- list of command-line arguments to parse; None (the default)
            makes argparse read sys.argv[1:].

    With no file arguments, every './elev-*.pkl.gz' file is tested.
    """

    import argparse
    import glob

    parser = argparse.ArgumentParser(description='Test elev functionality with OpenCL and numpy.')
    # The data files are gzip-compressed pickles, so they must be opened
    # in binary mode.
    parser.add_argument('files', type=argparse.FileType('rb'), nargs='*',
                        help='a data file to be processed')
    parser.add_argument('--image', action='store_true',
                        help='generate an image with the differences')

    args = parser.parse_args(argv)
    testfiles = args.files
    if not testfiles:
        testfiles = [open(name, 'rb') for name in glob.glob('./elev-*.pkl.gz')]
    try:
        for testfile in testfiles:
            print('Testing %s' % testfile.name)
            Elev.test(testfile, image=args.image)
            testfile.close()
    finally:
        # If a test raised, release the files that were never reached;
        # closing an already-closed file is harmless.
        for testfile in testfiles:
            testfile.close()


# Run the consistency test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()