#!/usr/bin/env python
# __BEGIN_LICENSE__
#  Copyright (c) 2009-2013, United States Government as represented by the
#  Administrator of the National Aeronautics and Space Administration. All
#  rights reserved.
#
#  The NGT platform is licensed under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance with the
#  License. You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
# __END_LICENSE__

# This program depends on a variety of Python modules that can be
# installed following the instructions in the documentation where the
# use of this tool is shown.

# The purpose of this program is to generate a good initial disparity
# estimate for stereo processing by matching interest points taken
# full resolution at sampled regions across the input images.  This
# helps out in cases where there is high resolution detail such as ice
# and snow patterns but no low resolution detail.

from __future__ import print_function
import sys, optparse, subprocess, re, os, math, time, datetime
def die(msg, code=-1):
    print(msg, file=sys.stderr)
    sys.exit(code)

# The path to the ASP python files
basepath    = os.path.abspath(sys.path[0])
pythonpath  = os.path.abspath(basepath + '/../Python')  # for dev ASP
libexecpath = os.path.abspath(basepath + '/../libexec') # for packaged ASP
sys.path.insert(0, basepath) # prepend to Python path
sys.path.insert(0, pythonpath)
sys.path.insert(0, libexecpath)

import asp_system_utils
asp_system_utils.verify_python_version_is_supported()

try:
    from osgeo import gdal, gdalconst
except ImportError:    
    import gdal, gdalconst
gdal.UseExceptions()
    
import numpy as np
import scipy.ndimage as sf
import scipy.stats as ss
import scipy.spatial as spatial
from scipy.ndimage import convolve, binary_dilation, convolve1d
from scipy.sparse import coo_matrix
from scipy.interpolate import griddata
from multiprocessing import Pool, cpu_count
from optparse import OptionParser

# The path to the ASP python files
basepath    = os.path.abspath(sys.path[0])
pythonpath  = os.path.abspath(basepath + '/../Python')  # for dev ASP
libexecpath = os.path.abspath(basepath + '/../libexec') # for packaged ASP
sys.path.insert(0, basepath) # prepend to Python path
sys.path.insert(0, pythonpath)
sys.path.insert(0, libexecpath)

from asp_system_utils import get_asp_version

#==============================================================================
# Start of supporting functions
#==============================================================================
def prevpow2(i):
    """ Find largest 2^n that is <= given number. """
    n = 1
    while 2*n <= i: n *= 2
    return n

class im_subset:
    def __init__(self, c0, r0, Nc, Nr, source, user_nodata, pad_val=0, Bands=(1,2,3)):
        self.source = source
        self.c0 = c0
        self.r0 = r0
        self.Nc = Nc
        self.Nr = Nr
        self.z  = []
        # if the level is zero, this is a copy of a file, if it's >0, it's a copy of a copy of a file
        self.level = 0
        self.Bands = Bands
        self.pad_val = pad_val
        # get the nodata value from the source
        if hasattr(self.source, 'level'):
            self.level=self.source.level+1
            if user_nodata is not None:
                self.noData = user_nodata
            else:
                self.noData = self.source.noData
        else:
            band=self.source.GetRasterBand(self.Bands[0])
            if user_nodata is not None:
                self.noData = user_nodata
            else:
                self.noData = band.GetNoDataValue()
            if self.noData is None:
                self.noData = 0.0

    def setBounds(self, c0, r0, Nc, Nr, update=0):
        self.c0 = np.int64(c0)
        self.r0 = np.int64(r0)
        self.Nc = np.int64(Nc)
        self.Nr = np.int64(Nr)
        if update > 0:
            self.copySubsetFrom(pad_val=self.pad_val)

    def copySubsetFrom(self, pad_val=0):
        if hasattr(self.source, 'level'):  # copy data from another subset
            self.z = np.zeros((self.source.z.shape[0], self.Nr, self.Nc), self.source.z.dtype) + pad_val
            (sr0, sr1, dr0, dr1, vr) = match_range(self.source.r0, self.source.Nr, self.r0, self.Nr)
            (sc0, sc1, dc0, dc1, vc) = match_range(self.source.c0, self.source.Nc, self.c0, self.Nc)
            if (vr & vc):
                self.z[:, dr0:dr1, dc0:dc1]=self.source.z[:,sr0:sr1, sc0:sc1]
            self.level=self.source.level+1
        else:  # read data from a file
            band   = self.source.GetRasterBand(self.Bands[0])
            src_NB = self.source.RasterCount
            dt     = gdal.GetDataTypeName(band.DataType)
            self.z = np.zeros((src_NB, self.Nr, self.Nc)) + pad_val
            (sr0, sr1, dr0, dr1, vr) = match_range(0, band.YSize, self.r0, self.Nr)
            (sc0, sc1, dc0, dc1, vc) = match_range(0, band.XSize, self.c0, self.Nc)
            if (vr & vc):
                self.z[:, dr0:dr1, dc0:dc1]=self.source.ReadAsArray(int(sc0),  int(sr0), int(sc1-sc0), int(sr1-sr0))
            self.level=0

    def trim_from_edges(self, x_trim, y_trim):
        self.z  = self.z[:, y_trim[0]:-y_trim[1], x_trim[0]:-x_trim[1]]
        self.r0 = self.r0+y_trim[0]
        self.c0 = self.c0+x_trim[0]
        self.Nr = self.Nr-np.sum(y_trim)
        self.Nc = self.Nc-np.sum(x_trim)

    def writeSubsetTo(self, bands, target):
        if hasattr(target, 'level') and target.level > 0:
            (sr0, sr1, dr0, dr1, vr) = match_range(target.source.r0, target.source.Nr, self.r0, self.Nr)
            (sc0, sc1, dc0, dc1, vc) = match_range(target.source.c0, target.source.Nc, self.c0, self.Nc)
            if (vr & vc):
                for b in bands:
                    target.source.z[b,sr0:sr1, sc0:sc1]=self.z[b, dr0:dr1, dc0:dc1]
        else:
            band=target.source.GetRasterBand(1)
            (sr0, sr1, dr0, dr1, vr) = match_range(0, band.YSize, self.r0, self.Nr)
            (sc0, sc1, dc0, dc1, vc) = match_range(0, band.XSize, self.c0, self.Nc)
            if (vr & vc):
                for bb in (bands):
                    band=target.source.GetRasterBand(int(bb))
                    band.WriteArray( self.z[bb-1, dr0:dr1, dc0:dc1], int(sc0), int(sr0))

def match_range(s0, ns, d0, nd):
    i0  = max(s0, d0)
    i1  = min(s0+ns, d0+nd)
    si0 = max(0, i0-s0)
    si1 = min(ns, i1-s0)
    di0 = max(0, i0-d0)
    di1 = min(nd,i1-d0)
    any_valid=(di1>di0) & (si1 > si0)
    return (si0, si1, di0, di1, any_valid)


# Try and use the faster Fourier transform functions from the pyFFTW
# module if available. Otherwise use the normal scipy fftpack ones
# instead (~2-3x slower!).
try:
    from pyfftw.interfaces.scipy_fftpack import fftn, ifftn

except ImportError:
    print("Module 'pyfftw' (FFTW Python bindings) could not be imported.\n"\
          "To install it, try running 'easy_install anfft' from the terminal.\n"\
          "Falling back on the slower 'fftpack' module for ND Fourier transforms.")
    from scipy.fftpack import fftn, ifftn

class TemplateMatch(object):
    """
    N-dimensional template search by normalized cross-correlation or sum of
    squared differences.

    Arguments:
    ------------------------
        template    The template to search for

    """
    def __init__(self,template):

        self.template = template

    def __call__(self,a):

        if a.ndim != self.template.ndim:
            raise Exception('Input array must have the same number of dimensions as the template (%i)'
                            %self.template.ndim)

        C = norm_xcorr(self.template, a, trim=True)
        return  C

def norm_xcorr(t, a, trim=True):
    """
    Fast normalized cross-correlation for n-dimensional arrays

    Inputs:
    ----------------
        t   The template. Must have at least 2 elements, which
            cannot all be equal.

        a   The search space. Its dimensionality must match that of
            the template.

        trim    If True (default), the output array is trimmed down to
            the size of the search space. Otherwise, its size will
            be (f.shape[dd] + t.shape[dd] -1) for dimension dd.

    Output:
    ----------------
        nxcorr  An array of cross-correlation coefficients, which may
            vary from -1.0 to 1.0.

    Wherever the search space has zero  variance under the template,
    normalized  cross-correlation is undefined. In such regions, the
    correlation coefficients are set to zero.

    References:
        Hermosillo et al 2002: Variational Methods for Multimodal Image
        Matching, International Journal of Computer Vision 50(3),
        329-343, 2002
        <http://www.springerlink.com/content/u4007p8871w10645/>

        Lewis 1995: Fast Template Matching, Vision Interface,
        p.120-123, 1995
        <http://www.idiom.com/~zilla/Papers/nvisionInterface/nip.html>

        <http://en.wikipedia.org/wiki/Cross-correlation#Normalized_cross-correlation>

    Alistair Muldal
    Department of Pharmacology
    University of Oxford
    <alistair.muldal@pharm.ox.ac.uk>

    Sept 2012

    """

    if t.size < 2:
        raise Exception('Invalid template')
    if t.size > a.size:
        raise Exception('The input array must be smaller than the template')

    std_t,mean_t = np.std(t),np.mean(t)

    if std_t == 0:
        #raise Exception('The values of the template must not all be equal (they are equal to %f)' % mean_t)
        #warnings.warn('The values of the template are all the same, correlation failed')
        return None

    t = np.float64(t)
    a = np.float64(a)

    # output dimensions of xcorr need to match those of local_sum
    outdims = np.array([a.shape[dd]+t.shape[dd]-1 for dd in range(a.ndim)])
    # hack by BS 1/15/2013-- the direct method appears not to work

    af = fftn(a,shape=outdims)
    tf = fftn(ndflip(t),shape=outdims)
    # 'non-normalized' cross-correlation
    xcorr = np.real(ifftn(tf*af))

    # local linear and quadratic sums of input array in the region of the
    # template
    ls_a  = local_sum(a,t.shape, 0)
    ls2_a = local_sum(a**2,t.shape, np.mean(a**2))

    # now we need to make sure xcorr is the same size as ls_a
    xcorr = procrustes(xcorr,ls_a.shape,side='both')

    # local standard deviation of the input array
    ls_diff = ls2_a - (ls_a**2)/t.size
    ls_diff = np.where(ls_diff < 0,0,ls_diff)
    sigma_a = np.sqrt(ls_diff)

    # standard deviation of the template
    sigma_t = np.sqrt(t.size-1.)*std_t

    # denominator: product of standard deviations
    denom = sigma_t*sigma_a

    # numerator: local mean corrected cross-correlation
    numer = (xcorr - ls_a*mean_t)

    # sigma_t cannot be zero, so wherever the denominator is zero, this must
    # be because sigma_a is zero (and therefore the normalized cross-
    # correlation is undefined), so set nxcorr to zero in these regions
    tol    = np.sqrt(np.finfo(denom.dtype).eps)
    nxcorr = np.zeros(numer.shape)
    good   = denom>tol
    nxcorr[good]=numer[good]/denom[good]
    #nxcorr = np.where(denom < tol,0,numer/denom)

    # if any of the coefficients are outside the range [-1 1], they will be
    # unstable to small variance in a or t, so set them to zero to reflect
    # the undefined 0/0 condition
    bad = nxcorr-1. >  np.sqrt(np.finfo(nxcorr.dtype).eps);
    nxcorr[bad] = 0.

    if trim:
        nxcorr  = procrustes(nxcorr,a.shape,side='both')
        sigma_a = procrustes(sigma_a,a.shape,side='both')
    return nxcorr

def local_sum(a, tshape, padval):
    """For each element in an n-dimensional input array, calculate
    the sum of the elements within a surrounding region the size of
    the template"""

    # zero-padding
    a = ndpad(a, tshape, padval)

    # difference between shifted copies of an array along a given dimension
    def shiftdiff(a,tshape,shiftdim):
        ind1 = [slice(None,None),]*a.ndim
        ind2 = [slice(None,None),]*a.ndim
        ind1[shiftdim] = slice(tshape[shiftdim], a.shape[shiftdim]-1)
        ind2[shiftdim] = slice(0,a.shape[shiftdim]-tshape[shiftdim]-1)
        return a[tuple(ind1)] - a[tuple(ind2)]

    # take the cumsum along each dimension and subtracting a shifted version
    # from itself. this reduces the number of computations to 2*N additions
    # and 2*N subtractions for an N-dimensional array, independent of its
    # size.
    #
    # See:
    # <http://www.idiom.com/~zilla/Papers/nvisionInterface/nip.html>
    for dd in range(a.ndim):
        a = np.cumsum(a,dd)
        a = shiftdiff(a,tshape,dd)
    return a


def ndpad(a,npad=None,padval=0):
    """
    Pads the edges of an n-dimensional input array with a constant value
    across all of its dimensions.

    Inputs:
    ----------------
        a   The array to pad

        npad*   The pad width. Can either be array-like, with one
            element per dimension, or a scalar, in which case the
            same pad width is applied to all dimensions.

        padval  The value to pad with. Must be a scalar (default is 0).

    Output:
    ----------------
        b   The padded array

    *If npad is not a whole number, padding will be applied so that the
    'left' edge of the output is padded less than the 'right', e.g.:

        a       == np.array([1,2,3,4,5,6])
        ndpad(a,1.5)    == np.array([0,1,2,3,4,5,6,0,0])

    In this case, the average pad width is equal to npad (but if npad was
    not a multiple of 0.5 this would not still hold). This is so that ndpad
    can be used to pad an array out to odd final dimensions.
    """

    if npad == None:
        npad = np.ones(a.ndim)
    elif np.isscalar(npad):
        npad = (npad,)*a.ndim
    elif len(npad) != a.ndim:
        raise Exception('Length of npad (%i) does not match the '\
                'dimensionality of the input array (%i)'
                %(len(npad),a.ndim))

    # initialise padded output
    padsize = [a.shape[dd]+2*npad[dd] for dd in range(a.ndim)]
    b = np.ones(padsize,a.dtype)*padval

    # construct an N-dimensional list of slice objects
    ind = [slice(int(np.floor(npad[dd])), int(a.shape[dd]+np.floor(npad[dd]))) for dd in range(a.ndim)]

    # fill in the non-pad part of the array
    b[tuple(ind)] = a
    return b

def procrustes(a,target,side='both',padval=0):
    """
    Forces an array to a target size by either padding it with a constant or
    truncating it

    Arguments:
        a   Input array of any type or shape
        target  Dimensions to pad/trim to, must be a list or tuple
    """

    try:
        if len(target) != a.ndim:
            raise TypeError('Target shape must have the same number of dimensions as the input')
    except TypeError:
        raise TypeError('Target must be array-like')

    try:
        b = np.ones(target,a.dtype)*padval
    except TypeError:
        raise TypeError('Pad value must be numeric')
    except ValueError:
        raise ValueError('Pad value must be scalar')

    aind = [slice(None,None)]*a.ndim
    bind = [slice(None,None)]*a.ndim

    # pad/trim comes after the array in each dimension
    if side == 'after':
        for dd in range(a.ndim):
            if a.shape[dd] > target[dd]:
                aind[dd] = slice(None, int(target[dd]))
            elif a.shape[dd] < target[dd]:
                bind[dd] = slice(None, int(a.shape[dd]))

    # pad/trim comes before the array in each dimension
    elif side == 'before':
        for dd in range(a.ndim):
            if a.shape[dd] > target[dd]:
                aind[dd] = slice(int(a.shape[dd]-target[dd]), None)
            elif a.shape[dd] < target[dd]:
                bind[dd] = slice(int(target[dd]-a.shape[dd]), None)

    # pad/trim both sides of the array in each dimension
    elif side == 'both':
        for dd in range(a.ndim):
            if a.shape[dd] > target[dd]:
                diff = (a.shape[dd]-target[dd])/2.
                aind[dd] = slice(int(np.floor(diff)), int(a.shape[dd]-np.ceil(diff)))
            elif a.shape[dd] < target[dd]:
                diff = (target[dd]-a.shape[dd])/2.
                bind[dd] = slice(int(np.floor(diff)), int(target[dd]-np.ceil(diff)))

    else:
        raise Exception('Invalid choice of pad type: %s' %side)

    b[tuple(bind)] = a[tuple(aind)]

    return b

def ndflip(a):
    """Inverts an n-dimensional array along each of its axes"""
    ind = (slice(None,None,-1),)*a.ndim
    return a[ind]

def log_filter(img, noData):
    """
    performs a separable Laplacian of Gaussian filter on the input image.
    Returns a copy of the original image, converted to float64
    """
    img1 = np.float64(img.copy())
    img1 = sf.laplace( img1, mode='constant')
    if np.sum(img<=noData) > 0 : #zero the filtered image at  data-nodata boundaries
        mask = np.float64(img.copy())
        mask[:] = 1.
        mask[img<=noData] = 0.0
        mask=sf.laplace(mask, mode='constant')
        img1[mask != 0.] = 0.   # set the borders to zero
    img1 = sf.gaussian_filter(img1, (1.4, 1.4), mode='constant')
    return img1

#==============================================================================
# Start of sparse_disp functions
#==============================================================================

def run_one_block(template_size, KW, min_template_sigma, Xc, Yc,
                  search_range_x, search_range_y, dx0, dy0, T_img, S_img, noData):

    # Do template matching for a single block in the left image.
    # LOG filter the images
    T_filt = log_filter(T_img, noData)
    S_filt = log_filter(S_img, noData)
    std_T  = np.std(T_filt)
    if min_template_sigma is not None:
        if std_T <= min_template_sigma:
            return(-2, Xc, Yc, -1, 0, 0)

    # Run the feature match.
    TT = T_filt[KW:template_size+KW,  KW:template_size+KW ]
    SS = S_filt[KW:search_range_y+KW, KW:search_range_x+KW]
    # If the search image is large, do an initial search at 2x lower resolution
    if (SS.shape[0] > 32+TT.shape[0]) or (SS.shape[1] >32+TT.shape[1]):
        low_res_template = TT[1:-1:2, 1:-1:2]
        low_res_image    = SS[1:-1:2, 1:-1:2]
        template_matcher = TemplateMatch(low_res_template) # Perform template search
        result = template_matcher(low_res_image)
        if result is None:
            #warnings.warn('run_one_block: TemplateMatch returned None at xc=%d, yc=%d' % (Xc, Yc))
            return(-3., Xc, Yc, 0., np.NaN, np.NaN)

        result = result[int(template_size/4):int(search_range_y/2-template_size/4), int(template_size/4):int(search_range_x/2-template_size/4)]
        ijC    = 2*(np.array(np.unravel_index(np.argmax(result), result.shape))-[result.shape[0]/2., result.shape[1]/2.]).astype(int)

        t_xr = np.array(ijC[1]+[-template_size/2-16, template_size/2+16]+SS.shape[1]/2, dtype=np.int16)
        t_yr = np.array(ijC[0]+[-template_size/2-16, template_size/2+16]+SS.shape[0]/2, dtype=np.int16)
        if t_xr[0] >= 0 and t_yr[0] >= 0 and t_xr[1] <= SS.shape[1] and t_yr[1] <= SS.shape[0] :
            dx0 = dx0+ijC[1]
            dy0 = dy0+ijC[0]
            SS  = SS[t_yr[0]:t_yr[1], t_xr[0]:t_xr[1]]

    template_matcher = TemplateMatch(TT)
    result = template_matcher(SS)

    # trim off edges of result
    result=result[int(template_size/2):int(SS.shape[0]-template_size/2),
                  int(template_size/2):int(SS.shape[1]-template_size/2)]

    ij  = np.unravel_index(np.argmax(result), result.shape)
    ijC = ij-np.array([result.shape[0]/2, result.shape[1]/2])

    return (result[ij], Xc, Yc, std_T, ijC[1]+dx0, ijC[0]+dy0)

def run_blocks(param):

    # Run template matching for a set of blocks. This function is being
    # distributed across multiple processors.

    (Tfile, Sfile, processes, xgi, ygi, these_ind, template_size, search_range_xy_i,
     dxy0_i, XYc_i, min_template_sigma, user_nodata) = param
    matcher = fft_matcher(Tfile, Sfile, processes, user_nodata)

    KW=matcher.KW

    s_x_bounds=np.c_[XYc_i[:,0]+dxy0_i[:,0]-search_range_xy_i[:,0]-KW-1000,
                     XYc_i[:,0]+dxy0_i[:,0]+search_range_xy_i[:,0]+KW+1000]
    s_y_bounds=np.c_[XYc_i[:,1]+dxy0_i[:,1]-search_range_xy_i[:,1]-KW-1000,
                     XYc_i[:,1]+dxy0_i[:,1]+search_range_xy_i[:,1]+KW+1000]
    matcher.T_sub.setBounds(xgi-template_size, ygi-template_size, matcher.blocksize+2*template_size,
                            matcher.blocksize+2*template_size, update=1)
    matcher.S_sub.setBounds(s_x_bounds[:,0].min(),
                            s_y_bounds[:,0].min(),
                            s_x_bounds[:,1].max()-s_x_bounds[:,0].min(),
                            s_y_bounds[:,1].max()-s_y_bounds[:,0].min(),
                            update=1)
    indices = []; c = []; x = []; y = []; sigma = []; dx = []; dy = [];

    # loop over the sub-blocks

    count=-1
    T_buffer=im_subset(0, 0, 0, 0, matcher.T_sub, user_nodata, pad_val=matcher.T_sub.noData)
    S_buffer=im_subset(0, 0, 0, 0, matcher.S_sub, user_nodata, pad_val=matcher.S_sub.noData)

    for Xc, Yc, search_range_x, search_range_y, dx0, dy0 in \
          zip(XYc_i[:,0], XYc_i[:,1], search_range_xy_i[:,0], search_range_xy_i[:,1], dxy0_i[:,0],dxy0_i[:,1]):
        #print("S_buffer.Nodata=%f, T_buffer.noData"% (matcher.S_sub.noData, matcher.T_sub.noData)
        count=count+1
        t_xr=[Xc-(template_size/2-1)-KW,      Xc+(template_size/2) +KW]
        t_yr=[Yc-(template_size/2-1)-KW,      Xc+(template_size/2) +KW]
        s_xr=[Xc+dx0-(search_range_x/2-1)-KW, Xc+(search_range_x/2)+KW]
        s_yr=[Yc+dy0-(search_range_y/2-1)-KW, Yc+(search_range_y/2)+KW]

        # Read in the data for this block. Use the im_subset objects:
        # read nodata if we read past the image edges.

        # Read T
        T_buffer.setBounds(t_xr[0]-KW, t_yr[0]-KW, template_size+2.*KW,
                           template_size+2.*KW, update=1)
        T_img=T_buffer.z[0,:,:]
        if np.mean(T_img<=T_buffer.noData)>.1: # bail if > 10% 0, flag with C=-2
            continue

        # Read S
        S_buffer.setBounds(s_xr[0]-KW, s_yr[0]-KW, search_range_x+2.*KW,
                           search_range_y+2.*KW, update=1)
        S_img=S_buffer.z[0,:,:]
        if np.mean(S_img<=S_buffer.noData) > .25: # bail if > 25% 0
            continue
        out = run_one_block(template_size, KW, min_template_sigma, Xc, Yc, int(search_range_x),
                            int(search_range_y), dx0, dy0, T_img, S_img, T_buffer.noData)

        indices.append(these_ind[count])
        c.append(out[0])
        x.append(out[1])
        y.append(out[2])
        sigma.append(out[3])
        dx.append(out[4])
        dy.append(out[5])

    return(indices[:], c[:], x[:], y[:], sigma[:], dx[:], dy[:])

class fft_matcher(object):
    """
    class to perform fft matches on a pair of image files.  Uses the GDAL
    API for reads and writes, and the norm_xcorr package to do the matching
    Arguments:
        For initialization:
            Tfile  The template file -- small images are extracted from this file
                and correlated against sub-images of Sfile
            Sfile  The search file.

        For correlation:
            template_size : Size of the square template
            search_range_xy_i  : [2, N], array-like  search windows in x and y
                        to use at each center (n_columns, n_rows)
            dxy0_i   : [2, N], array-like  initial offset estimates
                        (delta_col,  delta_row)
            XYc_i    : [2,N] pixel centers to search in the template image
                        (col, row)

        Outputs from correlation:
            xyC     :  Pixel centers in the template image
            dxy     :  pixel offsets needed to shift the template to match the search image
            C       :  Correlation value for the best match (0<C<1).
                        -1 indicates invalid search or template data
    """
    def __init__(self, Tfile, Sfile, processes, user_nodata):
        self.Tfile  = Tfile
        self.Sfile  = Sfile
        self.processes = processes
        self.T_ds   = gdal.Open(Tfile, gdalconst.GA_ReadOnly)
        self.T_band = self.T_ds.GetRasterBand(1)
        self.S_ds   = gdal.Open(Sfile, gdalconst.GA_ReadOnly)
        self.S_band = self.S_ds.GetRasterBand(1)
        self.Nx     = self.T_band.XSize
        self.Ny     = self.T_band.YSize
        self.KW     = 13  # pad the edges by this amount to avoid edge effects
        self.S_sub  = im_subset(0, 0, self.S_band.XSize, self.S_band.YSize,
                                self.S_ds, user_nodata, pad_val=0, Bands=[1])
        self.T_sub  = im_subset(0, 0, self.T_band.XSize, self.S_band.YSize,
                                self.T_ds, user_nodata, pad_val=0, Bands=[1])

        search_geotransform      = self.S_sub.source.GetGeoTransform()
        self.search_geotransform = search_geotransform
        self.UL_S = np.array([search_geotransform[0], search_geotransform[3]])
        LL_S      = np.array([search_geotransform[0], 
                              search_geotransform[3]+self.S_sub.Nr*search_geotransform[5]])
        UR_S      = np.array([search_geotransform[0]+self.S_sub.Nc*search_geotransform[1], 
                              search_geotransform[3]])

        template_geotransform      = self.T_sub.source.GetGeoTransform()
        self.template_geotransform = template_geotransform
        self.UL_T = np.array([template_geotransform[0], template_geotransform[3]])
        LL_T      = np.array([template_geotransform[0], 
                              template_geotransform[3]+self.T_sub.Nr*template_geotransform[5]])
        UR_T      = np.array([template_geotransform[0]+self.T_sub.Nc*template_geotransform[1],  
                              template_geotransform[3]])

        XR = [np.max([LL_S[0], LL_T[0]]), np.min([UR_S[0], UR_T[0]])]
        YR = [np.max([LL_S[1], LL_T[1]]), np.min([UR_S[1], UR_T[1]])]
        self.XR = XR
        self.YR = YR
        self.T_c0c1 = np.array([max([0, (LL_T[0]-XR[0])/template_geotransform[1]]),
                                min(self.T_sub.Nc, (XR[1]-LL_T[0])/template_geotransform[1])]).astype(int)
        self.T_r0r1 = np.array([max([0 , (YR[1]-UR_T[1])/template_geotransform[5]]),
                                min([self.T_sub.Nr, (LL_T[1]-YR[1])/template_geotransform[5]])]).astype(int)

        self.blocksize   = 2048
        self.user_nodata = user_nodata

    def __call__(self, template_size, search_range_xy_i, dxy0_i, XYc_i, min_template_sigma):

        # loop over pixel centers
        self.C   = np.zeros([XYc_i.shape[0], 1])-1
        self.sigma_template = (self.C).copy()
        self.dxy = np.zeros([XYc_i.shape[0], 2])
        self.xyC = np.zeros([XYc_i.shape[0], 2])

        xg0, yg0 = np.meshgrid(np.arange(0, self.T_band.XSize, self.blocksize),
                               np.arange(0, self.T_band.YSize, self.blocksize))

        TaskParams = []
        for xgi, ygi in zip(xg0.ravel(), yg0.ravel()):
            these = np.logical_and(np.logical_and(XYc_i[:,0] > xgi,
                                                  XYc_i[:,0] <= xgi+self.blocksize),
                                   np.logical_and(XYc_i[:,1] > ygi,
                                                  XYc_i[:,1] <= ygi+self.blocksize))
            if ~np.any(these):
                continue
            XYc  = XYc_i[these,:]
            dxy0 = dxy0_i[these,:]
            search_range_xy = search_range_xy_i[these,:]
            these_ind = np.array(np.nonzero(these)).ravel()
            param = (self.Tfile, self.Sfile, self.processes, xgi.copy(), ygi.copy(), these_ind.copy(),
                     template_size, search_range_xy.copy(), dxy0.copy(), XYc.copy(), min_template_sigma,
                     self.user_nodata)
            TaskParams.append(param)

        if self.processes > 1: # Run using multiple processes
            pool = Pool(processes=self.processes)
            Out = pool.map(run_blocks, TaskParams, chunksize=1)
        else: # Run using single process (for debugging)
            Out = [run_blocks(TP) for TP in TaskParams]

        for out in Out:
            (indices, c, x, y, sigma, dx, dy) = out

            for i in range(len(indices)):
                ind = indices[i]
                self.C[ind] = c[i]
                self.xyC[ind] = [x[i], y[i]]
                self.sigma_template[ind] = sigma[i]
                self.dxy[ind,:] = [dx[i], dy[i]]

        return (self.xyC).copy(), (self.dxy).copy(), (self.C).copy(), (self.sigma_template).copy()

def make_pt_2_neighbors(tri):
    """ make a dictionary of the neighbors of each point in triangulation tri """
    pt_dict=dict()
    for vlist in tri.simplices:
        for i in vlist:
            if not i in pt_dict:
                pt_dict[i]=list()
            for k in vlist:
                if k != i:
                    pt_dict[i].insert(0,k)
    for i in range(tri.points.shape[0]):
        pt_dict[i]=np.unique(pt_dict[i]).tolist()
    return pt_dict


def unique_rows(data):
    """ return the indices for the unique rows of matrix (data) """

    udict = dict()
    for row in range(len(data)):
        row_data = tuple(data[row,:])
        if not row_data in udict:
            udict[row_data] = row
    uInd=udict.values()

    new_ind = []
    for key in udict:
        new_ind.append(udict[key])

    new_rows = data[new_ind, :]
    
    return new_ind, new_rows

def search_new_pts(xy, dxy, t_size, matcher, min_template_sigma=0., mask=None):
    """
    use an image matcher object to perform a template match between
    two images at the points defined by xy, using search windows that
    span the range in dxy
    """

    if mask is not None:
        x_geo    = matcher.template_geotransform[0] + matcher.template_geotransform[1]*xy[:,0]
        y_geo    = matcher.template_geotransform[3] + matcher.template_geotransform[5]*xy[:,1]
        good_pts = img_interpolate_linear(x_geo, y_geo, mask['Z'], mask['GT'])
        good_pts = (~np.isnan(good_pts)) & (good_pts > 0)
        if ~np.any(good_pts):
            return np.zeros(0), np.zeros(0), np.zeros(0), np.zeros(0)
        xy  = xy [good_pts,:]
        dxy = dxy[good_pts,:]

    dxy0 = np.c_[dxy[:,0]+dxy[:,1], dxy[:,2]+dxy[:,3]]/2.
    dxyR = np.c_[dxy[:,1]-dxy[:,0], dxy[:,3]-dxy[:,2]]
    sw   = np.array(2.**np.ceil(np.log2(np.maximum(dxyR+t_size, t_size+16))))
    xy_new, dxy_new, C_new, Pt_new = matcher(t_size, sw, dxy0, xy,
                                             min_template_sigma=min_template_sigma)
    xy_bad_mask = xy_new [C_new.ravel()==-2,:]
    good        = C_new>0
    xy_new      = xy_new [good.ravel(),:]
    dxy_new     = dxy_new[good.ravel(),:]
    C_new       = C_new  [good.ravel()  ]
    Pt_new      = Pt_new [good.ravel()  ]
    return xy_new, dxy_new, C_new, xy_bad_mask

def neighborhood_range(pt_list, dx_mat, dy_mat, tri, pt_dict, max_dist=None, calc_min_slope=None):
    """
    for each point in pt_list, return the maximum and minimum offset to its neighbors.
    inputs:
        pt_list:  list of points to be searched
        dx_mat: sparse matrix with x disparity values for each pixel in the
                template image
        dy_may: ditto, but for y
        tri:    a triangulation.  th points field in this triangulation is
                used to get the x and y offsets for the point indices
        pt_dict:  a dictionary giving the neighbor point numbers for each
                  point in tri
    output:
        dx_range: an Nx4 martix.  columns 0 and 1 give the min and max x
        offsets around each point, columns 2 and 3 give the min and max y.
    """
    dxy_range = np.zeros([len(pt_list), 4])
    if calc_min_slope is not None:
        dxy_slope = np.zeros([len(pt_list), 2])
    for row, pt in enumerate(pt_list):
        neighbors = pt_dict[pt]
        nbhd_pts  = tri.points[neighbors,:]
        this_pt   = tri.points[pt,:]
        if max_dist is not None:
            dist2    = (nbhd_pts[:,1]-this_pt[1])**2 + (nbhd_pts[:,0]-this_pt[0])**2
            nbhd_pts = nbhd_pts[dist2 < max_dist**2,:]
        if nbhd_pts is None or min(nbhd_pts.shape) < 1:
            continue
        dx_vals = np.append(np.array(dx_mat[nbhd_pts[:,1], nbhd_pts[:,0]]), dx_mat[int(this_pt[1]), int(this_pt[0])])
        dy_vals = np.append(np.array(dy_mat[nbhd_pts[:,1], nbhd_pts[:,0]]), dy_mat[int(this_pt[1]), int(this_pt[0])])
        if calc_min_slope is not None:
            dist     = np.sqrt((nbhd_pts[:,1]-this_pt[1])**2 + (nbhd_pts[:,0]-this_pt[0])**2)
            dx_slope = np.abs(dx_vals[0:-1]-dx_vals[-1])/dist
            dy_slope = np.abs(dy_vals[0:-1]-dy_vals[-1])/dist
            dxy_slope[row,:] = [np.min(dx_slope), np.min(dy_slope)];

        dxy_range[row,[0,1]] = [np.min(dx_vals), np.max(dx_vals)]
        dxy_range[row,[2,3]] = [np.min(dy_vals), np.max(dy_vals)]
    if calc_min_slope is not None:
        return dxy_range, dxy_slope
    else:
        return dxy_range ##, dxy_bar

def test_epipolar(dxy_0, ep_vec, dxy, tol):
    """
    given an origin vector and an epipolar unit vector, projects a set of
    points against the epipolar vector and gives their perpendicular offset
    WRT that vector, and a boolean array that shows whether this offset is
    smaller that tol.
    """
    delta=np.abs(np.dot((dxy-dxy_0), [ep_vec[1], -ep_vec[0]]))
    disp_mag=np.sqrt((dxy[:,0]-dxy_0[0])**2 +(dxy[:,1]-dxy_0[1])**2)
    good=(delta < tol) | (delta < 0.02 * disp_mag )
    return good, delta

def est_epipolar_vec(dxy, C, C_tol, ep_vec_initial, F_use):
    """
    given a set of offsets and correlation values:
        For those point with  C>C_tol, find the median offset and the
        eigenvectors of the distribution of the points around that median
        offset.  The eigenvector corresponding to the largest eigenvalue is
        assumed to be the epipolar vector.

    """
    if ep_vec_initial is not None:
        return ep_vec_initial, np.zeros([1,2])

    P_use   =100*np.array([(1-F_use)/2., (F_use+1.)/2.])
    ctr_vals=((dxy[:,0] >= np.percentile(dxy[:,0], P_use[0])) & 
              (dxy[:,0] <= np.percentile(dxy[:,0], P_use[1])) & 
              (dxy[:,1] >= np.percentile(dxy[:,1], P_use[0])) & 
              (dxy[:,1] <= np.percentile(dxy[:,1], P_use[1]))  )
    good        = ctr_vals & np.array(C.ravel()>C_tol, dtype=bool)
    dxy1        = np.array(dxy[good.ravel(),:])
    dxy_ctr     = np.array([np.median(dxy1[:,0]), np.median(dxy1[:,1])])
    dxy_0       = dxy1.copy()
    dxy_0[:,0]  = dxy_0[:,0]-dxy_ctr[0]
    dxy_0[:,1]  = dxy_0[:,1]-dxy_ctr[1]
    vals, vecs  = np.linalg.eig(np.dot(dxy_0.transpose(), dxy_0))
    biggest_vec = vecs[:,np.argmax(abs(vals)) ]
    return biggest_vec, dxy_ctr

def img_interpolate_linear( x, y, I, GT, nodata_val=np.NaN):
    result=np.zeros_like(x)+nodata_val
    # get the col and row in the image
    col  = (x-GT[0])/GT[1]
    row  = (y-GT[3])/GT[5]
    good = (row > 0) & (row < I.shape[0]-1) & (col > 0) & (col<I.shape[1]-1)
    if ~np.any(good):
        return result
    # perform interpolation only for in-bounds xy
    row = row[good]
    col = col[good]
    rx  = row-np.floor(row)
    ry  = col-np.floor(col)
    row = np.floor(row).astype('int')
    col = np.floor(col).astype('int')
    # add the contributions of each neighbor
    ztemp = np.zeros_like(row)
    ztemp =         (1-ry)*(1-rx)*I[(row, col)]
    ztemp = ztemp + (ry)*(1-rx)*I[(row+1,col)]
    ztemp = ztemp + (1-ry)*(rx)*I[(row,col+1)]
    ztemp = ztemp + (ry)*(rx)*I[(row+1,col+1)]
    result[good] = ztemp
    return result

def grid_disp(xg, yg, xy, dx, dy, ex, ey, L_valid, N_coarse, dispDs, spreadDs, downscale, cr_out):
    grid_spacing = xg[0,1]-xg[0,0]
    # valid kernel dilates the distance mask, which tells whether to correlate
    sigma_valid  = L_valid/3;
    N_valid      = np.ceil(sigma_valid*3/grid_spacing)
    kernel_valid = np.exp(-0.5*(grid_spacing*np.arange(-N_valid, N_valid+1)/(sigma_valid))**2)
    # smoothing kernel is used to make a large-scale smoothed version of the disparities
    # set its smoothing width to 512 pixels, make sure the whole kernel is at least 1024 pixels wider than the valid kernel
    N_sm      = 2048/downscale+np.ceil(L_valid/grid_spacing);
    sigma_sm  = N_sm/4.
    kernel_sm = np.exp(-0.5*(np.arange(-N_sm, N_sm+1)/sigma_sm)**2)

    row  = np.floor((xy[:,1]-yg[0,0])/(yg[1,0]-yg[0,0])).astype('int')
    col  = np.floor((xy[:,0]-xg[0,0])/(xg[0,1]-xg[0,0])).astype('int')
    
    good = (row>=0) & (row < xg.shape[0]-1) & (col >=0) & (col < xg.shape[1]-1)
    dist_mask = np.zeros_like(xg)
    dist_mask[row[good], col[good]] = 1.
    dist_mask = convolve1d(convolve1d(dist_mask, kernel_valid, axis=0, mode='constant'), 
                           kernel_valid, axis=1, mode='constant')
    dist_mask = dist_mask > np.exp(-0.5*(L_valid/sigma_valid)**2)

    # take a subset of points from dist_mask to limit the interpolation distance for each quantity
    # Otherwise, the triangulation makes long skinny triangles along concave boundaries of the data points
    mask1 = dist_mask[0::N_coarse, 0::N_coarse].ravel()
    xg1   = xg[0::N_coarse, 0::N_coarse].ravel()
    yg1   = yg[0::N_coarse, 0::N_coarse].ravel()
    xg1   = xg1[mask1==0]
    yg1   = yg1[mask1==0]
    for count, z in enumerate((dx, dy)):
        zi = griddata(np.append(xy, np.c_[xg1, yg1], 0), np.append(z, np.NaN+np.zeros([len(xg1),1]),0), (xg, yg), method='linear')
        zi_smooth = convolve1d(convolve1d(np.nan_to_num(zi), kernel_sm, axis=0, mode='constant'), kernel_sm, axis=1, mode='constant')
        if count<1:
            mask_smooth=convolve1d(convolve1d((~np.isnan(zi)).astype('float32'), kernel_sm, axis=0, mode='constant'), kernel_sm, axis=1, mode='constant')
        zi_smooth[mask_smooth>1e-5 ] = zi_smooth[mask_smooth>1e-5]/mask_smooth[mask_smooth>1e-5]
        zi_smooth[mask_smooth<=1e-5] = 0
        zi[np.isnan(zi)] = zi_smooth[np.isnan(zi)]
        zi = np.nan_to_num(zi)
        # ... and write it out to the disparity file
        dispDs.GetRasterBand(count+1).WriteArray(zi[:,:,0], int(cr_out[0]), int(cr_out[1]))
        if count==0:
            dx_g = zi
        if count==1:
            dy_g = zi
    dispDs.GetRasterBand(3).WriteArray(dist_mask.astype('float32'), int(cr_out[0]), int(cr_out[1]))
    zi = np.nan_to_num(griddata(np.append(xy, np.c_[xg1, yg1], 0), np.append(ex, np.NaN+np.zeros([len(xg1),1]),0), (xg, yg), method='linear'))
    zi[dist_mask==0] = 0
    spreadDs.GetRasterBand(1).WriteArray(zi[:,:,0], int(cr_out[0]), int(cr_out[1]))
    zi = np.nan_to_num(griddata(np.append(xy, np.c_[xg1, yg1], 0), np.append(ey, np.NaN+np.zeros([len(xg1),1]),0), (xg, yg), method='linear'))
    zi[dist_mask==0] = 0
    spreadDs.GetRasterBand(2).WriteArray(zi[:,:,0],                   int(cr_out[0]), int(cr_out[1]))
    spreadDs.GetRasterBand(3).WriteArray(dist_mask.astype('float32'), int(cr_out[0]), int(cr_out[1]))
    return

#==============================================================================
# Main program
#==============================================================================

def main():
    """ For a pair of images, perform a sparse image match using the full
    resolution of the images.  Begin at resolution coarse_skip.  For each
    point in a grid spanning the image 1, extract a template image of
    size template_size, and use fft-based correlation to find its match in image 2,
    over a wide search window (size xsearch by ysearch).  Based on matches from
    this coarse grid, estimate an epipolar vector (the largest eigenvector of
    the disparity vectors) and a tolerance for disparities to be parallel to
    this vector (use max (16 pix, 90th percentile of differeces)).

    Next, refine points for which the range of disparities among the
    neighbors are larger than tolerance refine_tol.  Refinement consists
    of adding the eight grid neighbors of a point, at half the previous
    point spacing.  During refinment, the search windows are selected to span
    the neighbors' x and y disparities, and are padded by 16 pixels and padded
    again to give a power-of-two search size.

    After each refinement, the new matches are checked againt the epipolar
    vector, and all the matched points are checked for convergence.  Refinment
    continues until the point spacing reaches fine_skip, or until no points
    need to be refined.

    Writes the output disparity image files:
      [output_prefix]/-D_sub.tif
      [output_prefix]/-D_sub_spread.tif

    ???
    Results are written to oufile:
        pixel_x
        pixel_y
        x_disparity
        y_diparity
        correlation value
        (max-min) neighbor x disparity
        (max-min) neighbor y disparity
     """

    usage = '''%prog [options] left_image right_image output_prefix

  ''' + get_asp_version()

    parser = OptionParser(usage=usage)
    parser.set_defaults(epipolar_fltr=True)
    parser.add_option("-x", "--xsearch",   dest="search_range_x", default=1024-56, type="int",
                      help="initial x search range, pixels (%default)")
    parser.add_option("-y", "--ysearch",   dest="search_range_y", default=1024-56, type="int",
                      help="initial y search range, pixels (%default)")
    parser.add_option("-t", "--template",  dest="template_size",  default=56,      type="int",
                      help="template size, pixels (%default)")
    parser.add_option("-c", "--coarse",    dest="coarse_skip",    default=None,    type="int",
                      help="initial search-point spacing, pixels (%default)")
    parser.add_option("-f", "--fine",      dest="fine_skip",      default=64,      type="int",
                      help="final search-point spacing, pixels (%default)")
    parser.add_option("-r","--refine_tol", dest="refine_tol",     default=8,       type="int",
                      help="pixel range tolerance for refinement (%default)")
    parser.add_option("-d","--output_dec_scale", dest="output_scale",  default=8,  type="int",
                      help="if set, output a second copy of the offsets scaled by this amount (%default), to file called OUTFILE_(OUTPUT_SCALE)")
    parser.add_option("-p","--output_pad",   dest="output_pad",  default=2, type="int",
                      help="pad the output search range by this amount (%default)")
    parser.add_option("-s", "--sigma_t_min", dest="sigma_t_min", default=0., type="float",
                       help="matches will not be calculated if the standard deviation of the template is less than this value (%default)")
    parser.add_option("-R", "--R_lim_min", dest="R_lim_min",  default=32., type="float",
                       help="The minimum-neighbor-disparity parameter is set to the maximum of this parameter and the 98th percentile of all minimum neighbor disparities.\n  Points whose neighbors have a minimum disparity range larger than this parameter will be removed (%default)")
    parser.add_option("-l", "--R_limax_disp_range", dest="R_limax_disp_range",  default=128., type="float",
                       help="The minimum-neighbor-disparity parameter is set to the minimum of this parameter and the 98th percentile of all minimum neighbor disparities.\n  Points whose neighbors have a minimum disparity range larger than this parameter will be removed (%default)")
    parser.add_option("-m", "--mask_file", dest="mask_file", default=None, type="string",
                      help="if specified, use this geotif mask file to reject template-image points that should not be searched (1=search, 0=don't)")
    parser.add_option("-M", "--Max_disp_range", dest="max_disp_range",  default=64.,       type="float",
                       help="Un-scaled disparity range in output limited to this value (%default)")
    parser.add_option("-P", "--processes",      dest="processes",     default=cpu_count(), type="int",
                      help="The number of processes to use (%default)")
    parser.add_option("--no_epipolar_fltr",     action="store_false", dest="epipolar_fltr",
                      help="Disable filtering of disparities by distance from epipolar vector")
    parser.add_option("-e", "--epipolar_axis",  dest="epipolar_axis", default=None,  type="int",
                      help="If set, specify the axis that is epipolar 0=x, 1=y (%default)")
    parser.add_option("-n", "--nodata-value",   dest="user_nodata",   default=None,  type="int",
                      help="The no-data value (pixel values <= nodata are not not used. (%default)")
    parser.add_option("-w", "--fill-dist",      dest="fill_dist",     default=1000., type="float",
                      help="Fill in gaps of this size or more with smoothed values. (%default)")
    parser.add_option("-D", "--Debug",          dest="Debug",         default=False, action="store_true",
                      help="Output deugging info and text file of correlation-estimate points")
    (options, args) = parser.parse_args()

    if len(args) < 3:
        parser.print_help()
        die('\nERROR: Missing input files or output prefix', code=2)

    if options.mask_file is not None:
        mask_file=gdal.Open(options.mask_file)
        in_mask={'GT': np.array(mask_file.GetGeoTransform()), 'Z': np.array(mask_file.ReadAsArray())}
    else:
        in_mask=None

    # undocumented options #
    F_ep_pts_to_use = 0.9
    ep_tol_max      = 24
    ep_tol_min      = 4
    dxy_slope_tol   = 3.0
    corr_score_tolerance = 0.4  # minimum quality needed in defining the epipolar vector estimate

    template_file = args[0]
    search_file   = args[1]
    output_prefix = args[2]
    sys.stdout.flush()
    print("Start: " + str(datetime.datetime.now()))
    print("template_file = " + template_file)
    print("search_file   = " + search_file  )
    print("output_prefix = " + output_prefix)

    # Minimum number of coarse points in any dimension
    min_npts = 8

    # Adjust the input parameters based on the images. This
    # may need to become more advanced.
    T_ds   = gdal.Open(template_file, gdalconst.GA_ReadOnly)
    T_band = T_ds.GetRasterBand(1)
    # Limit search range to less than a quarter of the image size
    # - This appears to be a hard limitation imposed by the FFT usage??
    prev2x = prevpow2( T_band.XSize)
    if options.search_range_x > prev2x/4:
        options.search_range_x = prev2x/4
        print("Warning: Decreasing xsearch value to: %d" % options.search_range_x)
    prev2y = prevpow2(T_band.YSize)
    if options.search_range_y > prev2y/4:
        options.search_range_y = prev2y/4
        print("Warning: Decreasing ysearch value to: %d" % options.search_range_y)

    # Automatically set coarse_skip if not specified
    if options.coarse_skip is None:
        options.coarse_skip = np.min([prev2x/min_npts, prev2y/min_npts])
        print("Setting coarse skip to: %d" % options.coarse_skip)
    else:
        # Check to make sure coarse_skip isn't TOO coarse
        # Minimum of 4 points in smallest dimension
        if options.coarse_skip > np.min([prev2x/min_npts, prev2y/min_npts]):
            options.coarse_skip = np.min([prev2x/min_npts, prev2y/min_npts])
            print("Warning: decreasing coarse skip to: %d" % options.coarse_skip)

    search_range_x = options.search_range_x
    search_range_y = options.search_range_y
    template_size  = options.template_size

    # If the user specified an epipolar axis, limit the search range to primarily along that axis
    #  with only a small search range perpendicular to that axis.
    if options.epipolar_axis is not None: # Handle user epipolar axis input
        ep_vec_initial = np.zeros([1,2])
        ep_vec_initial[options.epipolar_axis] = 1.
        if options.epipolar_axis == 0: # X axis
            search_range_y = np.min([search_range_y, search_range_x/10.]);
        else: # Y axis
            search_range_x = np.min([search_range_x, search_range_y/10.]);
    else:
        ep_vec_initial = None

    # Create the output directory
    out_dir = os.path.dirname(output_prefix)
    if len(out_dir) == 0: 
        out_dir = '.'
    if not os.path.exists(out_dir): 
        os.makedirs(out_dir)

    # Generate a list of skip distances from the course level to the fine level.
    skip_vals = 2.**np.arange(np.floor(np.log2(options.coarse_skip/2.)),
                              np.floor(np.log2(options.fine_skip  /2.)), -1)

    # Initialize the matcher object
    matcher = fft_matcher(template_file, search_file, options.processes, options.user_nodata)

    # Define the initial search points
    # ??
    edge_pad = np.array([search_range_x/4.+template_size/2, 
                         search_range_y/4.+template_size/2])
    x_centers = np.arange(matcher.T_c0c1[0]+edge_pad[0], 
                          matcher.T_c0c1[1]-edge_pad[0], options.coarse_skip)
    if x_centers[-1] < matcher.T_c0c1[1]:
        x_centers = np.append(x_centers, int((x_centers[-1]+matcher.T_c0c1[1])/2.))
    y_centers = np.arange(matcher.T_r0r1[0]+edge_pad[1], 
                          matcher.T_r0r1[1]-edge_pad[1], options.coarse_skip)
    if y_centers[-1] < matcher.T_r0r1[1]:
        y_centers = np.append(y_centers, int((y_centers[-1]+matcher.T_r0r1[1])/2.))
    [x_centers_grid, y_centers_grid] = np.meshgrid(x_centers, y_centers)
    xy_centers0 = np.c_[x_centers_grid.ravel(), y_centers_grid.ravel()]

    # Find the offset that matches the origins of the two images
    geotransform      = matcher.search_geotransform
    origin_diff       = np.c_[matcher.UL_T - matcher.UL_S].transpose()
    origin_diff[0][0] = np.floor(origin_diff[0][0]/np.abs(geotransform[1]))
    origin_diff[0][1] = np.floor(origin_diff[0][1]/np.abs(geotransform[5]))
    print("Running initial search: " + str(datetime.datetime.now()))

    # Find best correlation matches in the search image for each center in the template image
    # ???
    dxy0      = np.dot(np.c_[np.ones_like(xy_centers0[:,0])], origin_diff*[1, -1])
    dxy_score = np.c_[dxy0[:,0]-search_range_x/2., 
                      dxy0[:,0]+search_range_x/2, 
                      dxy0[:,1]-search_range_y/2, 
                      dxy0[:,1]+search_range_y/2]
    xy, dxy, corr_scores, xy_bad_mask = search_new_pts(xy_centers0, dxy_score, template_size, matcher,
                                             min_template_sigma=options.sigma_t_min, mask=in_mask)

    if options.epipolar_fltr:
        # Throw out disparity results which are too far from the epipolar line
        
        # Fit an epipolar line to the detected offsets
        ep_vec, dxy_ctr = est_epipolar_vec(dxy, corr_scores, corr_score_tolerance, 
                                           ep_vec_initial, F_ep_pts_to_use)
        # Compare the offsets to the fit line
        tolerance = 32
        good_indices, ep_dist = test_epipolar(dxy_ctr, ep_vec, dxy, tolerance)
        
        # Get the 90th percentile distance from the epipolar line
        ep_f90 = np.percentile(ep_dist[corr_scores.ravel() > corr_score_tolerance], 90)
        # Use the 90th percentile dist as the tolerance unless it falls out of bounds
        ep_tol = np.minimum(ep_tol_max, np.maximum(ep_tol_min, ep_f90))
        
        # If any points were marked as bad in the first epipolar test...
        if (ep_vec_initial is not None) and np.any(~good_indices):
            # Run the fit again with just good points and recompute the epipolar tolerance.
            ep_vec, dxy_ctr = est_epipolar_vec(dxy        [good_indices,:], 
                                               corr_scores[good_indices,:], 
                                               corr_score_tolerance, None, F_ep_pts_to_use)
            ep_f90          = np.percentile(ep_dist[corr_scores.ravel() > corr_score_tolerance], 90)
            ep_tol          = np.minimum(ep_tol_max,np.maximum(ep_tol_min, ep_f90))
            good_indices, ep_dist   = test_epipolar(dxy_ctr, ep_vec, dxy, ep_tol)
        print(" --- ep vec estimated at(%f,%f), tolerance=%f, ep_dist_f90=%f" 
              % (ep_vec[0], ep_vec[1], ep_tol, ep_f90))
    else:
        # Create boolean array with True for all values in dx, don't filter the points.
        good_indices = (dxy != np.nan)[:,0]
    if len(good_indices) == 0:
        print('No correlation matches detected!')
        return -1

    # Make sparse matrices for storing dx and dy values, store initial values
    im_shape     = [matcher.Ny, matcher.Nx]
    dx_mat       = coo_matrix((dxy[good_indices,0], (xy[good_indices,1], xy[good_indices,0])), shape=im_shape).tocsr()
    dy_mat       = coo_matrix((dxy[good_indices,1], (xy[good_indices,1], xy[good_indices,0])), shape=im_shape).tocsr()
    score_mat    = coo_matrix(((corr_scores[good_indices]).ravel(), 
                               (xy[good_indices,1], xy[good_indices,0])), shape=im_shape).tocsr()
    bad_mask_mat = coo_matrix((np.ones_like(xy_bad_mask[:,0]), 
                              (xy_bad_mask[:,1], xy_bad_mask[:,0])), shape=im_shape).tocsr()
    xy_list = xy[good_indices,:]
    
    # Points tested so far are the nozero members of score_mat
    all_pts = np.c_[score_mat.nonzero()];
    all_pts = all_pts[:,[1,0]];
    
    # ???
    # Perform triangulation of points, then get min/max dx and dy differences with the neighboring points.
    tri        = spatial.Delaunay(all_pts)
    pt_dict    = make_pt_2_neighbors(tri)
    dxy_score  = neighborhood_range(np.arange(0, all_pts.shape[0]), dx_mat, dy_mat, tri, pt_dict)
    indices_to_refine = np.arange(0, xy_list.shape[0] )
       
    # Indices into the vales in the dxy_score variable
    OFFSET_MIN_X = 0
    OFFSET_MAX_X = 1
    OFFSET_MIN_Y = 2
    OFFSET_MAX_Y = 3
    
    # Define the pattern of pixel centers to refine at each step.  Duplicates will be deleted.
    refine_x = np.array([-1.,  0.,  1., -1.,  1., -1., 0., 1.])
    refine_y = np.array([-1., -1., -1.,  0.,  0.,  1., 1., 1.]);
    
    # Iterate through our disparity search coarseness levels, low to high res.
    recalc_neighborhood_range = False
    for delta_x in skip_vals:
        print("----------refining to pixel skip length %d---------" % delta_x)
        print("Refining start time: " + str(datetime.datetime.now()))
        if len(indices_to_refine)==0:
            print("    No refinement points for scale: %d" % delta_x)
            recalc_neighborhood_range=True
            break
        
        # Add neighbors of the last set of points to the list
        #sc=plt.scatter(all_pts[indices_to_refine,0], all_pts[indices_to_refine,1], c=dxy_score[:,1]-dxy_score[:,0]); plt.axis('equal'); plt.colorbar(sc)
        new_x = (np.tile(all_pts[indices_to_refine, 0], [8,1]).transpose()+refine_x*delta_x).ravel()
        new_y = (np.tile(all_pts[indices_to_refine, 1], [8,1]).transpose()+refine_y*delta_x).ravel()
        
        # The search range for the new points is set based on the 
        #  min and max of the dxy_scores of the refine points
        new_dxy_score = np.array([np.tile(dxy_score[:,OFFSET_MIN_X], [8,1]).transpose().ravel(),
                                  np.tile(dxy_score[:,OFFSET_MAX_X], [8,1]).transpose().ravel(),
                                  np.tile(dxy_score[:,OFFSET_MIN_Y], [8,1]).transpose().ravel(),
                                  np.tile(dxy_score[:,OFFSET_MAX_Y], [8,1]).transpose().ravel()]).transpose()

        # define min and max pt indices
        num_image_rows = im_shape[0]
        num_image_cols = im_shape[1]
        x_lims         = np.array([1, num_image_cols-1])
        y_lims         = np.array([1, num_image_rows-1])
        # clamp range of x and y to these indices
        new_x[new_x > x_lims[1]] = x_lims[1]
        new_x[new_x < 0        ] = x_lims[0]
        new_y[new_y > y_lims[1]] = y_lims[1]
        new_y[new_y < 0        ] = y_lims[0]

        # ??
        not_dups      = np.squeeze(np.array(score_mat[new_y, new_x]==0))
        new_xy        = np.c_[new_x[not_dups], new_y[not_dups]]
        uRows, new_xy = unique_rows(new_xy)

        new_dxy_score = new_dxy_score[uRows,:]
        N_search      = new_xy.shape[0]

        # Search for the best image correlation matches around our new points
        new_xy, new_dxy, new_corr_scores, new_xy_bad = search_new_pts(new_xy, new_dxy_score, template_size, matcher, 
                                                                      min_template_sigma=options.sigma_t_min, mask=in_mask)
        if len(new_xy)==0:
            print("    no new points found")
            recalc_neighborhood_range = True
            break

        # Either epipolar filter the points or call them all good.
        if options.epipolar_fltr:
            good_indices, ep_dist = test_epipolar(dxy_ctr, ep_vec, new_dxy, ep_tol)
            print("    searched %d points, found %d good matches" % (N_search, np.sum(good_indices) ))
        else:
            good_indices = (new_dxy != np.nan)[:,0]

        # TODO: Make a function out of this code?
        # Add the new points to the sparse matrix of tested points
        dx_mat       = dx_mat+coo_matrix((new_dxy[good_indices,0], 
                                          [new_xy[good_indices,1], new_xy[good_indices,0]]), im_shape).tocsr()
        dy_mat       = dy_mat+coo_matrix((new_dxy[good_indices,1], 
                                          [new_xy[good_indices,1], new_xy[good_indices,0]]), im_shape).tocsr()
        score_mat    = score_mat+coo_matrix(((new_corr_scores[good_indices]).ravel(), 
                                             [new_xy[good_indices,1], new_xy[good_indices,0]]), im_shape).tocsr()
        bad_mask_mat = bad_mask_mat + coo_matrix((1*np.ones_like(new_xy_bad[:,0]), (new_xy_bad[:,1], new_xy_bad[:,0])), shape=im_shape).tocsr()
        bad_indices  = ~good_indices
        bad_mask_mat = bad_mask_mat + coo_matrix((2*np.ones_like(new_xy[bad_indices,1]), 
                                                  (new_xy[bad_indices,1], new_xy[bad_indices,0])), shape=im_shape).tocsr()

        #ID_new_pts=lil_matrix((np.ones_like(new_dxy[good_indices,0]), [new_xy[good_indices,1], new_xy[good_indices,0]]), im_shape).tocsr()

        all_pts = np.c_[score_mat.nonzero()];
        all_pts = all_pts[:,[1,0]];
        # Extract the dx and dy values, re-estimate the ep vector
        dxy = np.array(np.c_[dx_mat[all_pts[:,1], all_pts[:,0]].transpose(), 
                             dy_mat[all_pts[:,1], all_pts[:,0]].transpose()])
        C   = np.array(score_mat[all_pts[:,1], all_pts[:,0]].transpose())

        if options.epipolar_fltr and ep_vec_initial is not None:
            ep_vec, dxy_ctr = est_epipolar_vec(dxy, C, corr_score_tolerance)

        # Triangulate all the good points so far
        tri     = spatial.Delaunay(all_pts)
        pt_dict = make_pt_2_neighbors(tri)
        # Zero out points for which delta(disparity)/delta(dist) is too large
        # - ie, delete points with too rapid rate of disparity change.
        dxy_score, min_dxy_slope = neighborhood_range(range(all_pts.shape[0]), dx_mat, dy_mat, tri, pt_dict, calc_min_slope=True)
        bad_indices = np.max(min_dxy_slope, axis=1) > dxy_slope_tol
        
        if options.Debug:
            print("---deleting %d points that failed the d(disparity)/d(dist) test" % np.sum(bad_indices))
            
        if (bad_indices is not None) and np.any(bad_indices):
            # ???
            score_mat    = score_mat.tolil()
            score_mat[all_pts[bad_indices,1], all_pts[bad_indices,0]] = 0
            score_mat    = score_mat.tocsr()
            bad_mask_mat = bad_mask_mat + coo_matrix((4*np.ones_like(all_pts[bad_indices,1]), 
                                                      (all_pts[bad_indices,1], all_pts[bad_indices,0])), shape=im_shape).tocsr()

            all_pts   = np.c_[score_mat.nonzero()]
            all_pts   = all_pts[:,[1,0]];
            tri       = spatial.Delaunay(all_pts)
            pt_dict   = make_pt_2_neighbors(tri)
            dxy_score = neighborhood_range(range(all_pts.shape[0]), dx_mat, dy_mat, tri, pt_dict)


        # Don't refine if we're on the last value of the refinement list
        if delta_x == skip_vals[-1]:
            continue
            
        test_pts  = np.arange(0, all_pts.shape[0])
        # Test the new points and their neighbors for convergence
        to_refine = np.logical_or(((dxy_score[:,OFFSET_MAX_X]-dxy_score[:,OFFSET_MIN_X]) > options.refine_tol), 
                                  ((dxy_score[:,OFFSET_MAX_Y]-dxy_score[:,OFFSET_MIN_Y]) > options.refine_tol))
        indices_to_refine = test_pts [to_refine]
        dxy_score         = dxy_score[to_refine,:]
        # N.B.  we can often end up refining more points than we searched on the
        # last round, because points from previous rounds can get marked for refinement
        print("    found %d points to refine" %  len(indices_to_refine))

    # END LOOP through pixel skip sizes

    if recalc_neighborhood_range:
        all_pts   = np.c_[score_mat.nonzero()];
        all_pts   = all_pts[:,[1,0]];
        dxy_score = neighborhood_range(range(all_pts.shape[0]), dx_mat, dy_mat, tri, pt_dict)

    # Delete the 1% of matches with the greatest disparity range
    R_dx = dxy_score[:,OFFSET_MAX_X]-dxy_score[:,OFFSET_MIN_X]
    R_dy = dxy_score[:,OFFSET_MAX_Y]-dxy_score[:,OFFSET_MIN_Y]

    R_dx_mat     = coo_matrix((R_dx, [all_pts[:,1], all_pts[:,0]]), im_shape).tocsr()
    R_dy_mat     = coo_matrix((R_dy, [all_pts[:,1], all_pts[:,0]]), im_shape).tocsr()
    R_dxy_score  = neighborhood_range(range(all_pts.shape[0]), R_dx_mat, R_dy_mat, tri, pt_dict)
    P99          = (np.percentile(R_dxy_score[:,OFFSET_MIN_X], 99), 
                    np.percentile(R_dxy_score[:,OFFSET_MIN_Y], 99))
    R_max        = np.maximum(P99,   options.R_lim_min)
    R_max        = np.minimum(R_max, options.R_limax_disp_range)
    bad_xy       = all_pts[np.logical_or((R_dxy_score[:,OFFSET_MIN_X] > R_max[0]),  
                                         (R_dxy_score[:,OFFSET_MIN_Y] > R_max[1])),:]
    bad_mask_mat = bad_mask_mat + coo_matrix((8*np.ones_like(bad_xy[:,1]), (bad_xy[:,1], bad_xy[:,0])), shape=im_shape).tocsr()
    if options.Debug:
        print("rejecting %d detected outliers using the minimum-disparity-difference test with R_max = %f, %f" 
              % (bad_xy.shape[0], R_max[0], R_max[1]))
        
    # Delete the bad value, convert dx dy, and C to lil matrices
    score_mat = score_mat.tolil()
    dx_mat    = dx_mat.tolil()
    dy_mat    = dy_mat.tolil()
    for bad in bad_xy:
        dx_mat   [bad[1], bad[0]] = 0
        dy_mat   [bad[1], bad[0]] = 0
        score_mat[bad[1], bad[0]] = 0
    score_mat = score_mat.tocsr()
    dx_mat    = dx_mat.tocsr()
    dy_mat    = dy_mat.tocsr()
    all_pts   = np.c_[score_mat.nonzero()];
    all_pts   = all_pts[:,[1,0]];
    if len(all_pts) == 0:
        print('No points remaining after outlier rejection!')
        return -1
    
    # Triangulate all the good points so far
    tri       = spatial.Delaunay(all_pts)
    pt_dict   = make_pt_2_neighbors(tri)

    # Check the score for output, ignore differences for points separated by more than 2*coarse skip
    dxy_score = neighborhood_range(range(all_pts.shape[0]), dx_mat, dy_mat, tri, pt_dict, max_dist= 2.*options.coarse_skip)
    R_dx      = dxy_score[:,OFFSET_MAX_X]-dxy_score[:,OFFSET_MIN_X]
    R_dy      = dxy_score[:,OFFSET_MAX_Y]-dxy_score[:,OFFSET_MIN_Y]

    print("--------output stats------")
    for pct in (99, 95, 90, 84):
        P = (pct, np.percentile(R_dx, pct), np.percentile(R_dy, pct))
        print("%dth percetiles (rx,ry) = (%5.2f %5.2f)" % P)

    dx = dx_mat   [all_pts[:,1], all_pts[:,0]].transpose()
    dy = dy_mat   [all_pts[:,1], all_pts[:,0]].transpose()
    C  = score_mat[all_pts[:,1], all_pts[:,0]].transpose()

    # transform the pixel centers to map coordinates
    geotransform = matcher.search_geotransform
    driver       = matcher.T_ds.GetDriver()
    projection   = matcher.T_ds.GetProjection()
    xy           = np.c_[geotransform[0]+all_pts[:,0]*geotransform[1]+all_pts[:,1]*geotransform[2], 
                         geotransform[3]+all_pts[:,0]*geotransform[4]+all_pts[:,1]*geotransform[5]]
    out          = np.c_[xy, dx , dy , C, dxy_score[:,OFFSET_MAX_X]-dxy_score[:,OFFSET_MIN_X]+options.output_pad, 
                                          dxy_score[:,OFFSET_MAX_Y]-dxy_score[:,OFFSET_MIN_Y]+options.output_pad]
    # spit out the good and bad masks
    good_xy = np.c_[score_mat.nonzero()][:,[1, 0]]
    good_xy = np.c_[geotransform[0]+good_xy[:,0]*geotransform[1]+good_xy[:,1]*geotransform[2], 
                    geotransform[3]+good_xy[:,0]*geotransform[4]+good_xy[:,1]*geotransform[5]]
    bad_xy  = np.c_[bad_mask_mat.nonzero()][:,[1, 0]]
    if np.min(bad_xy.shape) > 0:
        bad_flag = bad_mask_mat[bad_xy[:,1], bad_xy[:,0]]
        bad_xy   = np.c_[geotransform[0]+bad_xy[:,0]*geotransform[1]+bad_xy[:,1]*geotransform[2], 
                         geotransform[3]+bad_xy[:,0]*geotransform[4]+bad_xy[:,1]*geotransform[5]]
    else:
        bad_flag = None
        bad_xy   = None
    # cleanup memory before gridding starts
    pt_dict    = None
    tri        = None
    dx         = None
    dy         = None
    dx_mat     = None
    dy_mat     = None
    R_dx_mat   = None
    R_dy_mat   = None
    xy         = None
    matcher    = None
    C          = None
    indices_to_refine = None

    if options.Debug:
        print("--- Debug enabled-----")
        print(" writing csv file: %s" % output_prefix+'.csv')
        outfile=open(output_prefix+'.csv','w')
        outfile.write('%x, y, dx, dy, C, R_dx, R_dy\n')
        for line in out:
            outfile.write("%7.0f, %7.0f, %7.0f, %7.0f, %4.2f, %7.0f, %7.0f\n" %  tuple(x for x in line.tolist()[0]))
        outfile.close
        print(" writing bad points to file: %s" % output_prefix+'_bad_pts.csv')
        print(" bad matches are flagged:")
        print("  1: failed signal strength test")
        print("  2: failed epipolar test")
        print("  4: failed d(disparity)/d(dist) test")
        print("  8: failed the total-delta-disparity test")
        print("-----")
        outfile = open(output_prefix+'_bad_pts.csv', 'w')
        outfile.write('%x, y, flag\n')
        if bad_flag is not None:
            out_bad = np.c_[bad_xy[:,0], bad_xy[:,1], bad_flag.transpose()]
            for line in out_bad:
                outfile.write("%7.2f, %7.2f, %3d\n" %  tuple(x for x in line.tolist()[0]))
        outfile.close
        out_bad  = None
        bad_flag = None
    # END IF (if options.Debug)

    if options.output_scale > 0.:
        out = out/options.output_scale
        # scale x, y, and C back up by output_scale
        for col in [0, 1, 4]:
            out[:,col] = out[:,col]*options.output_scale
        delta_x = options.output_scale*np.abs(geotransform[1])
        x0      = geotransform[0]
        y0      = geotransform[3]
        x0      = delta_x*(np.ceil(x0/delta_x))
        y0      = delta_x*(np.ceil(y0/delta_x))
        Gdx     = geotransform[1]*options.output_scale
        Gdy     = geotransform[5]*options.output_scale

        GT1    = np.array(geotransform)
        GT1[0] = x0;
        GT1[3] = y0;
        GT1[1] = Gdx
        GT1[5] = Gdy
        nx1    = int(im_shape[1]/options.output_scale)
        ny1    = int(im_shape[0]/options.output_scale)
        xg     = (np.arange(0, nx1)+0.5)*Gdx+x0
        yg     = (np.arange(0, ny1)+0.5)*Gdy+y0
        disp_file   = output_prefix + '-D_sub.tif'
        spread_file = output_prefix + '-D_sub_spread.tif'
        dispDs      = driver.Create(disp_file,   nx1, ny1, 3, gdalconst.GDT_Int32)
        spreadDs    = driver.Create(spread_file, nx1, ny1, 3, gdalconst.GDT_Int32)
        # interpolate the scaled dx and dy values
        OutBlocksize = 1024
        c0_out       = np.arange(0, xg.shape[0], OutBlocksize)
        r0_out       = np.arange(0, yg.shape[0], OutBlocksize)
        grid_pad     = options.fill_dist*2
        for c0 in c0_out:
            for r0 in r0_out:
                if options.Debug:
                    print("     gridding output for col %d out of %d, row %d out of %d" % 
                          (int(c0/OutBlocksize)+1, int(c0_out[-1]/OutBlocksize)+1, 
                           int(r0/OutBlocksize)+1, int(r0_out[-1]/OutBlocksize)+1))
                cols = np.arange(c0, np.minimum(c0+OutBlocksize, len(xg)))
                rows = np.arange(r0, np.minimum(r0+OutBlocksize, len(yg)))
                if (len(rows)==0) | (len(cols)==0):
                    continue
                [xg_sub, yg_sub] = np.meshgrid(xg[cols], yg[rows])
                XR_out = np.array([np.amin(xg[cols])-grid_pad, np.amax(xg[cols])+grid_pad])
                YR_out = np.array([np.amin(yg[rows])-grid_pad, np.amax(yg[rows])+grid_pad])
                cr_out = [cols[0], rows[0]]
                # make a 1-d array to populate with the truth values we'll use to select data points
                ii     = np.zeros(out.shape[0]).astype('bool')  
                ii[:] = ((out[:,0] > XR_out[0]) & (out[:,0] < XR_out[1]) & 
                         (out[:,1] > YR_out[0]) & (out[:,1] < YR_out[1])).ravel()
                if np.sum(ii) < 4:
                    continue
                if options.Debug:
                    print(" ...based on %d points" % np.sum(ii))
                if (np.amax(out[ii,0])-np.amin(out[ii,0]) > 0) and (np.amax(out[ii,1])-np.amin(out[ii,1]) > 0 ) :
                    grid_disp(xg_sub, yg_sub, out[ii,0:2], out[ii,2], out[ii,3], out[ii,5], out[ii,6], 
                              options.fill_dist, int(search_range_x), dispDs, spreadDs, options.output_scale, cr_out)

        for Ds in (dispDs, spreadDs):
            Ds.SetGeoTransform(tuple(GT1))
            Ds.SetProjection(projection)
        spreadDs = None
        dispDs   = None
    # END IF (if options.output_scale > 0.)

    print("End: " + str(datetime.datetime.now()))

if __name__=="__main__":
    main()
