/* RKL - RPN kernel Library for C and FORTRAN programming * Copyright (C) 2019 Recherche en Prevision Numerique * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, * version 2.1 of the License. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ //****P* librkl/RPN kernel library interpolators // DESCRIPTION // // Set of 3D interpolators, // C and Fortran callable (C name and Fortran name are the same) // // interpolate the value at point (px, py, pz) fron a 3 Dimensional real(float) array // Fortran : real, dimension(dimi,dimj,dimk) :: array // C : float array[dimk][dimj][dimi] // (Fortran storage order is expected) // // the interpolation routines expect to receive the address of point // array(i[+offseti],1[+offsetj],1) // and will interpolate to get the value at point (px,py,pz) // px, py, pz are expressed in "index" space, i.e. // array(1.5, 1, 1) is halfway between array(1,1,1) and array(2,1,1) // array(1 ,1.5, 1) is halfway between array(1,1,1) and array(1,2,1) // array(1 ,1 ,2.5) is halfway between array(1,1,2) and array(1,1,3) // array(2.5,3.5,4.5) is halfway between array(2,3,4) and array(3,4,5) // // constant spacing between array points is assumed for the first 2 dimensions. // // along the third dimension, non constant spacing is assumed, the positions along // said third dimension are supplied via an opaque descriptor created with a call // to the Vsearch_setup family of routines (returning a C pointer to said descriptor) // // 4 point lagrange polynomials are used for cubic interpolations. interpolation is always // cubic along the first 2 dimensions, while along the third dimension linear interpolation // is used in the first 2 (pz <= 2) and the last 2 (pz >= nk-1) intervals, and cubic // interpolation is used in the other intervals (2.0 < pz < nk - 1) // // there is a also monotonic interpolation option. // in that case, 4 results are returned // - normal interpolation result // - tri-linear interpolation result // - min and max values for the inner 2x2x2 set of values used for tri-linear interpolation // // Name Purpose // // Vsearch_setup : simple setup // Vsearch_setup_plus : Vsearch_setup + x and y direction offsets // Tricublin_zyx1_n : simple interpolator, source is a single variable in a 3D array // Tricublin_zyx1_n_m : source is a group of m 3D arrays // Tricublin_zyx3_n : source is a 3 variable ( dimension(3, ....) ) 3D array // Tricublin_mono_zyx_n : monotonic version of Tricublin_zyx1_n // Tricublin_mono_zyx_n_m : monotonic version of Tricublin_zyx1_n_m // // Tricublin_zyx1_n_m, Tricublin_zyx3_n, Tricublin_mono_zyx_n_m save computing cycles by // reusing the computed interpolation coefficients for multiple arrays. The interpolation // coefficients are dependent upon px, py, pz. // // the horizontal (first 2 dimensions) interpolation is bicubic. // (an extra linear interpolation is performed in the monotonic case) // // the vertical (3rd dimension) interpolation is normally cubic, except when the target // lies between the first 2 or the last 2 levels, in which case it is linear. // // the monotonic interpolators provides 4 outputs // - the result of the centered tri-cubic or bi-cubic+linear interpolation (4x4x4 or 4x4x2) // - the result of a centered tri-linear interpolation (2x2x2) // - the minimum of the 2x2x2 sub-cube used for tri-linear interpolation // - the maximum of the 2x2x2 sub-cube used for tri-linear interpolation // // the interpolation routines assume that the address of the source array is the point // px = 1.0, py = 1.0, pz = 1.0 which is why f(1,1,1) is normally passed to these routines. // if this is not the case, offseti and offsetj are used to compensate // // EXAMPLES // // use ISO_C_BINDING // include "tricublin_f90.inc" // type(C_PTR) :: lv, mv ! C pointer opaque descriptor object // real(kind=REAL64), dimension(NK) :: levels ! list of positions for the third dimension // real, dimension(-2:NI+4,-1:NJ+3,NK), target :: f, f1, f2, f3 ! single valued source arrays // real, dimension(3,-2:NI+4,-1:NJ+3,NK), target :: f3 ! triple valued source array // type(C_PTR), dimension(3) :: f123 // real, dimension(NPOINTS) :: d, l, mi, ma ! normal, linear, min, max // real, dimension(3,NPOINTS) :: d3, l3, mi3, ma3 ! normal, linear, min, max // real, dimension(3,NPOINTS) :: pxpypz ! (px,py,pz) triplets // ...... ! levels(1:NK) = .... // lv = vsearch_setup(levels, NK, NI+7, NJ+5) ! NI+7, NJ+5 to account for halos // mv = vsearch_setup_plus(levels, NK, NI+7, NJ+5, 3, 2) ! passing f instead of f(1,1,1) // ...... ! pxpypz(1:3,1:NPOINTS) = ...., [1.0,1.0,1.0] points to to f(1,1,1) // call tricublin_zyx1_n(d ,f(1,1,1) ,pxpypz,lv,NPOINTS) ! single interpolation // call tricublin_zyx1_n(d ,f ,pxpypz,mv,NPOINTS) ! with offset compensation (halos) // call tricublin_zyx3_n(d3,f3(1,1,1,1),pxpypz,lv,NPOINTS) ! triple interpolation // f123(1) = C_LOC(f1(1,1,1)) // f123(2) = C_LOC(f2(1,1,1)) // f123(3) = C_LOC(f3(1,1,1)) // call tricublin_zyx1_n_m(d3,f123,pxpypz,lv,NPOINTS,3) ! 3 single interpolations // call tricublin_mono_zyx_n(d,l,mi,ma,f(1,1,1),pxpypz,lv,NPOINTS) ! monotonic interpolation, 1 array // call tricublin_mono_zyx_n_m(d3,l3,mi3,ma3,f123,pxpypz,lv,NPOINTS,3) ! monotonic interpolation, 3 arrays //**** #define MAX(a,b) ((a) > (b)) ? (a) : (b) #define MIN(a,b) ((a) < (b)) ? (a) : (b) #if defined(__AVX2__) && defined(__x86_64__) #include #endif #include #include #if defined(TIMING) #include //uint64_t Nanocycles(void) { // uint64_t rdtscp_(void) { // version "in order" avec "serialization" // uint32_t lo, hi; // __asm__ volatile ("rdtscp" // : /* outputs */ "=a" (lo), "=d" (hi) // : /* no inputs */ // : /* clobbers */ "%rcx"); // // __asm__ volatile ("mfence"); // return (uint64_t)lo | (((uint64_t)hi) << 32); // struct timeval t; // uint64_t i; // int status; // status = gettimeofday(&t,NULL); // i = t.tv_sec; // i = i * 1000000; // i = i + t.tv_usec; // i = i * 1000; // nanoseconds // return i; //}*/ #endif #if defined(__AVX2__) && defined(__x86_64__) static double cp133 = 1.0/3.0; #endif static double cp167 = 1.0/6.0; static double cm167 = -1.0/6.0; static double cm5 = -0.5; static double cp5 = 0.5; static double one = 1.0; static double two = 2.0; typedef struct{ float px; // position along x in index space float py; // position along y in index space float pz; // position along z in index space // float z; // absolute position along z } pxpypz; typedef struct{ double *z; // table for levels (nk doubles) double *ocz; // pointer to inverse of coefficient denominators [4*nk doubles] uint32_t ni; // distance between rows uint32_t nj; // number of rows uint32_t nk; // number of levels (max 255) uint32_t nij; // ni*nj, distance between levels uint32_t offi; // offset to add to i position (e.g. global to local grid remapping) uint32_t offj; // offset to add to j position (e.g. global to local grid remapping) }ztab; /*static void cubic_coeff_denom(double *t, double *f, int n){ int i, j; t += 4; // will store no coefficients for first interval (no point below) j = 0; double a, b, c, d, ab, ac, ad, bc, bd, cd; for( ; jz = malloc(nk * sizeof(double)); if(NULL == lv->z){ // malloc failed free(lv); // deallocate lv return NULL; } for(i=0 ; iz[i] = targets[i] ; } // z coordinate table // for(i=0 ; iz[i]) ; } ; printf("\n"); // denominators for Lagrange cubic polynomials coefficients ( entries 1 to n-2 make sense ) // entries 0 and n-1 are fudged lv->ocz = (double *) malloc(4 * nkl * sizeof(double)); if(NULL == lv->ocz){ // malloc failed free(lv->z); // deallocate z free(lv); // deallocate lv return NULL; } if(nk >= 4){ // protect against nk < 4 denominators( &(lv->ocz[0]) , targets[0], targets[1], targets[2], targets[3]); // level 0 coeffs are normally not used for(i=1 ; iocz[4*i]) , lv->z[i-1], lv->z[i ], lv->z[i+1], lv->z[i+2]); } denominators( &(lv->ocz[4*(nk-2)]) , targets[nk-4], targets[nk-3], targets[nk-2], targets[nk-1]); // level nk-2 coeffs are normally not used denominators( &(lv->ocz[4*(nk-1)]) , targets[nk-4], targets[nk-3], targets[nk-2], targets[nk-1]); // level nk-1 coeffs are normally not used }else{ for(i=0 ; iocz[i] = 1.0; // not used, set to 1.0 to avoid Floating Point errors later on } lv->ni = ni; // nb of points along x lv->nj = nj; // nb of points along y lv->nk = nk; // nb of points along z lv->nij = ni*nj; // ni * nj lv->offi = 0; lv->offj = 0; return lv; // return pointer to filled table } //****f* librkl/Vsearch_setup_plus // Synopsis // // ARGUMENTS ztab *Vsearch_setup_plus(double *targets, int nk, int ni, int nj, int offseti, int offsetj) //**** { ztab *lv; lv = Vsearch_setup(targets, nk, ni, nj); if(lv != NULL) { lv->offi = offseti; lv->offj = offsetj; } return lv; // return pointer to filled table } static inline int Vcoef_pxyz4_inline(double *cxyz, int *offset, float px8, float py8, float pz8, ztab *lv, int Step_kount){ int ix, iy, iz, ijk, zlinear; double pxy[2], *base, *pos; double zza, zzb, zzc, zzd, zzab, zzcd, dz, px, py, pz; int i, j; px = px8 ; // fractional index positions along x, y, z (float to double) py = py8 ; pz = pz8 ; // printf("px py pz = %f %f %f\n",px,py,pz); ix = px ; px = px - ix; // px is now deltax (fractional part of px) ix = ix + lv->offi; // global to local grid remapping ijk = ix - 2; // x displacement (elements), ix assumed to always be >1 and < ni-1 // printf("ix = %d, ijk = %d\n",ix,ijk); cxyz[12] = 0.0; cxyz[13] = 1.0 - px; // linear interpolation coefficients along x cxyz[14] = px; cxyz[15] = 0.0; iy = py ; py = py - iy; // py is now deltay (fractional part of py) iy = iy + lv->offj; // global to local grid remapping ijk = ijk + (iy - 2) * lv->ni; // add y displacement (rows), ix assumed to always be >1 and < nj-1 // printf("iy = %d, ijk = %d\n",iy,ijk); cxyz[16] = 1.0 - py; // linear interpolation coefficients along y cxyz[17] = py; iz = pz ; if(iz<1) iz = 1; if(iz>lv->nk-1) iz = lv->nk-1; // iz < 1 or iz > nk-1 will result in linear extrapolation dz = pz - iz; // dz is now "fractional" part of pz (may be <0 or >1 if extrapolating) ijk = ijk + (iz -1) * lv->nij; // add z displacement (2D planes) cxyz[18] = 1.0 - dz; // linear interpolation coefficients along z cxyz[19] = dz; iz--; // iz needs to be in "origin 0" (C index from Fortran index) zlinear = (iz - 1) | (lv->nk - 3 - iz); zlinear >>= 31; // nonzero only if iz < 1 or iz > nk -3 (top and bottom intervals) if(! zlinear) ijk = ijk - lv->nij; // not the linear case, go down one 2D plane to get lower left corner of 4x4x4 cube *offset = ijk; // printf("iz = %d, ijk = %d\n",iz,ijk); // now we can compute the coefficients along z using iz and dz if(zlinear){ cxyz[ 8] = 0.0; // coefficients for linear interpolation along z cxyz[ 9] = 1.0 - dz; cxyz[10] = dz; cxyz[11] = 0.0; }else{ base = &(lv->ocz[4*iz]); // precomputed inverses of denominators pos = &(lv->z[iz]); pz = dz * pos[1] + (1.0 - dz) * pos[0]; // pz is now an absolute position zza = pz - pos[-1] ; zzb = pz - pos[0] ; zzc = pz - pos[1] ; zzd = pz - pos[2] ; zzab = zza * zzb ; zzcd = zzc * zzd; cxyz[ 8] = zzb * zzcd * base[0]; // cxyz[16] = TRIPRD(pz,pos[1],pos[2],pos[3]) * base[0]; cxyz[ 9] = zza * zzcd * base[1]; // cxyz[17] = TRIPRD(pz,pos[0],pos[2],pos[3]) * base[1]; cxyz[10] = zzd * zzab * base[2]; // cxyz[18] = TRIPRD(pz,pos[0],pos[1],pos[3]) * base[2]; cxyz[11] = zzc * zzab * base[3]; // cxyz[19] = TRIPRD(pz,pos[0],pos[1],pos[2]) * base[3]; } cxyz[ 0] = cm167*px*(px-one)*(px-two); // coefficients for cubic interpolation along x cxyz[ 1] = cp5*(px+one)*(px-one)*(px-two); cxyz[ 2] = cm5*px*(px+one)*(px-two); cxyz[ 3] = cp167*px*(px+one)*(px-one); cxyz[ 4] = cm167*py*(py-one)*(py-two); // coefficients for cubic interpolation along y cxyz[ 5] = cp5*(py+one)*(py-one)*(py-two); cxyz[ 6] = cm5*py*(py+one)*(py-two); cxyz[ 7] = cp167*py*(py+one)*(py-one); //Mohammad /*if(zlinear){ cxyz[ 8] = 0.0; // coefficients for linear interpolation along z cxyz[ 9] = 1.0 - dz; cxyz[10] = dz; cxyz[11] = 0.0; }else{ base = &(lv->ocz[4*iz]); // precomputed inverses of denominators pos = &(lv->z[iz]); pz = dz * pos[1] + (1.0 - dz) * pos[0]; // pz is now an absolute position if (Step_kount/2-Step_kount/2.0==0){//backward cxyz[ 8] = (pz - pos[0])*(pz - pos[1])/((pos[-1] - pos[0])*(pos[-1] - pos[1])); cxyz[ 9] = (pz - pos[-1])*(pz - pos[1])/((pos[0] - pos[-1])*(pos[0] - pos[1])); cxyz[10] = (pz - pos[-1])*(pz - pos[0])/((pos[1] - pos[-1])*(pos[1] - pos[0])); cxyz[11] = 0.0; } else{ cxyz[ 8] = 0.0; cxyz[ 9] = (pz - pos[1])*(pz - pos[2])/((pos[0] - pos[1])*(pos[0] - pos[2])); cxyz[10] = (pz - pos[0])*(pz - pos[2])/((pos[1] - pos[0])*(pos[1] - pos[2])); cxyz[11] = (pz - pos[0])*(pz - pos[1])/((pos[2] - pos[0])*(pos[2] - pos[1])); } } if (Step_kount/2-Step_kount/2.0==0){//backward cxyz[0] = cp5*px*(px-one); // polynomial coefficients along i cxyz[1] = -1.0*(px+one)*(px-one); cxyz[2] = cp5*px*(px+one); cxyz[3] = 0.0;//cp167*x*(x+one)*(x-one); cxyz[4] = cp5*py*(py-one); // polynomial coefficients along j cxyz[5] = -1.0*(py+one)*(py-one); cxyz[6] = cp5*py*(py+one); cxyz[7] = 0.0;//cp167*y*(y+one)*(y-one); } else{//forward cxyz[0] = 0.0;//cm167*x*(x-one)*(x-two); // polynomial coefficients along i cxyz[1] = cp5*(px-one)*(px-two); cxyz[2] = -1.0*px*(px-two); cxyz[3] = cp5*px*(px-one); cxyz[4] = 0.0;//cm167*y*(y-one)*(y-two); // polynomial coefficients along j cxyz[5] = cp5*(py-one)*(py-two); cxyz[6] = -1.0*py*(py-two); cxyz[7] = cp5*py*(py-one); }*/ //linear /* cxyz[ 8] = 0.0; // coefficients for linear interpolation along z cxyz[ 9] = 1.0 - dz; cxyz[10] = dz; cxyz[11] = 0.0; cxyz[0] = 0.0;//cp5*x*(x-one); // polynomial coefficients along i cxyz[1] = -1.0*(px-one); cxyz[2] = 1.0*px; cxyz[3] = 0.0;//cp167*x*(x+one)*(x-one); cxyz[4] = 0.0;//cp5*y*(y-one); // polynomial coefficients along j cxyz[5] = -1.0*(py-one); cxyz[6] = 1.0*py; cxyz[7] = 0.0;//cp167*y*(y+one)*(y-one); */ return zlinear; // linear / cubic flag } // Fortran dimensions: f1(NI,NJ,NK) // NI : length of a line // NINJ : length of a plane // zlinear : zero if cubic interpolation along z, non zero if linear interpolation along z // f1 : address of lower left corner of the 4 x 4 x 4 or 4 x 4 x 2 interpolation box // interpolation is done along z, then along y, then along x // values are interpolated using the cubic/linear polynomial coefficients // pxyz(8) and pxyz(11) both = 0.0 when linear interpolation along z // pxyz(9) = 1.0 - dz, pxyz(10) = dz are expected in the linear case // in the z linear case, planes 0 and 1 are the same as are planes 2 and 3 // static inline void Tricublin_zyxf_beta(float *d, float *f1, double *px, double *py, double *pz, int NI, int NINJ){ static inline void Tricublin_zyxf1_inline(float *d, float *f1, double *pxyz, int NI, int NINJ, int zlinear, int Step_kount){ int ni = NI; int ninj = NINJ; int ninjl; // ninj (cubic along z) or 0 (linear along z) int ni2, ni3; double *px = pxyz; double *py = pxyz+4; double *pz = pxyz+8; double va4[4], vb4[4], vc4[4], vd4[4], dst[4]; int i, ninj2, ninj3; ni2 = ni + ni; ni3 = ni2 + ni; ninjl = ninj ; // assuming cubic case. in the linear case, ninjl will be set to 0 if(zlinear) ninjl = 0; ninj2 = ninj + ninjl; ninj3 = ninj2 + ninjl; // field 1 for (i=0 ; i<4 ; i++){ va4[i] = f1[i ]*pz[0] + f1[i +ninjl]*pz[1] + f1[i +ninj2]*pz[2] + f1[i +ninj3]*pz[3]; vb4[i] = f1[i+ni ]*pz[0] + f1[i+ni +ninjl]*pz[1] + f1[i+ni +ninj2]*pz[2] + f1[i+ni +ninj3]*pz[3]; vc4[i] = f1[i+ni2]*pz[0] + f1[i+ni2+ninjl]*pz[1] + f1[i+ni2+ninj2]*pz[2] + f1[i+ni2+ninj3]*pz[3]; vd4[i] = f1[i+ni3]*pz[0] + f1[i+ni3+ninjl]*pz[1] + f1[i+ni3+ninj2]*pz[2] + f1[i+ni3+ninj3]*pz[3]; dst[i] = va4[i]*py[0] + vb4[i]*py[1] + vc4[i]*py[2] + vd4[i]*py[3]; } d[0] = dst[0]*px[0] + dst[1]*px[1] + dst[2]*px[2] + dst[3]*px[3]; //SWEEP /*if (Step_kount/2-Step_kount/2.0==0){//backward for (i=0 ; i<3 ; i++){ va4[i] = f1[i ]*pz[0] + f1[i +ninjl]*pz[1] + f1[i +ninj2]*pz[2]; vb4[i] = f1[i+ni ]*pz[0] + f1[i+ni +ninjl]*pz[1] + f1[i+ni +ninj2]*pz[2]; vc4[i] = f1[i+ni2]*pz[0] + f1[i+ni2+ninjl]*pz[1] + f1[i+ni2+ninj2]*pz[2]; //vd4[i] = f1[i+ni3]*pz[0] + f1[i+ni3+ninjl]*pz[1] + f1[i+ni3+ninj2]*pz[2] + f1[i+ni3+ninj3]*pz[3]; dst[i] = va4[i]*py[0] + vb4[i]*py[1] + vc4[i]*py[2];// + vd4[i]*py[3]; } d[0] = dst[0]*px[0] + dst[1]*px[1] + dst[2]*px[2];// + dst[3]*px[3]; } else{//forward for (i=1 ; i<4 ; i++){ //va4[i] = f1[i ]*pz[0] + f1[i +ninjl]*pz[1] + f1[i +ninj2]*pz[2] + f1[i +ninj3]*pz[3]; vb4[i] = f1[i+ni +ninjl]*pz[1] + f1[i+ni +ninj2]*pz[2] + f1[i+ni +ninj3]*pz[3]; vc4[i] = f1[i+ni2+ninjl]*pz[1] + f1[i+ni2+ninj2]*pz[2] + f1[i+ni2+ninj3]*pz[3]; vd4[i] = f1[i+ni3+ninjl]*pz[1] + f1[i+ni3+ninj2]*pz[2] + f1[i+ni3+ninj3]*pz[3]; dst[i] = vb4[i]*py[1] + vc4[i]*py[2] + vd4[i]*py[3]; } d[0] = dst[1]*px[1] + dst[2]*px[2] + dst[3]*px[3]; }*/ } // Fortran dimensions: d(n) , f1(*), pxyz(3,n), lv(*) // d : results // f1 : source array (dimensions found in opaque object lv) // first index along each dimension assumed to be 1 // pxyz : positions along x, y, z , in "index" space // lv : opaque object containing vertical description (see Vsearch_setup) // n : number of points // process n points // for each point 1 value from ixyz, 3 values from pxyz are used (1 element) // it is ASSUMED that along X and Y interpolation will always be CUBIC // ( 2 <= px < "ni"-1 ) ( 2 <= py < "nj"-1 ) // pz < 2 and pz >= nk - 1 will induce linear interpolation or extrapolation //****f* librkl/Tricublin_zyx1_n // Synopsis // // ARGUMENTS void Tricublin_zyx1_n(float *d, float *f1, pxpypz *pxyz, ztab *lv, int n, int Step_kount) //**** { double cxyz[24]; // interpolation coefficients 4 for each dimension (x, y, z) int ixyz; // unilinear index into array f1 (collapsed dimensions) int zlinear; // non zero if linear interpolation // all above computed in Vcoef_pxyz4, used in Tricublin_zyxf1 while(n--){ zlinear = Vcoef_pxyz4_inline(cxyz, &ixyz, pxyz->px, pxyz->py, pxyz->pz, lv, Step_kount); // compute coefficients Tricublin_zyxf1_inline(d, f1 + ixyz, cxyz, lv->ni, lv->nij, zlinear, Step_kount); // interpolate d++; // next result pxyz += 1; // next set of positions } } //****f* librkl/Tricublin_zyx1_n_m // Synopsis // // ARGUMENTS void Tricublin_zyx1_n_m(float *d, float **fs, pxpypz *pxyz, ztab *lv, int n, int m, int Step_kount) //**** { // multiple field version with sources pointer lists, d[n][m] ( Fortran dimension(m,n)) double cxyz[24]; // interpolation coefficients 4 for each dimension (x, y, z) int ixyz; // unilinear index into array f1 (collapsed dimensions) int i; int zlinear; // non zero if linear interpolation // all above computed in Vcoef_pxyz4, used in Tricublin_zyxf1 while(n--){ zlinear = Vcoef_pxyz4_inline(cxyz, &ixyz, pxyz->px, pxyz->py, pxyz->pz, lv, Step_kount); // compute coefficients for(i=0 ; i< m ; i++){ Tricublin_zyxf1_inline(d, fs[i] + ixyz, cxyz, lv->ni, lv->nij, zlinear, Step_kount); // interpolate d++; // next result } pxyz += 1; // next set of positions } } //****f* librkl/Tricublin_zyx1_m_n // Synopsis // // ARGUMENTS void Tricublin_zyx1_m_n(float *dm, float **fs, pxpypz *pxyz, ztab *lv, int n, int m, int Step_kount) //**** { // multiple field version with sources pointer lists, d[m][n] ( Fortran dimension(n,m)) double cxyz[24]; // interpolation coefficients 4 for each dimension (x, y, z) int ixyz; // unilinear index into array f1 (collapsed dimensions) int i, j; int n0 = n; float *d0; int zlinear; // non zero if linear interpolation // all above computed in Vcoef_pxyz4, used in Tricublin_zyxf1 // printf("Mohammad Tricublin_zyx1_m_n \n"); for(j=0 ; jpx, pxyz->py, pxyz->pz, lv, Step_kount); // compute coefficients d0 = dm; // base address for results for(i=0 ; i< m ; i++){ Tricublin_zyxf1_inline(d0, fs[i] + ixyz, cxyz, lv->ni, lv->nij, zlinear, Step_kount); // interpolate // printf("DEBUG: i = %d, n= %d, offset = %d, value = %f \n",i,n,d0-dm,*d0); d0 = d0 + n0; // next result } dm++; // next set of results pxyz += 1; // next set of positions // return; } } //****f* librkl/Tricublin_zyx1_p // Synopsis // // ARGUMENTS void Tricublin_zyx1_p(float **dp, float **fs, pxpypz *pxyz, ztab *lv, int n, int m, int Step_kount) //**** { // multiple field version with results and sources pointer lists double cxyz[24]; // interpolation coefficients 4 for each dimension (x, y, z) int ixyz; // unilinear index into array f1 (collapsed dimensions) int i, j; int zlinear; // non zero if linear interpolation // all above computed in Vcoef_pxyz4, used in Tricublin_zyxf1 for(j=0 ; jpx, pxyz->py, pxyz->pz, lv, Step_kount); // compute coefficients for(i=0 ; i< m ; i++){ // loop over m variables Tricublin_zyxf1_inline(dp[i] + j, fs[i] + ixyz, cxyz, lv->ni, lv->nij, zlinear, Step_kount); // interpolate } pxyz += 1; // next set of positions } } static inline void Tricublin_zyx_mm_d_inline(float *d, float *lin, float *min, float *max, float *f1, double *cxyz, int NI, int NINJ, int zlinear, int Step_kount){ int ni = NI; int ninj = NINJ; int ninjl; // ninj (cubic along z) or 0 (linear along z) int ni2, ni3; double *px, *py, *pz; double va4[4], vb4[4], vc4[4], vd4[4], dst[4], dsl[4]; int i, ninj2, ninj3; double ma, mi; px = cxyz; py = cxyz + 4; pz = cxyz + 8; ni2 = ni + ni; ni3 = ni2 + ni; ninjl = ninj ; // assuming cubic case. in the linear case, ninjl will be set to 0 if(zlinear) ninjl = 0; ninj2 = ninj + ninjl; ninj3 = ninj2 + ninjl; // field 1 // TODO: add min max code for (i=0 ; i<4 ; i++){ // tricubic or bicubic/linear interpolation va4[i] = f1[i ]*pz[0] + f1[i +ninjl]*pz[1] + f1[i +ninj2]*pz[2] + f1[i +ninj3]*pz[3]; vb4[i] = f1[i+ni ]*pz[0] + f1[i+ni +ninjl]*pz[1] + f1[i+ni +ninj2]*pz[2] + f1[i+ni +ninj3]*pz[3]; vc4[i] = f1[i+ni2]*pz[0] + f1[i+ni2+ninjl]*pz[1] + f1[i+ni2+ninj2]*pz[2] + f1[i+ni2+ninj3]*pz[3]; vd4[i] = f1[i+ni3]*pz[0] + f1[i+ni3+ninjl]*pz[1] + f1[i+ni3+ninj2]*pz[2] + f1[i+ni3+ninj3]*pz[3]; dst[i] = va4[i]*py[0] + vb4[i]*py[1] + vc4[i]*py[2] + vd4[i]*py[3]; } d[0] = dst[0]*px[0] + dst[1]*px[1] + dst[2]*px[2] + dst[3]*px[3]; //SWEEP /*if (Step_kount/2-Step_kount/2.0==0){//backward for (i=0 ; i<3 ; i++){ va4[i] = f1[i ]*pz[0] + f1[i +ninjl]*pz[1] + f1[i +ninj2]*pz[2]; vb4[i] = f1[i+ni ]*pz[0] + f1[i+ni +ninjl]*pz[1] + f1[i+ni +ninj2]*pz[2]; vc4[i] = f1[i+ni2]*pz[0] + f1[i+ni2+ninjl]*pz[1] + f1[i+ni2+ninj2]*pz[2]; //vd4[i] = f1[i+ni3]*pz[0] + f1[i+ni3+ninjl]*pz[1] + f1[i+ni3+ninj2]*pz[2] + f1[i+ni3+ninj3]*pz[3]; dst[i] = va4[i]*py[0] + vb4[i]*py[1] + vc4[i]*py[2];// + vd4[i]*py[3]; } d[0] = dst[0]*px[0] + dst[1]*px[1] + dst[2]*px[2];// + dst[3]*px[3]; } else{//forward for (i=1 ; i<4 ; i++){ //va4[i] = f1[i ]*pz[0] + f1[i +ninjl]*pz[1] + f1[i +ninj2]*pz[2] + f1[i +ninj3]*pz[3]; vb4[i] = f1[i+ni +ninjl]*pz[1] + f1[i+ni +ninj2]*pz[2] + f1[i+ni +ninj3]*pz[3]; vc4[i] = f1[i+ni2+ninjl]*pz[1] + f1[i+ni2+ninj2]*pz[2] + f1[i+ni2+ninj3]*pz[3]; vd4[i] = f1[i+ni3+ninjl]*pz[1] + f1[i+ni3+ninj2]*pz[2] + f1[i+ni3+ninj3]*pz[3]; dst[i] = vb4[i]*py[1] + vc4[i]*py[2] + vd4[i]*py[3]; } d[0] = dst[1]*px[1] + dst[2]*px[2] + dst[3]*px[3]; }*/ for (i=1 ; i<3 ; i++){ // linear interpolation vb4[i] = f1[i+ni +ninjl]*cxyz[18] + f1[i+ni +ninj2]*cxyz[19]; vc4[i] = f1[i+ni2+ninjl]*cxyz[18] + f1[i+ni2+ninj2]*cxyz[19]; dsl[i] = vb4[i]*cxyz[16] + vc4[i]*cxyz[17]; } lin[0] = dsl[1]*cxyz[13] + dsl[2]*cxyz[14]; ma = f1[1 + ni + ninjl] ; mi = ma; // point [1,1,1] of 2x2x2 inner box for (i=1 ; i<3 ; i++){ // min max of 2x2x2 inner box ma = MAX(ma , f1[i + ni + ninjl]); ma = MAX(ma , f1[i + ni + ninj2]); mi = MIN(mi , f1[i + ni + ninjl]); mi = MIN(mi , f1[i + ni + ninj2]); ma = MAX(ma , f1[i + ni2 + ninjl]); ma = MAX(ma , f1[i + ni2 + ninj2]); mi = MIN(mi , f1[i + ni2 + ninjl]); mi = MIN(mi , f1[i + ni2 + ninj2]); } *max = ma ; *min = mi; } //****f* librkl/Tricublin_mono_zyx_n // Synopsis // // ARGUMENTS void Tricublin_mono_zyx_n(float *d, float *l, float *mi, float *ma, float *f, pxpypz *pxyz, ztab *lv, int n, int Step_kount) //**** { double cxyz[24]; // interpolation coefficients 4 for each dimension (x, y, z) int ixyz; // unilinear index into array f1 (collapsed dimensions) int zlinear; // non zero if linear interpolation // all above computed in Vcoef_pxyz4, used in Tricublin_zyxf1 while(n--){ zlinear = Vcoef_pxyz4_inline(cxyz, &ixyz, pxyz->px, pxyz->py, pxyz->pz, lv, Step_kount); // compute coefficients Tricublin_zyx_mm_d_inline(d, l, mi, ma, f + ixyz, cxyz, lv->ni, lv->nij, zlinear, Step_kount); // interpolate d++; // next result l++; mi++; ma++; pxyz += 1; // next set of positions } } //****f* librkl/Tricublin_mono_zyx_n_m // Synopsis // // ARGUMENTS void Tricublin_mono_zyx_n_m(float *d, float *l, float *mi, float *ma, float **fs, pxpypz *pxyz, ztab *lv, int n, int m, int Step_kount) //**** { int i; double cxyz[24]; // interpolation coefficients 4 for each dimension (x, y, z) int ixyz; // unilinear index into array f1 (collapsed dimensions) int zlinear; // non zero if linear interpolation // all above computed in Vcoef_pxyz4, used in Tricublin_zyxf1 while(n--){ zlinear = Vcoef_pxyz4_inline(cxyz, &ixyz, pxyz->px, pxyz->py, pxyz->pz, lv, Step_kount); // compute coefficients for(i=0 ; ini, lv->nij, zlinear, Step_kount); // interpolate d++; // next result l++; mi++; ma++; } pxyz += 1; // next set of positions } } //****f* librkl/Tricublin_mono_zyx_m_n // Synopsis // // ARGUMENTS void Tricublin_mono_zyx_m_n(float *dm, float *lm, float *mim, float *mam, float **fs, pxpypz *pxyz, ztab *lv, int n, int m, int Step_kount) //**** { int i; float *d0, *l0, *mi0, *ma0; double cxyz[24]; // interpolation coefficients 4 for each dimension (x, y, z) int ixyz; // unilinear index into array f1 (collapsed dimensions) int n0 = n; int zlinear; // non zero if linear interpolation // all above computed in Vcoef_pxyz4, used in Tricublin_zyxf1 while(n--){ zlinear = Vcoef_pxyz4_inline(cxyz, &ixyz, pxyz->px, pxyz->py, pxyz->pz, lv, Step_kount); // compute coefficients d0 = dm; l0 = lm; mi0 = mim; ma0 = mam; for(i=0 ; ini, lv->nij, zlinear, Step_kount); // interpolate d0 += n0; // next result l0 += n0; mi0 += n0; ma0 += n0; } dm++; // next set of results lm++; mim++; mam++; pxyz += 1; // next set of positions } } // Fortran dimensions: d(3) , f(3,NI,NJ,NK) // NI : size of a 1D line // NINJ : size of a 2D plane // f : address of lower left corner of the 4 x 4 x 4 or 4 x 4 x 2 box // interpolation is done along z, then along y, then along x // 3 values are interpolated using the same cubic/linear polynomial coefficients // pz(2) = 1.0 - dz, pz(3) = dz are expected in the linear along z case (0.0 <= dz <= 1.0) // in the z linear case, planes 0 and 1 are the same as are planes 2 and 3 // note for future expansion: // should both interpolations have to be done, planes 1 and 2 can be used for the z linear case, // and planes 0, 1, 2, 3 for the z cubic case // in that case another mechanism will have to be used to signal the z linear case static inline void Tricublin_zyxf3_inline(float *d, float *f, double *pxyz, int NI, int NINJ, int zlinear, int Step_kount){ // static inline void Tricublin_zyx3f_beta(float *d, float *f, double *px, double *py, double *pz, int NI, int NINJ){ int ni = 3*NI; int ninj = 3*NINJ; double *px = pxyz; double *py = pxyz+4; double *pz = pxyz+8; int ninjl; // ninj (cubic along z) or 0 (linear along z) float *s = f; double dst[13]; // int64_t *L = (int64_t *)d; int ni2, ni3; double va4[12], vb4[12], vc4[12], vd4[12]; int ninj2, ninj3, i; ni2 = ni + ni; ni3 = ni2 + ni; dst[12] = 0; ninjl = ninj ; // assuming cubic case. in the linear case, ninjl will be set to 0 if(zlinear) ninjl = 0; ninj2 = ninj + ninjl; ninj3 = ninj2 + ninjl; for (i=0 ; i<12 ; i++){ va4[i] = s[i ]*pz[0] + s[i +ninjl]*pz[1] + s[i +ninj2]*pz[2] + s[i +ninj3]*pz[3]; vb4[i] = s[i+ni ]*pz[0] + s[i+ni +ninjl]*pz[1] + s[i+ni +ninj2]*pz[2] + s[i+ni +ninj3]*pz[3]; vc4[i] = s[i+ni2]*pz[0] + s[i+ni2+ninjl]*pz[1] + s[i+ni2+ninj2]*pz[2] + s[i+ni2+ninj3]*pz[3]; vd4[i] = s[i+ni3]*pz[0] + s[i+ni3+ninjl]*pz[1] + s[i+ni3+ninj2]*pz[2] + s[i+ni3+ninj3]*pz[3]; dst[i] = va4[i]*py[0] + vb4[i]*py[1] + vc4[i]*py[2] + vd4[i]*py[3]; } d[0] = dst[0]*px[0] + dst[3]*px[1] + dst[6]*px[2] + dst[ 9]*px[3]; d[1] = dst[1]*px[0] + dst[4]*px[1] + dst[7]*px[2] + dst[10]*px[3]; d[2] = dst[2]*px[0] + dst[5]*px[1] + dst[8]*px[2] + dst[11]*px[3]; //SWEEP /*if (Step_kount/2-Step_kount/2.0==0){//backward for (i=0 ; i<9 ; i++){ va4[i] = s[i ]*pz[0] + s[i +ninjl]*pz[1] + s[i +ninj2]*pz[2]; vb4[i] = s[i+ni ]*pz[0] + s[i+ni +ninjl]*pz[1] + s[i+ni +ninj2]*pz[2]; vc4[i] = s[i+ni2]*pz[0] + s[i+ni2+ninjl]*pz[1] + s[i+ni2+ninj2]*pz[2]; vd4[i] = s[i+ni3]*pz[0] + s[i+ni3+ninjl]*pz[1] + s[i+ni3+ninj2]*pz[2]; dst[i] = va4[i]*py[0] + vb4[i]*py[1] + vc4[i]*py[2]; } d[0] = dst[0]*px[0] + dst[3]*px[1] + dst[6]*px[2]; d[1] = dst[1]*px[0] + dst[4]*px[1] + dst[7]*px[2]; d[2] = dst[2]*px[0] + dst[5]*px[1] + dst[8]*px[2]; } else{//forward for (i=3 ; i<12 ; i++){ va4[i] = s[i +ninjl]*pz[1] + s[i +ninj2]*pz[2] + s[i +ninj3]*pz[3]; vb4[i] = s[i+ni +ninjl]*pz[1] + s[i+ni +ninj2]*pz[2] + s[i+ni +ninj3]*pz[3]; vc4[i] = s[i+ni2+ninjl]*pz[1] + s[i+ni2+ninj2]*pz[2] + s[i+ni2+ninj3]*pz[3]; vd4[i] = s[i+ni3+ninjl]*pz[1] + s[i+ni3+ninj2]*pz[2] + s[i+ni3+ninj3]*pz[3]; dst[i] = vb4[i]*py[1] + vc4[i]*py[2] + vd4[i]*py[3]; } d[0] = dst[3]*px[1] + dst[6]*px[2] + dst[ 9]*px[3]; d[1] = dst[4]*px[1] + dst[7]*px[2] + dst[10]*px[3]; d[2] = dst[5]*px[1] + dst[8]*px[2] + dst[11]*px[3]; }*/ } // process n triplets // for each triplet 1 value from ixyz, 3 values from pxyz are used (1 element) // it is ASSUMED that along X and Y interpolation will always be CUBIC // ( 2 <= px < "ni"-1 ) ( 2 <= py < "nj"-1 ) // pz < 2 and pz >= nk - 1 will induce linear interpolation or extrapolation //****f* librkl/Tricublin_zyx3_n // Synopsis // // ARGUMENTS void Tricublin_zyx3_n(float *d, float *f3, pxpypz *pxyz, ztab *lv, int n, int Step_kount) // printf("Mohammad Tricublin_zyx3_n\n"); //**** { double cxyz[24]; // interpolation coefficients 4 for each dimension (x, y, z) int ixyz; // unilinear index into array f1 (collapsed dimensions) int zlinear; // non zero if linear interpolation // all above computed in Vcoef_pxyz4, used in Tricublin_zyxf1 /*printf*/("%12.7f %12.7f %12.7f\n",pxyz->px, pxyz->py, pxyz->pz); while(n--){ zlinear = Vcoef_pxyz4_inline(cxyz, &ixyz, pxyz->px, pxyz->py, pxyz->pz, lv, Step_kount); // compute coefficients Tricublin_zyxf3_inline(d, f3 + ixyz*3, cxyz, lv->ni, lv->nij, zlinear, Step_kount); // interpolate d+=3; // next result pxyz += 1; // next set of positions } }