#if ! defined(F_TEST) #if defined(NEVER_TRUE) rm -f intrp_bicub_yx intrp_bicub_yx.o intrp_bicub_yx_f.F90 ln -sf intrp_bicub_yx.c intrp_bicub_yx_f.F90 to build (Intel compilers) : s.cc -O2 -DTIMING -c -march=core-avx2 intrp_bicub_yx.c s.f90 -no-wrap-margin -DF_TEST -o intrp_bicub_yx intrp_bicub_yx_f.F90 intrp_bicub_yx.o s.cc -O2 -DTIMING -c -march=core2 intrp_bicub_yx.c s.f90 -no-wrap-margin -DF_TEST -o intrp_bicub_yx0 intrp_bicub_yx_f.F90 intrp_bicub_yx.o to build (GNU compilers) : gcc -O3 -DTIMING -c -mavx2 -mfma intrp_bicub_yx.c gfortran -DF_TEST -o intrp_bicub_yx intrp_bicub_yx_f.F90 intrp_bicub_yx.o -ffree-line-length-none gcc -O3 -DTIMING -c -msse3 intrp_bicub_yx.c gfortran -DF_TEST -o intrp_bicub_yx0 intrp_bicub_yx_f.F90 intrp_bicub_yx.o -ffree-line-length-none ./intrp_bicub_yx ./intrp_bicub_yx0 #endif static double cp167 = 0.166666666666666667E0; static double cm167 = -0.166666666666666667E0; static double cp5 = .5; static double cm5 = -.5; static double one = 1.0; static double two = 2.0; #if defined(TIMING) #include #include #pragma weak rdtsc_=rdtsc uint64_t rdtsc_(void); uint64_t rdtsc(void) { // version rapide "out of order" #if defined(__x86_64__) uint32_t lo, hi; __asm__ volatile ("rdtscp" : /* outputs */ "=a" (lo), "=d" (hi) : /* no inputs */ : /* clobbers */ "%rcx"); return (uint64_t)lo | (((uint64_t)hi) << 32); #else static uint64_t time0; return time0++; #endif } #endif #if defined(__x86_64__) #include // use separate multiply and add instructions if fused multiply-add not available #if defined(__AVX__) #if ! defined(__FMA__) #define _mm256_fmadd_ps(a,b,c) _mm256_add_ps(_mm256_mul_ps(a,b),c) #define _mm256_fmadd_pd(a,b,c) _mm256_add_pd(_mm256_mul_pd(a,b),c) #define _mm_fmadd_ps(a,b,c) _mm_add_ps(_mm_mul_ps(a,b),c) #define _mm_fmadd_pd(a,b,c) _mm_add_pd(_mm_mul_pd(a,b),c) #endif #endif #endif // default values mean practically "no limit" static int qminx = -999999999; // limits for the 'q' grid, in "origin 0" static int qmaxx = 999999999; static int qminy = -999999999; static int qmaxy = 999999999; static int uminx = -999999999; // limits for the 'u' grid static int umaxx = 999999999; static int uminy = -999999999; static int umaxy = 999999999; static int vminx = -999999999; // limits for the 'v' grid static int vmaxx = 999999999; static int vminy = -999999999; static int vmaxy = 999999999; static int qoffx = 0; static int qoffy = 0; static int uoffx = 0; static int uoffy = 0; static int voffx = 0; static int voffy = 0; // set offsets for q grid, replicate q values for u and v grids void set_intrp_bicub_off_yx(int qoffsetx, int qoffsety){ //call in yyg_init.F90, qoffsetx=1-l_i0 qoffsety=1-l_j0 qoffx = qoffsetx; qoffy = qoffsety; uoffx = qoffsetx; uoffy = qoffsety; voffx = qoffsetx; voffy = qoffsety; } // set bounds for 'q' (scalars), 'u', and 'v' grids void set_intrp_bicub_quv(int qxmin, int qxmax, int qymin, int qymax, int uxmin, int uxmax, int uymin, int uymax, int vxmin, int vxmax, int vymin, int vymax){ //call in yyg_init.F90, (wb ,eb ,sb ,nb , wbu,ebu,sbu,nbu, wbv,ebv,sbv,nbv) // qxmin is in "origin 1", qminx is in "origin 0", same for other limits // uxmin is in "origin 1", uminx is in "origin 0", same for other limits // vxmin is in "origin 1", vminx is in "origin 0", same for other limits // bounds for scalars qminx = qxmin - 1 ; // min index along x qmaxx = qxmax - 1 ; // max index along x qminy = qymin - 1 ; // min index along y qmaxy = qymax - 1 ; // max index along y // bounds for u uminx = uxmin - 1 ; // min index along x umaxx = uxmax - 1 ; // max index along x uminy = uymin - 1 ; // min index along y umaxy = uymax - 1 ; // max index along y // bounds for v vminx = vxmin - 1 ; // min index along x vmaxx = vxmax - 1 ; // max index along x vminy = vymin - 1 ; // min index along y vmaxy = vymax - 1 ; // max index along y } static inline int bicub_coeffs_inline(double xx, double yy, int ni, double *wx, double *wy, int qminx, int qmaxx, int qminy, int qmaxy, int offsetx, int offsety, int Step_n){ double x, y; int ix, iy; ix = xx ; if(ix > xx) ix = ix -1 ; ix = ix - 2; // ix > xx if xx is negative iy = yy ; if(iy > yy) iy = iy -1 ; iy = iy - 2; // iy > yy if yy is negative ix = (ix < qminx) ? qminx : ix; // apply boundary limits ix = (ix > qmaxx) ? qmaxx : ix; iy = (iy < qminy) ? qminy : iy; iy = (iy > qmaxy) ? qmaxy : iy; // printf("Mohammad %d %d %d \n", ix, qminx, qmaxx); x = xx - 2 - ix; // xx and yy are in "ORIGIN 1" y = yy - 2 - iy; ix = ix + offsetx; iy = iy + offsety; // printf("Mohammad %d \n", ix); // scanf("%d \n", ix); //int Mohammad; // scanf("%d \n", Mohammad); //4 grids for each departure point: x0=-1, x1=0, x2=1, x3=2 /* wx[0] = cm167*x*(x-one)*(x-two); // polynomial coefficients along i wx[1] = cp5*(x+one)*(x-one)*(x-two); wx[2] = cm5*x*(x+one)*(x-two); wx[3] = cp167*x*(x+one)*(x-one); wy[0] = cm167*y*(y-one)*(y-two); // polynomial coefficients along j wy[1] = cp5*(y+one)*(y-one)*(y-two); wy[2] = cm5*y*(y+one)*(y-two); wy[3] = cp167*y*(y+one)*(y-one); */ //Mohammad if (Step_n/2-Step_n/2.0==0){//backward wx[0] = cp5*x*(x-one); // polynomial coefficients along i wx[1] = -1.0*(x+one)*(x-one); wx[2] = cp5*x*(x+one); wx[3] = 0.0;//cp167*x*(x+one)*(x-one); wy[0] = cp5*y*(y-one); // polynomial coefficients along j wy[1] = -1.0*(y+one)*(y-one); wy[2] = cp5*y*(y+one); wy[3] = 0.0;//cp167*y*(y+one)*(y-one); } else{//forward wx[0] = 0.0;//cm167*x*(x-one)*(x-two); // polynomial coefficients along i wx[1] = cp5*(x-one)*(x-two); wx[2] = -1.0*x*(x-two); wx[3] = cp5*x*(x-one); wy[0] = 0.0;//cm167*y*(y-one)*(y-two); // polynomial coefficients along j wy[1] = cp5*(y-one)*(y-two); wy[2] = -1.0*y*(y-two); wy[3] = cp5*y*(y-one); } // printf("Mohammad 2142022\n"); //linear /*wx[0] = 0.0;//cp5*x*(x-one); // polynomial coefficients along i wx[1] = -1.0*(x-two); wx[2] = 1.0*(x-one); wx[3] = 0.0;//cp167*x*(x+one)*(x-one); wy[0] = 0.0;//cp5*y*(y-one); // polynomial coefficients along j wy[1] = -1.0*(y-two); wy[2] = 1.0*(y-one); wy[3] = 0.0;//cp167*y*(y+one)*(y-one); */ //printf("case1 \n"); //int number; //scanf("%d", number); return ix + iy * ni; // point to lower left corner of 4 x 4 subarray } // version for scalar variables void intrp_bicub_yx_s(float *f, float *r, int ni, int ninj, int nk, double xx, double yy, int Step_kount){ double fd0[4], fd1[4], fd2[4], fd3[4] ; double wx[4], wy[4] ; int ni2 = ni + ni; // + 2 rows int ni3 = ni2 + ni; // + 3 rows int i, k; double x, y; int ix, iy; //int Step_n; //Step_n = Step_kount; f += bicub_coeffs_inline(xx, yy, ni, wx, wy, qminx, qmaxx, qminy, qmaxy, qoffx, qoffy, Step_kount); // use inline function /*for(k=0 ; k maxval) ? f[i+ni ] : maxval; maxval = (f[i+ni2] > maxval) ? f[i+ni2] : maxval; } /*for(i=0 ; i<4 ; i++){ // easily vectorizable form fd0[i] = ( f[i]*wy[0] + f[i+ni]*wy[1] + f[i+ni2]*wy[2] + f[i+ni3]*wy[3] ) * wx[i]; } r[0] = fd0[0] + fd0[1] + fd0[2] + fd0[3]; */ //SWEEP if (Step_kount/2-Step_kount/2.0==0){//backward for(i=0 ; i<3 ; i++){ // easily vectorizable form fd0[i] = ( f[i]*wy[0] + f[i+ni]*wy[1] + f[i+ni2]*wy[2] ) * wx[i]; } r[0] = fd0[0] + fd0[1] + fd0[2]; } else{//forward for(i=1 ; i<4 ; i++){ // easily vectorizable form fd0[i] = ( f[i+ni]*wy[1] + f[i+ni2]*wy[2] + f[i+ni3]*wy[3] ) * wx[i]; } r[0] = fd0[1] + fd0[2] + fd0[3]; } r[0] = (r[0] < minval ) ? minval : r[0] ; // apply monotonic limits r[0] = (r[0] > maxval ) ? maxval : r[0] ; f += ninj; r ++; } } #endif