#Script to do vertical regridding of cloud fields for CSIRO model, and take zonal mean.
#
#Need a separate script for this because much of the data was saved in a non-standard format.
#
#Will save zonal mean as npy file for loading into "cloudFractionZonalMeanProfiles.py".
#
#Rick Russotto, started 8 February 2018
import numpy as np
import netCDF4 as nc4
#CSIRO did in fact use hybrid sigma pressure levels, according to the papers on the model,
#even though the non-cmorized cloud fraction files S. Phipps sent me said they were sigma
#coordinates, which are a different system (p=sigma*p0).
#I have the values of p0, a and b from the cmor-ized "cl" file for the G1oceanAlbedo experiment.
#They are copied here:
#Hybrid sigma coordinate parameters. Full pressure at level k is
#P[k] = A[k]*P0 + B[k]*PS, with PS the surface pressure in Pa.
#B decreases from ~0.99 to 0, so index 0 is the level nearest the surface.
P0 = 101320 #Pa (reference pressure, copied from the cmor-ized "cl" file)
A = np.array([0.0022509045204, 0.01090822959886, 0.02744510978233, 0.0511613292935427,
0.081499498156741, 0.117432065629021, 0.156602006617164,
0.194782429689618, 0.226158613316395, 0.24456001986107,
0.245284186803889, 0.226803046531514, 0.191604108558055,
0.145736687814914, 0.0971914310375395, 0.0537988267800073,
0.0216049382716049, 0.00445816186556929 ])
B = np.array([0.993290933614031, 0.967486832129535, 0.918371076774597,
0.848701496495209, 0.761093094435852, 0.658631034508153,
0.545729954974057, 0.428674360433839, 0.315336585586212,
0.213944781236324, 0.131259023072654, 0.0708649918772652,
0.0323327913047714, 0.0116707195924933, 0.0029457431737088,
0.000384986663065359, 0, 0])
#Additional challenge: for piControl the surface pressure was only saved for the last 50 years--
#and it's daily. I have to take averages to convert these to monthly.
#Load the pressure output and do the monthly averages for piControl
fpath_ps_PI = '../nobackup/CSIRO_non-CMOR/ps_piControl.nc' #daily PS, non-CMOR format
fpath_ps_G1 = '../nobackup/ps_Amon_CSIRO-Mk3L-1-2_G1_r1i1p1_000101-007012.nc' #monthly, cmor-ized
fpath_ps_A4 = '../nobackup/ps_Amon_CSIRO-Mk3L-1-2_abrupt4xCO2_r1i1p1_000101-015012.nc' #monthly, cmor-ized
#piControl preprocessing: open the daily surface pressure file and collapse the
#daily fields into monthly means so they match the monthly cloud output.
Dataset_ps_PI_daily = nc4.Dataset(fpath_ps_PI)
ps_PI_daily = Dataset_ps_PI_daily.variables['psf'][:]*100 #Was reported in mb, need Pa
numLats = ps_PI_daily.shape[1]
numLons = ps_PI_daily.shape[2]
#Model uses a 365-day calendar, so every one of the 50 years has the same month boundaries.
monthLengths = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31])
cumulativeMonthLengths = np.cumsum(monthLengths)
truncatedCumulativeMonthLengths = cumulativeMonthLengths[:-1]
monthFirstDays = np.append(0, truncatedCumulativeMonthLengths) #January starts at 0, Feb. at 31, etc.
#PS is (day, lat, lon); build the (month, lat, lon) array one month at a time.
ps_PI_monthly = np.zeros([50*12, numLats, numLons])
for i in range(50*12):
    print('Converting piControl daily to monthly pressures--Month ' + str(i))
    monthOfYear = i % 12                      #which calendar month this is
    firstDay = (i // 12)*365 + monthFirstDays[monthOfYear] #// is integer division in Python 3
    lastDay = firstDay + monthLengths[monthOfYear]
    ps_PI_monthly[i,:,:] = np.mean(ps_PI_daily[firstDay:lastDay,:,:], axis=0)
#Open Dataset handles for the cmor-ized G1 and abrupt4xCO2 surface pressures
Dataset_ps_G1 = nc4.Dataset(fpath_ps_G1)
Dataset_ps_A4 = nc4.Dataset(fpath_ps_A4)
#Paths to the non-CMOR cloud fraction files
fpath_cl_PI = '../nobackup/CSIRO_non-CMOR/cloud_piControl_part2.nc' #(Use last 50 years of this) (This goes from years 00701 to 01200--500 years)
fpath_cl_G1 = '../nobackup/CSIRO_non-CMOR/cloud_g1_member1.nc'
fpath_cl_A4 = '../nobackup/CSIRO_non-CMOR/cloud_abrupt4xco2_member1.nc'
print('Loading NetCDF files (cl)')
#Open Dataset handles for the cloud fraction output of each experiment
Dataset_cl_PI = nc4.Dataset(fpath_cl_PI)
Dataset_cl_G1 = nc4.Dataset(fpath_cl_G1)
Dataset_cl_A4 = nc4.Dataset(fpath_cl_A4)
#Function to do the regridding for the non-CMOR cloud files for this model: (monthly)
#Based on the "zonalMeanMultiYearPressureVaryingVariable" function from the geomipFunctions module
# Function to interpolate height-varying model output variables to desired pressure levels and calculate
# zonal mean and multi-year mean. Takes some inspiration from code by Dan Vimont at
# http://www.aos.wisc.edu/~dvimont/matlab/
#
# This version is for Hybrid Sigma vertical coordinates.
#
#
# Inputs:
# Data: NetCDF4 Dataset object containing the cloud output
# pLevels: pressure levels to interpolate to, in hPa.
# firstYear: first year of the multi-year mean (measured from start of run)
# lastYear: last " " " " " "
# DataPS: NetCDF4 Dataset object containing the surface pressure data.
# varPS: PS specified as array instead of Dataset object (necessary for piControl)
# P0, A, B: parameters for hybrid sigma coordinates, defined above
# return4D: if true, return the 4D intermediate result (without zonal or time mean) instead of 2D
#
# Output:
# A 2-D Numpy array with latitude on one axis and pressure on the other, and NaNs where no data exist
# due to terrain (i.e. over Antarctica). Dimension order: pressure, latitude
# Actually Python should convert NaNs to masked arrays when reductions are done, i.e. zonal mean with fewer dimensions.
def zonalMeanMultiYearCloudsCSIRO(Data, pLevels, firstYear, lastYear, P0, A, B, DataPS='none', varPS = 'none', return4D = False):
    """Interpolate CSIRO cloud fraction from hybrid sigma levels to fixed pressure
    levels, then take the multi-year and zonal means.

    Inputs:
        Data:      NetCDF4 Dataset object holding the cloud output in variable 'c',
                   dimensioned (time, lev, lat, lon)
        pLevels:   pressure levels to interpolate to, in hPa
        firstYear: first year of the multi-year mean (measured from start of run)
        lastYear:  last year of the multi-year mean (inclusive)
        P0, A, B:  hybrid sigma coordinate parameters; native level pressure is
                   A[k]*P0 + B[k]*PS (Pa)
        DataPS:    NetCDF4 Dataset with surface pressure variable 'ps' (Pa); leave at
                   the 'none' sentinel when varPS is supplied instead
        varPS:     surface pressure given directly as an array (time, lat, lon) in Pa
                   (necessary for piControl); leave at 'none' to use DataPS
        return4D:  if True, return the 4D interpolated field (time, plev, lat, lon)
                   instead of the 2D time/zonal mean

    Output:
        2-D array (pressure, latitude) with NaNs where no data exist due to terrain
        (i.e. over Antarctica), or the 4D intermediate field if return4D is True.
        Returns None (after printing an error) if neither DataPS nor varPS is given.
    """
    # Extract the cloud data for the requested years: (time, lev, lat, lon)
    datavar = Data.variables['c'][firstYear*12:lastYear*12+12,:,:,:]
    # Bug fix: the original tested varPS == 'none', which performs an elementwise
    # array-vs-string comparison whenever varPS is passed as a numpy array. Test the
    # sentinel by type instead; behavior with the default string sentinels is unchanged.
    if isinstance(varPS, str):  # varPS left at its 'none' default
        if isinstance(DataPS, str):  # no surface pressure supplied at all
            print('Error: no surface pressure output saved in same file as the cloud output for this model')
            return
        else:
            PS = DataPS.variables['ps'][firstYear*12:lastYear*12+12,:,:] #time, lat, lon; Surface pressure (Pa)
    else: #PS specified as an array defined outside the function (piControl case)
        PS = varPS
    # Pressure at the data points from the hybrid sigma formula
    # P[i,j,k] = a[k]*p0 + b[k]*PS[i,j]; a and b are constant in time while PS is not,
    # so use broadcasting to combine them.
    AkP0 = A*P0
    BkPSij = B[None,:,None,None]*PS[:,None,:,:] #Result: 4D array (time, lev, lat, lon)
    print('Shape of 4D array to interpolate:')
    print(np.shape(BkPSij))
    pressureMat = (AkP0[None,:,None,None] + BkPSij)/100. #Divide by 100 to go from Pa to hPa
    # Interpolate from the model's native pressures (which vary with time, lat and lon)
    # to the fixed levels in pLevels. Nested loops would be far too slow, so the work is
    # done with whole-array operations, one desired pressure level at a time:
    #   - find, for each column, the native level just above and just below the target
    #   - flag columns where the target lies outside the native pressure range (terrain)
    #     so NaNs can be slotted in before the final interpolation
    #   - linearly interpolate in log pressure between the two bounding levels
    s = np.shape(pressureMat)
    numTimes = s[0]
    numLevs = s[1]
    numLats = s[2]
    numLons = s[3]
    # Reshape pressures and data to 2D (column, level) so the value at a per-column
    # level index can be extracted easily; make level the last axis so columns stay
    # intact through the reshape.
    pressure2D = np.swapaxes(pressureMat,1,3) #Now it's time, lon, lat, level
    pressure2D = np.reshape(pressure2D, (numTimes*numLats*numLons, numLevs))
    data2D = np.swapaxes(datavar,1,3)
    data2D = np.reshape(data2D, (numTimes*numLats*numLons, numLevs))
    # Preallocate array to hold interpolated data
    interpMat = np.empty((pressureMat.shape[0], len(pLevels), pressureMat.shape[2], pressureMat.shape[3]))
    # Loop over the desired pressure levels and interpolate data to each one
    for k in range(0, len(pLevels)):
        print('Interpolating to ' + str(pLevels[k]) + ' hPa')
        # This code block: find the upper (higher-pressure) bound for the interpolation
        print('Positive side')
        pressureMatDiffPos = pressureMat - pLevels[k] #4D array: native minus desired pressure
        # Only positive differences (native pressure above the target) matter here;
        # mask the rest with a huge number so argmin picks the closest level above.
        pressureMatDiffPos[pressureMatDiffPos < 0] = 1.e10
        upperIndex = np.argmin(pressureMatDiffPos, axis=1) #upperIndex is 3D array in time, lat, lon
        # Columns whose native pressures all lie below the target have every entry set
        # to 1.e10, so max-min along the column is zero; flag those columns for NaNs.
        nanUpperIndex = np.max(pressureMatDiffPos, axis=1) - np.min(pressureMatDiffPos,axis=1) #3D: time, lat, lon
        nanUpperIndexBool = np.ones(np.shape(nanUpperIndex), dtype = bool)
        nanUpperIndexBool[nanUpperIndex >= 1.e-99] = False
        # Flatten the index and NaN-flag arrays to line up with the 2D-reshaped data
        upperIndex1D = np.swapaxes(upperIndex, 1,2) #switched lat and lon to match reshaped matrices above
        upperIndex1D = np.reshape(upperIndex1D, numTimes*numLats*numLons) #Convert to a vector
        nanUpperIndex1D = np.swapaxes(nanUpperIndexBool, 1,2)
        nanUpperIndex1D = np.reshape(nanUpperIndex1D, numTimes*numLats*numLons)
        # Extract the native pressure and data values at the upper bound indices
        upperPressureBound = pressure2D[range(0,len(upperIndex1D)),upperIndex1D] #Result: 1D vector
        upperDataBound = data2D[range(0,len(upperIndex1D)),upperIndex1D]
        # NaN out columns where the target is outside the data range
        upperPressureBound[nanUpperIndex1D] = np.nan
        upperDataBound[nanUpperIndex1D] = np.nan
        # This code block: same as above but for the lower (lower-pressure) bound
        print('Negative side')
        pressureMatDiffNeg = pressureMat - pLevels[k]
        pressureMatDiffNeg[pressureMatDiffNeg > 0] = 1.e10 #This time only negative values are of interest
        lowerIndex = np.argmin(np.abs(pressureMatDiffNeg), axis=1)
        nanLowerIndex = np.max(pressureMatDiffNeg, axis=1) - np.min(pressureMatDiffNeg,axis=1)
        nanLowerIndexBool = np.ones(np.shape(nanLowerIndex), dtype = bool)
        nanLowerIndexBool[nanLowerIndex >= 1.e-99] = False
        lowerIndex1D = np.swapaxes(lowerIndex, 1,2)
        lowerIndex1D = np.reshape(lowerIndex1D, numTimes*numLats*numLons)
        nanLowerIndex1D = np.swapaxes(nanLowerIndexBool, 1,2)
        nanLowerIndex1D = np.reshape(nanLowerIndex1D, numTimes*numLats*numLons)
        lowerPressureBound = pressure2D[range(0,len(lowerIndex1D)),lowerIndex1D]
        lowerDataBound = data2D[range(0,len(lowerIndex1D)),lowerIndex1D]
        lowerPressureBound[nanLowerIndex1D] = np.nan
        lowerDataBound[nanLowerIndex1D] = np.nan
        # Linearly interpolate in log pressure space
        # (interpolating in log pressure means interpolating linearly w.r.t. height)
        interpVec = lowerDataBound+(upperDataBound-lowerDataBound)*(np.log(pLevels[k])-np.log(lowerPressureBound))/(
                    np.log(upperPressureBound)-np.log(lowerPressureBound))
        # Reshape back to 3D and store in the 4D output matrix
        interpSlice = np.reshape(interpVec, (numTimes, numLons, numLats)) #time, lon, lat for consistency with above
        interpSlice = np.swapaxes(interpSlice,1,2) # switch lat and lon again
        interpMat[:,k,:,:] = interpSlice #Populate the returned matrix
        print('Finished k = ' + str(k))
    # Now we have a 4D matrix of interpolated data in time, pressure, latitude and longitude.
    if return4D:
        return interpMat
    # Zonal and time means (nanmean so terrain gaps don't poison the averages)
    interpMatZonalMean = np.nanmean(interpMat, axis=3)
    interpMatTimeMean = np.nanmean(interpMatZonalMean, axis=0)
    return interpMatTimeMean
#Do the regridding/zonal mean and save to npy file. (Multiply by 100 before saving, to go from fraction to percent)
#The standard pressure levels from CMIP5: (hPa)
levs = np.array([10, 20, 30, 50, 70, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000])
#piControl: 500 years, use last 40 so (starting at 0) first year is 460, last is 499
#Also use last 40 years (months 120 onward) of the 50-year monthly mean pressure matrix calculated above.
print('Vertical regridding and zonal mean cloud fraction for CSIRO for piControl')
zonalMean_cl_PI = zonalMeanMultiYearCloudsCSIRO(Dataset_cl_PI, levs, 460, 499, P0, A, B, varPS = ps_PI_monthly[120:,:,:])*100.
np.save('npyFiles/zonalMeans_cl_PI_cs.npy', zonalMean_cl_PI)
#G1 and abrupt4xCO2: years 10 to 49 of the datasets, for both G1 and abrupt4xCO2
#Like for piControl, the non-cmor netcdf files say these start at year 201; I assume the years were relabeled to start at 1 for the
#cmorized file names, and that there's no spinup here.
print('Vertical regridding and zonal mean cloud fraction for CSIRO for G1') #Bug fix: this message previously said "piControl"
zonalMean_cl_G1 = zonalMeanMultiYearCloudsCSIRO(Dataset_cl_G1, levs, 10, 49, P0, A, B, DataPS = Dataset_ps_G1)*100.
np.save('npyFiles/zonalMeans_cl_G1_cs.npy', zonalMean_cl_G1)
print('Vertical regridding and zonal mean cloud fraction for CSIRO for abrupt4xCO2')
zonalMean_cl_A4 = zonalMeanMultiYearCloudsCSIRO(Dataset_cl_A4, levs, 10, 49, P0, A, B, DataPS = Dataset_ps_A4)*100.
np.save('npyFiles/zonalMeans_cl_A4_cs.npy', zonalMean_cl_A4)
#2-12-18: save intermediate result (4D array) for the cloudFractionMaps script
print('Vertical regridding cloud fraction for CSIRO for piControl')
cl_4D_PI = zonalMeanMultiYearCloudsCSIRO(Dataset_cl_PI, levs, 460, 499, P0, A, B, varPS = ps_PI_monthly[120:,:,:], return4D = True)*100.
np.save('../nobackup/npyFiles/cl_4D_PI_cs.npy', cl_4D_PI)
print('Vertical regridding cloud fraction for CSIRO for G1')
cl_4D_G1 = zonalMeanMultiYearCloudsCSIRO(Dataset_cl_G1, levs, 10, 49, P0, A, B, DataPS = Dataset_ps_G1, return4D = True)*100.
np.save('../nobackup/npyFiles/cl_4D_G1_cs.npy', cl_4D_G1)
print('Vertical regridding cloud fraction for CSIRO for abrupt4xCO2')
cl_4D_A4 = zonalMeanMultiYearCloudsCSIRO(Dataset_cl_A4, levs, 10, 49, P0, A, B, DataPS = Dataset_ps_A4, return4D = True)*100.
np.save('../nobackup/npyFiles/cl_4D_A4_cs.npy', cl_4D_A4)