Created by Ivan Lima on Wed Jun 13 2018 09:15:53 -0400
%matplotlib notebook
import pandas as pd
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import os, gsw, datetime
from cesm_utils import da2ma, fixtime, spd, dpm
from he3_inventory import *
plt.rcParams['xtick.minor.bottom'] = False # fix strange minor ticks in time series plots
print('Last updated on {}'.format(datetime.datetime.now().ctime()))
def get_lat_band(df, latmin, latmax, zsurf=-10):
""" extract obs at given latitude bands """
band = df[(df.tritium_flag==2) | (df.tritium_flag==6)] # only valid measurements
band = band[band.latitude <= latmax]
band = band[band.latitude > latmin]
if zsurf:
band = band[band.depth > zsurf]
band = band[['stadate','latitude','longitude','depth','tritium','tritium_error','tritium_flag']]
return band.sort_values('stadate') # sort by date
def lat_band_mean(ds, latmin, latmax, vname='TRITIUM'):
""" compute min, max & area weighted averages for model """
da = ds[vname].where((ds.TLAT>latmin) & (ds.TLAT<=latmax))
area = ds.TAREA.where((ds.TLAT>latmin) & (ds.TLAT<=latmax))
vmean = (da * area).sum(dim=['nlat','nlon']) / area.sum(dim=['nlat','nlon'])
vstd = np.sqrt(((da - vmean)**2 * area).sum(dim=['nlat','nlon']) / area.sum(dim=['nlat','nlon']))
vmin, vmax = vmean - 1.96 * vstd, vmean + 1.96 * vstd # 95% confidence interval
vmin[vmin<0] = 0
return pd.DataFrame({'model':vmean.to_series(), 'vmin': vmin.to_series(), 'vmax':vmax.to_series()})
def lat_band_weight(ds, latmin, latmax, vname='TRITIUM'):
""" compute weights for lat bin"""
da = ds[vname].where((ds.TLAT>latmin) & (ds.TLAT<=latmax))
area = ds.TAREA.where((ds.TLAT>latmin) & (ds.TLAT<=latmax))
vsum = (da * area).sum(dim=['nlat','nlon'])/(ds[vname] * ds.TAREA).sum(dim=['nlat','nlon'])
return pd.DataFrame({'model':vsum.to_series()})
lat_bands = ((-90,-60), (-60,-45), (-45,-5), (-5,15), (15,45), (45,60), (60,90))
zbins2 = np.array([-4000, -3500, -3000, -2500, -2000, -1500, -1000, -500, -400, -300, -250, -200, -150, -100, -50, 0])
def dyear2date(strn_list, yr0=1950): # convert decimal year to date objects
ndays = [np.round((np.float(x)-yr0) * 365.2425) for x in strn_list] # number of days since 1950-01-01
return pd.to_datetime(ndays, unit='D',origin='%d-01-01'%yr0)
database = pd.read_csv('data/tritium-helium_database/tritium_helium_all_revised.csv', index_col=0, na_values=-999,
parse_dates=[6], date_parser=dyear2date, low_memory=False)
database['depth'] = gsw.z_from_p(np.abs(database.pressure), database.latitude) # compute depth from pressure
ds_model_inv = xr.open_dataset('/home/ivan/Data/Postproc/Tritium-3H/tritium.tr3he.GIAF.05.nc')
ds_model_inv['TAREA'] = ds_model_inv.TAREA.where(ds_model_inv.REGION_MASK>0) # mask land & marginal seas
Shaded area corresponds to 95% confidence interval for model mean.
fig, axs = plt.subplots(len(lat_bands),1,sharex=True,figsize=(6.4,10))
fig.subplots_adjust(top=0.95,hspace=0.3)
for ax, band in zip(axs.ravel(), lat_bands[::-1]):
df_obs = get_lat_band(database, *band, zsurf=None)
profile, x, y = get_profile(df_obs, zb=zbins2)
inventory = get_inventory(profile,nn=11,zb=zbins2)
df_mod = lat_band_mean(ds_model_inv,*band,'TRITIUM_inventory')
df_mod.index = df_mod.index.to_datetimeindex()
_ = inventory.plot(style='.', ax=ax, legend=False, markersize=2)
_ = df_mod.plot(y='model', ax=ax, style='-', legend=False)
_ = ax.fill_between(df_mod.index, df_mod.vmin, df_mod.vmax, facecolor='C1', alpha=0.3)
_ = ax.set_title('lat: {} to {}'.format(*band), fontsize=10)
ax.autoscale(axis='x',tight=True)
_ = ax.legend(loc='best')
_ = fig.text(0.05,0.575,'tritium inventory (pmol m$^{-2}$)',ha='center',va='center',rotation=90)
For each latitude band we have the observed ($O$) and model ($M$) tritium inventories. We compute a scaling factor or slope ($s$) for each latitude band by fitting a linear regression to:
$$ O(t) = s\; M(t) $$for each latidude band (intercept = 0).
import statsmodels.api as sm
fig, axs = plt.subplots(3, 3, figsize=(9.5,9.5))
fig.delaxes(axs[2,2])
fig.delaxes(axs[2,1])
slopes = {}
for ax, band in zip(axs.ravel(), lat_bands[::-1]):
# obs inventory
df_obs = get_lat_band(database, *band, zsurf=None)
profile, _, _ = get_profile(df_obs, zb=zbins2)
inv_obs = get_inventory(profile,nn=11,zb=zbins2)
# model inventory
df_mod = lat_band_mean(ds_model_inv,*band,'TRITIUM_inventory')
df_mod.index = df_mod.index.to_datetimeindex()
inv_mod = df_mod['model']
# merge obs & model data
inventory = pd.DataFrame({'obs': inv_obs, 'model': inv_mod}).dropna()
# compute OLS
x, y = inventory['model'].values, inventory['obs'].values
lreg = sm.OLS(y,x).fit()
slopes[band] = lreg.params[0]
# plot data
_ = ax.plot(x, y, '.')
_ = ax.plot(x, lreg.predict(x), '-')
_ = ax.set(xlim=(0,y.max()), ylim=(0,y.max()))
_ = ax.set_title('lat: {} to {}'.format(*band), fontsize=10)
_ = ax.text(0.975,0.975,'slope = {:.3f}\np = {:.3g}\n$R^2 = {:.3f}$'.format(lreg.params[0], lreg.pvalues[0],
lreg.rsquared),
transform=ax.transAxes, ha='right', va='top', fontsize=9)
for ax in [axs[1,1], axs[1,2], axs[2,0]]:
_ = ax.set(xlabel='model (pmol m$^{-2}$)')
for ax in axs[:,0]:
_ = ax.set(ylabel='observations (pmol m$^{-2}$)')
We weight the scaling faction ($s$) for each latidude band by the band tritium inventory.
tot, wtot = 0, 0
print('latitude\tslope\tweight')
for k, x in slopes.items():
latmin, latmax = k
# w = ds_model_inv.TAREA.where((ds_model_inv.TLAT>latmin) & (ds_model_inv.TLAT<=latmax)
# ).sum(dim=['nlat','nlon']).item()
inv = lat_band_weight(ds_model_inv,latmin,latmax,'TRITIUM_inventory')
w = inv.loc['1980-12-31','model'].values.item() # use inventory in 1980
tot = x * w + tot
wtot = w + wtot
print('{:3} to {:3}\t{:.3f}\t{:.3f}'.format(*k,x,w))
slope = tot/wtot
print('\ninventory weighted mean slope = {:.3f}'.format(slope))
ds = xr.open_dataset('/home/ivan/Data/Postproc/Tritium-3H/tr3he.GIAF.05_tritium_dep.nc', decode_times=False)
ds = fixtime(ds,1824)
ds['dpm'] = xr.DataArray(np.resize(dpm,720), coords=[ds.time], dims=['time'])
ds['prec'] = (ds.STF_TRITIUM_PREC * spd * ds.dpm * ds.TAREA * 1.e-4).sum(dim=['nlat','nlon']).cumsum(dim='time')
ds['roff'] = (ds.STF_TRITIUM_ROFF * spd * ds.dpm * ds.TAREA * 1.e-4).sum(dim=['nlat','nlon']).cumsum(dim='time')
ds['evap'] = (ds.STF_TRITIUM_EVAP * spd * ds.dpm * ds.TAREA * 1.e-4).sum(dim=['nlat','nlon']).cumsum(dim='time')
ds['prec'] = ds.prec * 1.e-12 # pmol -> mol
ds['roff'] = ds.prec * 1.e-12 # pmol -> mol
ds['evap'] = ds.evap * 1.e-12 # pmol -> mol
df = ds[['prec','evap','roff']].to_dataframe()
df.index = df.index.to_datetimeindex()
df['prec'] = df.prec + df.roff # add river influx to precipitation
df = df.drop(columns=['roff'])
ratio = df.prec/df.evap
fig, axs = plt.subplots(1, 2, sharex=True, figsize=(9.5,4.8))
_ = df.plot(ax=axs[0])
_ = axs[0].set(ylabel='cummulative $^3\!$H deposition (mol)')
_ = ratio.plot(ax=axs[1])
_ = axs[1].set(ylabel='prec/evap')
The model inventory ($M$) can be partitioned into the precipitation ($P$) and vapor ($V$) contributions:
$$ M = P + V $$Given the observed ($O$) and model ($M$) tritium inventories and the scaling factor or slope ($s$) we computed previously we have:
$$ O = s\,M = s\,(P + V) $$We compute a correction factor ($c$) for the vapor fluxes ($V$) and leave the precipitation fluxes ($P$) alone by setting:
$$ s\;(P + V) = P + c\,V $$Solving for $c$ we have:
$$ c = \frac{P}{V}(s -1) + s $$Using $P/V$ from 1980-12-31 we have:
correction = ratio['1980-12-31'] * (slope - 1) + slope
print('c = {:.3f}'.format(correction))
Correction factor for vapor fluxes ($c$) as function of $P/V$ (time):
c = ratio * (slope - 1) + slope
fig, ax = plt.subplots()
_ = ax.plot(ratio.index, c)
x1, x2 = ax.get_xlim()
_ = ax.hlines(c.mean(), x1, x2, linestyles='dotted', lw=1)
_ = ax.set(xlabel='time', ylabel='vapor correction factor', xlim=(x1,x2))
_ = ax.text(0.975, 0.65, '$\overline{c}=%.2f$'%c.mean(), ha='right', va='top', transform=ax.transAxes)
# band = lat_bands[-1]
# # obs inventory
# df_obs = get_lat_band(database, *band, zsurf=None)
# profile, _, _ = get_profile(df_obs, zb=zbins2)
# inv_obs = get_inventory(profile,nn=11,zb=zbins2)
# # model inventory
# df_mod = lat_band_mean(ds_model_inv,*band,'TRITIUM_inventory')
# inv_mod = df_mod['model']
# inv_mod.index.name = 'time'
# inv_obs.index.name = 'time'
# inv_mod.to_csv('tr_inv_mod.csv',header=True)
# inv_obs.to_csv('tr_inv_obs.csv',header=True)