Compute scaling factor for tritium vapor fluxes

Created by Ivan Lima on Wed Jun 13 2018 09:15:53 -0400

In [3]:
%matplotlib notebook
import pandas as pd
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import os, gsw, datetime
from cesm_utils import da2ma, fixtime, spd, dpm
from he3_inventory import *
plt.rcParams['xtick.minor.bottom'] = False # fix strange minor ticks in time series plots
print('Last updated on {}'.format(datetime.datetime.now().ctime()))
Last updated on Wed Apr  3 15:25:33 2019
In [4]:
def get_lat_band(df, latmin, latmax, zsurf=-10):
    """ extract obs at given latitude bands """
    band = df[(df.tritium_flag==2) | (df.tritium_flag==6)] # only valid measurements
    band = band[band.latitude <= latmax]
    band = band[band.latitude > latmin]
    if zsurf:
        band = band[band.depth > zsurf]
    band = band[['stadate','latitude','longitude','depth','tritium','tritium_error','tritium_flag']]
    return band.sort_values('stadate') # sort by date

def lat_band_mean(ds, latmin, latmax, vname='TRITIUM'): 
    """ compute min, max & area weighted averages for model """
    da = ds[vname].where((ds.TLAT>latmin) & (ds.TLAT<=latmax))
    area = ds.TAREA.where((ds.TLAT>latmin) & (ds.TLAT<=latmax))
    vmean = (da * area).sum(dim=['nlat','nlon']) / area.sum(dim=['nlat','nlon'])
    vstd = np.sqrt(((da - vmean)**2 * area).sum(dim=['nlat','nlon']) / area.sum(dim=['nlat','nlon']))
    vmin, vmax = vmean - 1.96 * vstd, vmean + 1.96 * vstd # 95% confidence interval
    vmin[vmin<0] = 0
    return pd.DataFrame({'model':vmean.to_series(), 'vmin': vmin.to_series(), 'vmax':vmax.to_series()}) 

def lat_band_weight(ds, latmin, latmax, vname='TRITIUM'): 
    """ compute weights for lat bin"""
    da = ds[vname].where((ds.TLAT>latmin) & (ds.TLAT<=latmax))
    area = ds.TAREA.where((ds.TLAT>latmin) & (ds.TLAT<=latmax))
    vsum = (da * area).sum(dim=['nlat','nlon'])/(ds[vname] * ds.TAREA).sum(dim=['nlat','nlon'])
    return pd.DataFrame({'model':vsum.to_series()}) 

lat_bands = ((-90,-60), (-60,-45), (-45,-5), (-5,15), (15,45), (45,60), (60,90))
zbins2 = np.array([-4000, -3500, -3000, -2500, -2000, -1500, -1000, -500, -400, -300, -250, -200, -150, -100, -50, 0])

Read tritium database

In [5]:
def dyear2date(strn_list, yr0=1950): # convert decimal year to date objects
    ndays = [np.round((np.float(x)-yr0) * 365.2425) for x in strn_list] # number of days since 1950-01-01
    return pd.to_datetime(ndays, unit='D',origin='%d-01-01'%yr0)

database = pd.read_csv('data/tritium-helium_database/tritium_helium_all_revised.csv', index_col=0, na_values=-999,
                       parse_dates=[6], date_parser=dyear2date, low_memory=False)

database['depth'] = gsw.z_from_p(np.abs(database.pressure), database.latitude) # compute depth from pressure

Read model tritium inventory

In [6]:
ds_model_inv = xr.open_dataset('/home/ivan/Data/Postproc/Tritium-3H/tritium.tr3he.GIAF.05.nc')
ds_model_inv['TAREA'] = ds_model_inv.TAREA.where(ds_model_inv.REGION_MASK>0) # mask land & marginal seas

Tritium inventory by latitude bands

Shaded area corresponds to 95% confidence interval for model mean.

In [7]:
fig, axs = plt.subplots(len(lat_bands),1,sharex=True,figsize=(6.4,10))
fig.subplots_adjust(top=0.95,hspace=0.3)
for ax, band in zip(axs.ravel(), lat_bands[::-1]):
    df_obs = get_lat_band(database, *band, zsurf=None)
    profile, x, y = get_profile(df_obs, zb=zbins2)
    inventory = get_inventory(profile,nn=11,zb=zbins2)
    df_mod = lat_band_mean(ds_model_inv,*band,'TRITIUM_inventory')
    df_mod.index = df_mod.index.to_datetimeindex()
    _ = inventory.plot(style='.', ax=ax, legend=False, markersize=2)
    _ = df_mod.plot(y='model', ax=ax, style='-', legend=False)
    _ = ax.fill_between(df_mod.index, df_mod.vmin, df_mod.vmax, facecolor='C1', alpha=0.3)
    _ = ax.set_title('lat: {} to {}'.format(*band), fontsize=10)
    ax.autoscale(axis='x',tight=True)
    
_ = ax.legend(loc='best')    
_ = fig.text(0.05,0.575,'tritium inventory (pmol m$^{-2}$)',ha='center',va='center',rotation=90)

Compute scaling factor for each latitude band

For each latitude band we have the observed ($O$) and model ($M$) tritium inventories. We compute a scaling factor or slope ($s$) for each latitude band by fitting a linear regression to:

$$ O(t) = s\; M(t) $$

for each latidude band (intercept = 0).

In [8]:
import statsmodels.api as sm
fig, axs = plt.subplots(3, 3, figsize=(9.5,9.5))
fig.delaxes(axs[2,2])
fig.delaxes(axs[2,1])
slopes = {}
for ax, band in zip(axs.ravel(), lat_bands[::-1]):
    # obs inventory
    df_obs = get_lat_band(database, *band, zsurf=None)
    profile, _, _ = get_profile(df_obs, zb=zbins2)
    inv_obs = get_inventory(profile,nn=11,zb=zbins2)
    # model inventory
    df_mod = lat_band_mean(ds_model_inv,*band,'TRITIUM_inventory')
    df_mod.index = df_mod.index.to_datetimeindex()
    inv_mod = df_mod['model']
    # merge obs & model data
    inventory = pd.DataFrame({'obs': inv_obs, 'model': inv_mod}).dropna()
    # compute OLS
    x, y = inventory['model'].values, inventory['obs'].values
    lreg = sm.OLS(y,x).fit()
    slopes[band] = lreg.params[0]
    # plot data
    _ = ax.plot(x, y, '.')
    _ = ax.plot(x, lreg.predict(x), '-')
    _ = ax.set(xlim=(0,y.max()), ylim=(0,y.max()))
    _ = ax.set_title('lat: {} to {}'.format(*band), fontsize=10)
    _ = ax.text(0.975,0.975,'slope = {:.3f}\np = {:.3g}\n$R^2 = {:.3f}$'.format(lreg.params[0], lreg.pvalues[0], 
                                                                              lreg.rsquared),
                transform=ax.transAxes, ha='right', va='top', fontsize=9)

for ax in [axs[1,1], axs[1,2], axs[2,0]]:
    _ = ax.set(xlabel='model (pmol m$^{-2}$)')
    
for ax in axs[:,0]:
    _ = ax.set(ylabel='observations (pmol m$^{-2}$)')

Compute area weighted mean scaling factor (slope)

We weight the scaling faction ($s$) for each latidude band by the band tritium inventory.

In [9]:
tot, wtot = 0, 0
print('latitude\tslope\tweight')
for k, x in slopes.items():
    latmin, latmax = k
#     w = ds_model_inv.TAREA.where((ds_model_inv.TLAT>latmin) & (ds_model_inv.TLAT<=latmax)
#                                 ).sum(dim=['nlat','nlon']).item()
    inv = lat_band_weight(ds_model_inv,latmin,latmax,'TRITIUM_inventory')
    w = inv.loc['1980-12-31','model'].values.item() # use inventory in 1980
    tot = x * w + tot
    wtot = w + wtot
    print('{:3} to {:3}\t{:.3f}\t{:.3f}'.format(*k,x,w))

slope = tot/wtot
print('\ninventory weighted mean slope = {:.3f}'.format(slope))
latitude	slope	weight
 60 to  90	3.722	0.098
 45 to  60	2.806	0.149
 15 to  45	1.962	0.389
 -5 to  15	1.292	0.131
-45 to  -5	1.357	0.202
-60 to -45	1.915	0.026
-90 to -60	3.879	0.006

inventory weighted mean slope = 2.060

Compute precipitation & vapor contributions to inventory

In [10]:
ds = xr.open_dataset('/home/ivan/Data/Postproc/Tritium-3H/tr3he.GIAF.05_tritium_dep.nc', decode_times=False)
ds = fixtime(ds,1824)
ds['dpm'] = xr.DataArray(np.resize(dpm,720), coords=[ds.time], dims=['time'])
ds['prec'] = (ds.STF_TRITIUM_PREC * spd * ds.dpm * ds.TAREA * 1.e-4).sum(dim=['nlat','nlon']).cumsum(dim='time')
ds['roff'] = (ds.STF_TRITIUM_ROFF * spd * ds.dpm * ds.TAREA * 1.e-4).sum(dim=['nlat','nlon']).cumsum(dim='time')
ds['evap'] = (ds.STF_TRITIUM_EVAP * spd * ds.dpm * ds.TAREA * 1.e-4).sum(dim=['nlat','nlon']).cumsum(dim='time')
ds['prec'] = ds.prec * 1.e-12 # pmol -> mol
ds['roff'] = ds.prec * 1.e-12 # pmol -> mol
ds['evap'] = ds.evap * 1.e-12 # pmol -> mol
In [11]:
df = ds[['prec','evap','roff']].to_dataframe()
df.index = df.index.to_datetimeindex()
df['prec'] = df.prec + df.roff # add river influx to precipitation
df = df.drop(columns=['roff'])
ratio = df.prec/df.evap
fig, axs = plt.subplots(1, 2, sharex=True, figsize=(9.5,4.8))
_ = df.plot(ax=axs[0])
_ = axs[0].set(ylabel='cummulative $^3\!$H deposition (mol)')
_ = ratio.plot(ax=axs[1])
_ = axs[1].set(ylabel='prec/evap')

The model inventory ($M$) can be partitioned into the precipitation ($P$) and vapor ($V$) contributions:

$$ M = P + V $$

Given the observed ($O$) and model ($M$) tritium inventories and the scaling factor or slope ($s$) we computed previously we have:

$$ O = s\,M = s\,(P + V) $$

We compute a correction factor ($c$) for the vapor fluxes ($V$) and leave the precipitation fluxes ($P$) alone by setting:

$$ s\;(P + V) = P + c\,V $$

Solving for $c$ we have:

$$ c = \frac{P}{V}(s -1) + s $$

Using $P/V$ from 1980-12-31 we have:

In [12]:
correction = ratio['1980-12-31'] * (slope - 1) + slope
print('c = {:.3f}'.format(correction))
c = 4.790

Correction factor for vapor fluxes ($c$) as function of $P/V$ (time):

In [13]:
c = ratio * (slope - 1) + slope
fig, ax = plt.subplots()
_ = ax.plot(ratio.index, c)
x1, x2 = ax.get_xlim()
_ = ax.hlines(c.mean(), x1, x2, linestyles='dotted', lw=1)
_ = ax.set(xlabel='time', ylabel='vapor correction factor', xlim=(x1,x2))
_ = ax.text(0.975, 0.65, '$\overline{c}=%.2f$'%c.mean(), ha='right', va='top', transform=ax.transAxes)
In [14]:
# band = lat_bands[-1]
# # obs inventory
# df_obs = get_lat_band(database, *band, zsurf=None)
# profile, _, _ = get_profile(df_obs, zb=zbins2)
# inv_obs = get_inventory(profile,nn=11,zb=zbins2)
# # model inventory
# df_mod = lat_band_mean(ds_model_inv,*band,'TRITIUM_inventory')
# inv_mod = df_mod['model']
# inv_mod.index.name = 'time'
# inv_obs.index.name = 'time'
# inv_mod.to_csv('tr_inv_mod.csv',header=True)
# inv_obs.to_csv('tr_inv_obs.csv',header=True)