# -*- coding: utf-8 -*-
"""
This module contains the functions for calculating parameter based validation statistics for
a set of matchup records.
:Dependencies [External]: os, scipy, numpy, copy
:Dependencies [Internal]: waveval.ModelData, waveval.ObsData, waveval.MatchUpDatabase, waveval.Geometry
"""
# ----------------------------------------------------------------------------
# IMPORTS
# ----------------------------------------------------------------------------
# Standard Python Dependencies
import os
import scipy.io as sio
import copy
import numpy as np
from scipy.interpolate import interp1d
# Non-Standard Python Dependencies
# Local Module Dependencies
from waveval import ModelData as MData
from waveval import ObsData as OData
from waveval import MatchUpDatabase as MDB
from waveval.Geometry import pol2cmplx
from waveval.Graphics import plot_time_series, plot_correlation, plot_close
# Other Dependencies
# ----------------------------------------------------------------------------
# GLOBAL VARIABLES
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# CLASS DEFINITIONS
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# FUNCTION DEFINITIONS
# ----------------------------------------------------------------------------
# ======================== Data Selection =================================
[docs]def getVarName(record, Options: list):
"""
Get variable names based on processsing options list
"""
hasVar = [False]*len(Options)
for indx, vn in enumerate(Options):
hasVar[indx] = record.hasVar(vn)
varIndx = np.where(hasVar)[0]
if len(varIndx) != 0:
varName = Options[varIndx[0]]
else:
varName = ''
return varName
[docs]def getSurfaceIndx(record, dataFmt):
"""
Get index of surface layer in depth data
"""
if dataFmt == 'InSituTAC':
depth = record.getVar('DEPH')
elif dataFmt == 'SeaDataNet':
depth = record.getVar('DEPTH')
dims = depth.shape
if len(dims) <= 1:
surfIndx = [0]
else:
surfIndx = np.where(depth[0, :] == 0.0)[0]
return surfIndx[0]
# ========================== Statistics ===================================
[docs]def corr_coef(model, obs):
"""
Validation Metric: Correlation Coefficient
:param numpy.array, float model: model data time series
:param numpy.array, floatobs: observation time series mapped onto model time stamps
:return: correlation coefficient (R)
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
num = np.nansum((m-np.nanmean(m))*(o-np.nanmean(o)))
msqr = np.nansum(np.square(m-np.nanmean(m)))
osqr = np.nansum(np.square(o-np.nanmean(o)))
denom = np.sqrt(msqr*osqr)
R = num/denom
return R
[docs]def bias(model, obs):
"""
Validation Metric: Bias
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: bias (b)
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
b = np.nanmean(m)-np.nanmean(o)
return b
[docs]def mean_bias(model, obs):
"""
Validation Metric: Mean Bias
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: mb
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
mb = np.nanmean(m-o)
return mb
[docs]def mean_absolute_error(model, obs):
"""
Validation Metric: Mean absolute error
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: mae
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
mae = np.nanmean(np.abs(m-o))
return mae
[docs]def norm_ma_error(model, obs):
"""
Validation Metric: Normalised mean absolute error
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: nmae
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
nmae = np.nansum(np.abs(m-o))/np.nansum(o)
nmae = nmae * 100.0
return nmae
[docs]def norm_mean_bias(model, obs):
"""
Validation Metric: Normalised mean bias
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: nmb
:rtype: float
"""
# Returns value as percentage
m = np.squeeze(model)
o = np.squeeze(obs)
nmb = np.nansum(m-o)/np.nansum(o)
nmb = nmb * 100.0
return nmb
[docs]def rms_error(model, obs):
"""
Validation Metric: Root-mean-square error
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: rmse
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
N = min(len(model != np.nan), len(obs != np.nan))
rmse = np.sqrt(np.nansum(np.square(m-o))/N)
return rmse
[docs]def norm_rmse(model, obs):
"""
Validation Metric: Normalised root-mean-square error
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: nrmse
:rtype: float
"""
# Returns value as percentage
o = np.squeeze(obs)
nrmse = rms_error(model, obs) / np.sqrt(np.nanmean(np.square(o)))
nrmse = nrmse * 100.0
return nrmse
[docs]def scatter_index(model, obs):
"""
Validation Metric: Scatter index
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: si
:rtype: float
"""
# Returns value as percentage
m = np.squeeze(model)
o = np.squeeze(obs)
num = np.nansum(np.square((m-np.nanmean(m))-(o-np.nanmean(o))))
denom = np.nansum(np.square(o))
si = np.sqrt(num/denom)
si = si * 100.0
return si
[docs]def reliability_index(model, obs):
"""
Validation Metric: Reliability index
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: ri
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
N = min(len(model != np.nan), len(obs != np.nan))
ri = np.exp(np.sqrt(np.nansum(np.square(np.log(o/m)))/N))
return ri
[docs]def skill_score(model, obs):
"""
Validation Metric: Skill score
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: ss
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
num = np.nansum(np.square(np.abs(m-o)))
mean_o = np.nanmean(o)
denom = np.nansum(np.square(np.abs(m-mean_o)+np.abs(o-mean_o)))
ss = 1.0 - np.sqrt(num/denom)
return ss
[docs]def model_efficiency(model, obs):
"""
Validation Metric: Model efficiency
:param numpy.array, float model: model data time series
:param numpy.array, float obs: observation time series mapped onto model time stamps
:return: mef
:rtype: float
"""
m = np.squeeze(model)
o = np.squeeze(obs)
mean_m = np.nanmean(m)
mean_o = np.nanmean(o)
denom = np.nansum(np.square(o-mean_o))
num = denom-np.nansum(np.square(m-mean_m))
mef = num/denom
return mef
# ====================== Circular Statistics ===============================
[docs]def circ_mod_pi(theta):
"""
Fix angular range in radians to -\\pi <= \\theta <= \\pi
:param numpy.array, float theta: vector of angular values in radians
:return: theta in range -\\pi to \\pi
:rtype: numpy.array, float
"""
# This function requires theta in radians
if theta.size > 1:
indx = np.where(theta < -np.pi)[0]
theta[indx] = theta[indx]+2.0*np.pi
indx = np.where(theta > np.pi)[0]
theta[indx] = theta[indx]-2.0*np.pi
else:
if theta < -np.pi:
theta += 2.0*np.pi
elif theta > np.pi:
theta -= 2.0*np.pi
return theta
[docs]def circ_mean(theta, degrees=True):
"""
Calculate mean of angular data for range (-180 to 180)
Parameters
----------
theta : numpy.array, float
vector of angular values.
degrees : bool, optional
angular data units flag. The default is degrees = True.
Returns
-------
mean_theta : float
mean of the angular data in same units as passed in.
"""
if degrees:
thetar = circ_mod_pi(np.radians(np.squeeze(theta)))
else:
thetar = circ_mod_pi(np.squeeze(theta))
mean_theta = np.nanmean(thetar)
if degrees:
mean_theta = np.degrees(mean_theta)
return mean_theta
[docs]def circ_std(theta, degrees=True):
"""
Calculate standard deviation of angular data for range (-180 to 180)
Parameters
----------
theta : numpy.array, float
vector of angular values.
degrees : bool, optional
angular data units flag. The default is degrees = True.
Returns
-------
std_theta : float
stanadrd deviation of the angular data in same units as passed in.
"""
if degrees:
thetar = circ_mod_pi(np.radians(np.squeeze(theta)))
else:
thetar = circ_mod_pi(np.squeeze(theta))
std_theta = np.nanstd(thetar)
if degrees:
std_theta = np.degrees(std_theta)
return std_theta
[docs]def circ_corr(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Correlation Coefficient
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular correlation coefficient (c_corr)
:rtype: float
"""
if degrees:
theta1r = np.radians(np.squeeze(theta1), dtype='double')
theta2r = np.radians(np.squeeze(theta2), dtype='double')
else:
theta1r = np.double(np.squeeze(theta1))
theta2r = np.double(np.squeeze(theta2))
cmplx1 = pol2cmplx(np.ones_like(theta1r, dtype=np.double), theta1r)
cmplx2 = pol2cmplx(np.ones_like(theta2r, dtype=np.double), theta2r)
c_corr = np.abs(np.cov(cmplx1,cmplx2)[0,1])/(np.std(cmplx1)*np.std(cmplx2))
return c_corr
[docs]def circ_mean_bias(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Mean Bias
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular mean bias (mb)
:rtype: float
"""
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
dtheta = circ_mod_pi(np.radians(theta1 - theta2))
else:
dtheta = circ_mod_pi(theta1 - theta2)
circ_mb = np.nanmean(dtheta)
if degrees:
circ_mb = np.degrees(circ_mb)
return circ_mb
[docs]def circ_norm_mean_bias(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Normalised mean Bias
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular normalised mean bias (nmb) as a percentage
:rtype: float
"""
# Returns value as percentage
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
circ_nmb = circ_mean_bias(theta1, theta2, degrees)/np.double(180.0)
else:
circ_nmb = circ_mean_bias(theta1, theta2, degrees)/(np.pi)
circ_nmb = circ_nmb * 100.0
return circ_nmb
[docs]def circ_mean_abs_error(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Mean absolute error
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular mean absolute error (mae)
:rtype: float
"""
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
dtheta = circ_mod_pi(np.radians(theta1 - theta2))
else:
dtheta = circ_mod_pi(theta1 - theta2)
circ_mae = np.nanmean(np.abs(dtheta))
if degrees:
circ_mae = np.degrees(circ_mae)
return circ_mae
[docs]def circ_norm_ma_error(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Normalised mean absolute error
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular normalised mean absolute error (mae) as a percentage.
:rtype: float
"""
# Returns value as percentage
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/np.double(180.0)
else:
circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/(np.pi)
circ_nmae = circ_nmae * 100.0
return circ_nmae
[docs]def circ_rms_error(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Root-mean-square error
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular RMS error (circ_rmse)
:rtype: float
"""
theta1 = np.squeeze(theta1)
theta2 = np.squeeze(theta2)
if degrees:
dtheta = circ_mod_pi(np.radians(theta1 - theta2))
else:
dtheta = circ_mod_pi(theta1 - theta2)
circ_rmse = np.sqrt(np.nanmean(np.square(dtheta)))
if degrees:
circ_rmse = np.degrees(circ_rmse)
return circ_rmse
[docs]def circ_norm_rmse(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Normalised root-mean-square error
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular normalised RMS erro (circ)nrmse as a percentage
:rtype: float
"""
# Returns value as percentage
theta1 = np.squeeze(theta1)
theta2 = np.squeeze(theta2)
if degrees:
dtheta = circ_mod_pi(np.radians(theta1 - theta2))
else:
dtheta = circ_mod_pi(theta1 - theta2)
num = np.sqrt(np.nanmean(np.square(dtheta)))
denom = np.pi
circ_nrmse = (num / denom) * 100.0
return circ_nrmse
[docs]def circ_scatter_index(theta1, theta2, degrees=True):
"""
Validation Metric (Circular): Scatter index
:param numpy.array,float theta1: vector of angular data (modelled)
:param numpy.array,float theta1: vector of angualr data (observed)
:param bool degrees: angular data units flag. The default is degrees = True
:return: circular scatter index (circ_si)
:rtype: float
"""
# Returns value as percentage
theta1 = np.double(np.squeeze(theta1))
theta2 = np.double(np.squeeze(theta2))
if degrees:
theta1 = np.radians(theta1)
theta2 = np.radians(theta2)
theta1 = circ_mod_pi(theta1)
theta2 = circ_mod_pi(theta2)
mtheta1 = np.nanmean(theta1)
mtheta2 = np.nanmean(theta2)
dtheta1 = circ_mod_pi(theta1-mtheta1)
dtheta2 = circ_mod_pi(theta2-mtheta2)
num = np.nanmean(np.square(circ_mod_pi(dtheta1-dtheta2)))
denom = np.square(np.pi)
circ_si = np.sqrt(num/denom) * 100.0
return circ_si
# =========================== Data Extraction =============================
# ========================== Data Interpolators ===========================
[docs]def time_interpolate_obs(t_obs, t_var, t_model):
"""
Interpolation of observation data onto the model output timestamps
"""
intrp_var = np.empty_like(t_model, dtype='float')
intrp_var[:] = np.nan
for i_ref, t_ref in enumerate(t_model):
idx = np.sort(np.argsort(abs(t_obs-t_ref))[0:2])
o_diff = abs(np.diff(t_obs[idx]))*24.0
m_diff = np.min(abs(t_obs[idx]-t_ref))*24.0
if (o_diff < 1.5) and (m_diff < 1.0):
if t_ref < t_obs[idx[0]]:
intrp_var[i_ref] = t_var[idx[0]]
elif t_ref > t_obs[idx[1]]:
intrp_var[i_ref] = t_var[idx[1]]
else:
fnc = interp1d(t_obs[idx], t_var[idx],
kind='nearest', bounds_error=False)
intrp_var[i_ref] = fnc(t_ref)
return intrp_var
# ============================ Analysis Tools =============================
[docs]def initialize_results():
"""
Initialise a dictionary for collecting validation results
"""
results = {}
results.update({'MODEL': []})
results.update({'OBSERVATION': []})
results.update({'YEAR': []})
results.update({'MONTH': []})
results.update({'PARAM': []})
results.update({'R': []})
results.update({'MB': []})
results.update({'NMB': []})
results.update({'MAE': []})
results.update({'NMAE': []})
results.update({'RMSE': []})
results.update({'NRMSE': []})
results.update({'SI': []})
results.update({'NSMPL': []})
return results
[docs]def calculate_circular_metrics(model, obs, nsampl, degrees=True):
"""
Calculate circlar validation metrics for a pair of modelled and observation time series
"""
R = circ_corr(model, obs, degrees)
mb = circ_mean_bias(model, obs, degrees)
nmb = circ_norm_mean_bias(model, obs, degrees)
mae = circ_mean_abs_error(model, obs, degrees)
nmae = circ_norm_ma_error(model, obs, degrees)
rmse = circ_rms_error(model, obs, degrees)
nrmse = circ_norm_rmse(model, obs, degrees)
si = circ_scatter_index(model, obs, degrees)
metrics = {}
metrics['R'] = R
metrics['MB'] = mb
metrics['NMB'] = nmb
metrics['MAE'] = mae
metrics['NMAE'] = nmae
metrics['RMSE'] = rmse
metrics['NRMSE'] = nrmse
metrics['SI'] = si
metrics['NSMPL'] = nsampl
return metrics
[docs]def calculate_metrics(model, obs, nsampl):
"""
Calculate standard validation metrics for a pair of modelled and observation time series
"""
R = corr_coef(model, obs)
mb = mean_bias(model, obs)
nmb = norm_mean_bias(model, obs)
mae = mean_absolute_error(model, obs)
nmae = norm_ma_error(model, obs)
rmse = rms_error(model, obs)
nrmse = norm_rmse(model, obs)
si = scatter_index(model, obs)
metrics = {}
metrics['R'] = R
metrics['MB'] = mb
metrics['NMB'] = nmb
metrics['MAE'] = mae
metrics['NMAE'] = nmae
metrics['RMSE'] = rmse
metrics['NRMSE'] = nrmse
metrics['SI'] = si
metrics['NSMPL'] = nsampl
return metrics
[docs]def results_record(modelFileName, obsFileName, obsVarName,
year, month, metrics):
"""
Create results recod from calculated validation metrics
"""
results = {}
results['MODEL'] = modelFileName
results['BUOY'] = obsFileName
results['YEAR'] = year
results['MONTH'] = month
results['PARAM'] = obsVarName
for key in metrics.keys():
results[key] = metrics[key]
return results
[docs]def store_valid_results(store, results):
"""
Store validation results record in the results dictionary
"""
store['MODEL'].append(results['MODEL'])
store['OBSERVATION'].append(results['BUOY'])
store['YEAR'].append(results['YEAR'])
store['MONTH'].append(results['MONTH'])
store['PARAM'].append(results['PARAM'])
store['R'].append(results['R'])
store['MB'].append(results['MB'])
store['NMB'].append(results['NMB'])
store['MAE'].append(results['MAE'])
store['NMAE'].append(results['NMAE'])
store['RMSE'].append(results['RMSE'])
store['NRMSE'].append(results['NRMSE'])
store['SI'].append(results['SI'])
store['NSMPL'].append(results['NSMPL'])
return
[docs]def display_tabulated_results(store, mVarName):
"""
Display validation results as a table on the standard output
"""
print('Validation Statistics for', mVarName, '\n')
fmtstr = "{:<60}{:<25}{:^7}{:^7}{:^7}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}"
for row in zip(*([key] + (value) for key, value in store.items())):
print(fmtstr.format(*row))
return
[docs]def save_tabulated_results(store, platform, mVarName, outpath):
"""
Save tabulated validation results as an ASCII file
"""
outtxtname = platform+'_VALIDATION_STATS_'+mVarName+'.txt'
outtxtfile = os.path.join(outpath,outtxtname)
with open(outtxtfile,'w') as ofile:
ofile.write('Validation Statistics for '+mVarName+'\n')
fmtstr = "{:<60}{:<25}{:^7}{:^7}{:^7}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}"
for row in zip(*([key] + (value) for key, value in store.items())):
ofile.write(fmtstr.format(*row)+'\n')
outmatname = platform+'_VALIDATION_STATS_'+mVarName+'.mat'
outmatfile = os.path.join(outpath,outmatname)
saveFlg = sio.savemat(outmatfile,store)
return saveFlg
[docs]def save_cleaned_ts_data(t, rscd, buoy, platform, mVarName, outpath, year, month):
"""
Save cleaned matched model and buoy timeseries as a matlab \\*.mat binary file
"""
yrMnthStr = '_'+str(year)+'_'+str(month).zfill(2)
outdatname = platform+'_VALIDATION_TS_'+mVarName+yrMnthStr+'.mat'
data = {}
data['platform'] = platform
data['var'] = mVarName
data['year'] = year
data['month'] = month
data['time'] = t
data['rscd'] = rscd
data['buoy'] = buoy
outdatfile = os.path.join(outpath, outdatname)
saveFlg = sio.savemat(outdatfile, data)
return saveFlg
[docs]def load_binary_results(platform, mVarName, datapath):
"""
load binary results from a matlab file
"""
matname = platform+'_VALIDATION_STATS_'+mVarName+'.mat'
matfile = os.path.join(datapath,matname)
data = sio.loadmat(matfile)
store = initialize_results()
for key in data.keys():
if key[0] != '_':
if len(data[key].shape) == 1:
store[key] = data[key].tolist()
else:
store[key] = data[key][0].tolist()
return store
[docs]def process_record(rec,
buoyFmt: str, platform: str,
mVarName: str, varOptions: list,
pVarName: str, results_dir: str,
angularData=False, plot_results=False):
"""
Process a matched model/buoy data record
"""
rscdpath = copy.copy(rec[0])
rscdfname = copy.copy(rec[1])
rscdfile = os.path.join(rscdpath, rscdfname).replace('\\', '/')
rscd, rscd_t, rscd_var, wmtw = extract_model(rscdfile, mVarName)
#minNSampl = len(rscd_t)//2
minNSampl = 200
print(rscdfname)
buoypath = copy.copy(rec[2])
buoyfname = copy.copy(rec[3])
buoyfile = os.path.join(buoypath, buoyfname).replace('\\', '/')
bVarName, buoy, buoy_t, buoy_var = extract_obs(buoyfile, buoyFmt, varOptions)
print('Buoy file = ', buoyfname)
print('Buoy sampling period = ', buoy.sample_period)
year = copy.copy(rec[4])
month = copy.copy(rec[5])
valid_results = None
if bVarName != '':
print('Variable name = ', bVarName)
overlaps, tindx = wmtw.timeOverlap(buoy_t)
if (tindx[0] is not None) & (tindx[1] is not None):
ngood = len(np.where(buoy_var[tindx[0]:(tindx[1]+1)] != np.nan)[0])
else:
ngood = 0
if overlaps & (ngood >= minNSampl):
t0 = np.where(buoy_t >= rscd_t[0])[0][0]
t1 = np.where(buoy_t <= rscd_t[-1])[0][-1]
print('Number of good data points = ', ngood)
print('Points in RSCD time series = ', len(rscd_t))
print('Points in Buoy time series = ', len(buoy_t[t0:(t1+1)]))
print('Start times (model, in situ): ', rscd_t[0], buoy_t[t0])
print('End times (model, in situ): ', rscd_t[-1], buoy_t[t1])
model = []
obs = []
sample_period = np.median(np.diff(buoy_t[t0:(t1+1)]))*24.0
#if buoy.sample_period != 1.0:
if sample_period != 1.0:
# Get common indices
max_dt = np.max(np.diff(buoy_t[t0:(t1+1)]))
print('Maximum dt in obs =', max_dt*24., 'hours')
# Interpolate obs onto model times
t_mod = rscd_t.copy()
v_mod = rscd_var.copy()
t_obs = buoy_t[t0:(t1+1)].copy()
v_obs = buoy_var[t0:(t1+1)].copy()
v_obs = time_interpolate_obs(t_obs, v_obs, t_mod)
# Plot time series
if plot_results:
fig = plot_time_series(t_mod, v_mod, t_mod, v_obs,
bVarName, buoy.platform,
pVarName, rscdfname,
year, month,
[rscd_t[0], rscd_t[-1]])
plotname = ''.join(
('RSCD_', buoy.platform, '_',
pVarName,'_',
str(year),'_',
str(month).zfill(2),
'_TS.png'))
plotfile = os.path.join(results_dir,plotname)
fig.savefig(plotfile)
plot_close(fig)
# Select common points between timeseries
nans, x = np.isnan(v_obs), lambda z: z.nonzero()[0]
indices = x(~nans)
# Calculate validation metrics
model = v_mod[indices].copy()
obs = v_obs[indices].copy()
nsampl = len(indices)
save_cleaned_ts_data(t_mod, v_mod, v_obs, platform, mVarName, results_dir, year, month )
elif buoy.sample_period == 1.0:
# Get common indices
both = set(list(rscd_t)).intersection(list(buoy_t))
if len(both) > 0:
# Interpolate obs onto model times
t_mod = rscd_t.copy()
v_mod = rscd_var.copy()
t_obs = buoy_t[t0:(t1+1)].copy()
v_obs = buoy_var[t0:(t1+1)].copy()
# Plot time series
if plot_results:
fig = plot_time_series(t_mod, v_mod, t_obs, v_obs,
bVarName, buoy.platform,
pVarName, rscdfname,
year, month,
[rscd_t[0], rscd_t[-1]])
plotname = ''.join(
('RSCD_', buoy.platform, '_',
pVarName,'_',
str(year),'_',
str(month).zfill(2),
'_TS.png'))
plotfile = os.path.join(results_dir,plotname)
fig.savefig(plotfile)
plot_close(fig)
# Get common data indices
nans, x = np.isnan(v_obs), lambda z: z.nonzero()[0]
indices = x(~nans)
# Select common points between timeseries
model = v_mod[indices].copy()
obs = v_obs[indices].copy()
nsampl = len(both)
save_cleaned_ts_data(t_mod, v_mod, v_obs, platform, mVarName, results_dir, year, month )
# Calculate validation metrics
if (len(model) > minNSampl) & (len(obs) > minNSampl):
if angularData:
metrics = calculate_circular_metrics(model, obs, nsampl)
else:
metrics = calculate_metrics(model, obs, nsampl)
valid_results = results_record(rscdfname, buoyfname,
bVarName,
year, month, metrics)
# Plot correlation
if plot_results:
fig = plot_correlation(obs, model, bVarName,
buoy.platform,
pVarName, rscdfname,
year, month,
metrics)
plotname = ''.join(
('RSCD_', buoy.platform, '_',
pVarName,'_',
str(year),'_',
str(month).zfill(2),
'_CORR.png'))
plotfile = os.path.join(results_dir,plotname)
fig.savefig(plotfile)
plot_close(fig)
else:
print('No common indices found for metrics calculations.')
print()
else:
print('Insuffient valid data for statistics. (# Good = ',
ngood, ')')
print()
else:
print('No variable match with ', varOptions)
print('Buoy variables: ', buoy.file['vars'])
print()
return valid_results
[docs]def validate_records(records, dataFmt: str, platform: str,
mVarName: str, varOptions: list,
pVarName,
results_dir,
plot_results=False):
"""
Process a set of matched model/buoy data records
"""
# Create results dictionary
results_store = initialize_results()
if mVarName == 'dir':
angularData=True
else:
angularData=False
n_valid_results = 0
for rec in records:
result = process_record(rec, dataFmt, platform,
mVarName, varOptions,
pVarName, results_dir,
angularData, plot_results)
if result is not None:
store_valid_results(results_store, result)
n_valid_results += 1
return n_valid_results, results_store