Source code for Validation

# -*- coding: utf-8 -*-
"""
This module contains the functions for calculating parameter based validation statistics for 
a set of matchup records. 

:Dependencies [External]: os, scipy, numpy, copy
:Dependencies [Internal]: waveval.ModelData, waveval.ObsData, waveval.MatchUpDatabase, waveval.Geometry

"""
# ----------------------------------------------------------------------------
#   IMPORTS
# ----------------------------------------------------------------------------
# Standard Python Dependencies
import os
import scipy.io as sio
import copy
import numpy as np
from scipy.interpolate import interp1d
# Non-Standard Python Dependencies
# Local Module Dependencies
from waveval import ModelData as MData
from waveval import ObsData as OData
from waveval import MatchUpDatabase as MDB
from waveval.Geometry import pol2cmplx
from waveval.Graphics import plot_time_series, plot_correlation, plot_close
# Other Dependencies

# ----------------------------------------------------------------------------
#   GLOBAL VARIABLES
# ----------------------------------------------------------------------------


# ----------------------------------------------------------------------------
#   CLASS DEFINITIONS
# ----------------------------------------------------------------------------


# ----------------------------------------------------------------------------
#   FUNCTION DEFINITIONS
# ----------------------------------------------------------------------------
# ======================== Data Selection =================================
[docs]def getVarName(record, Options: list): """ Get variable names based on processsing options list """ hasVar = [False]*len(Options) for indx, vn in enumerate(Options): hasVar[indx] = record.hasVar(vn) varIndx = np.where(hasVar)[0] if len(varIndx) != 0: varName = Options[varIndx[0]] else: varName = '' return varName
[docs]def getSurfaceIndx(record, dataFmt): """ Get index of surface layer in depth data """ if dataFmt == 'InSituTAC': depth = record.getVar('DEPH') elif dataFmt == 'SeaDataNet': depth = record.getVar('DEPTH') dims = depth.shape if len(dims) <= 1: surfIndx = [0] else: surfIndx = np.where(depth[0, :] == 0.0)[0] return surfIndx[0]
# ========================== Statistics ===================================
[docs]def corr_coef(model, obs): """ Validation Metric: Correlation Coefficient :param numpy.array, float model: model data time series :param numpy.array, floatobs: observation time series mapped onto model time stamps :return: correlation coefficient (R) :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) num = np.nansum((m-np.nanmean(m))*(o-np.nanmean(o))) msqr = np.nansum(np.square(m-np.nanmean(m))) osqr = np.nansum(np.square(o-np.nanmean(o))) denom = np.sqrt(msqr*osqr) R = num/denom return R
[docs]def bias(model, obs): """ Validation Metric: Bias :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: bias (b) :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) b = np.nanmean(m)-np.nanmean(o) return b
[docs]def mean_bias(model, obs): """ Validation Metric: Mean Bias :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: mb :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) mb = np.nanmean(m-o) return mb
[docs]def mean_absolute_error(model, obs): """ Validation Metric: Mean absolute error :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: mae :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) mae = np.nanmean(np.abs(m-o)) return mae
[docs]def norm_ma_error(model, obs): """ Validation Metric: Normalised mean absolute error :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: nmae :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) nmae = np.nansum(np.abs(m-o))/np.nansum(o) nmae = nmae * 100.0 return nmae
[docs]def norm_mean_bias(model, obs): """ Validation Metric: Normalised mean bias :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: nmb :rtype: float """ # Returns value as percentage m = np.squeeze(model) o = np.squeeze(obs) nmb = np.nansum(m-o)/np.nansum(o) nmb = nmb * 100.0 return nmb
[docs]def rms_error(model, obs): """ Validation Metric: Root-mean-square error :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: rmse :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) N = min(len(model != np.nan), len(obs != np.nan)) rmse = np.sqrt(np.nansum(np.square(m-o))/N) return rmse
[docs]def norm_rmse(model, obs): """ Validation Metric: Normalised root-mean-square error :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: nrmse :rtype: float """ # Returns value as percentage o = np.squeeze(obs) nrmse = rms_error(model, obs) / np.sqrt(np.nanmean(np.square(o))) nrmse = nrmse * 100.0 return nrmse
[docs]def scatter_index(model, obs): """ Validation Metric: Scatter index :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: si :rtype: float """ # Returns value as percentage m = np.squeeze(model) o = np.squeeze(obs) num = np.nansum(np.square((m-np.nanmean(m))-(o-np.nanmean(o)))) denom = np.nansum(np.square(o)) si = np.sqrt(num/denom) si = si * 100.0 return si
[docs]def reliability_index(model, obs): """ Validation Metric: Reliability index :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: ri :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) N = min(len(model != np.nan), len(obs != np.nan)) ri = np.exp(np.sqrt(np.nansum(np.square(np.log(o/m)))/N)) return ri
[docs]def skill_score(model, obs): """ Validation Metric: Skill score :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: ss :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) num = np.nansum(np.square(np.abs(m-o))) mean_o = np.nanmean(o) denom = np.nansum(np.square(np.abs(m-mean_o)+np.abs(o-mean_o))) ss = 1.0 - np.sqrt(num/denom) return ss
[docs]def model_efficiency(model, obs): """ Validation Metric: Model efficiency :param numpy.array, float model: model data time series :param numpy.array, float obs: observation time series mapped onto model time stamps :return: mef :rtype: float """ m = np.squeeze(model) o = np.squeeze(obs) mean_m = np.nanmean(m) mean_o = np.nanmean(o) denom = np.nansum(np.square(o-mean_o)) num = denom-np.nansum(np.square(m-mean_m)) mef = num/denom return mef
# ====================== Circular Statistics ===============================
[docs]def circ_mod_pi(theta): """ Fix angular range in radians to -\\pi <= \\theta <= \\pi :param numpy.array, float theta: vector of angular values in radians :return: theta in range -\\pi to \\pi :rtype: numpy.array, float """ # This function requires theta in radians if theta.size > 1: indx = np.where(theta < -np.pi)[0] theta[indx] = theta[indx]+2.0*np.pi indx = np.where(theta > np.pi)[0] theta[indx] = theta[indx]-2.0*np.pi else: if theta < -np.pi: theta += 2.0*np.pi elif theta > np.pi: theta -= 2.0*np.pi return theta
[docs]def circ_mean(theta, degrees=True): """ Calculate mean of angular data for range (-180 to 180) Parameters ---------- theta : numpy.array, float vector of angular values. degrees : bool, optional angular data units flag. The default is degrees = True. Returns ------- mean_theta : float mean of the angular data in same units as passed in. """ if degrees: thetar = circ_mod_pi(np.radians(np.squeeze(theta))) else: thetar = circ_mod_pi(np.squeeze(theta)) mean_theta = np.nanmean(thetar) if degrees: mean_theta = np.degrees(mean_theta) return mean_theta
[docs]def circ_std(theta, degrees=True): """ Calculate standard deviation of angular data for range (-180 to 180) Parameters ---------- theta : numpy.array, float vector of angular values. degrees : bool, optional angular data units flag. The default is degrees = True. Returns ------- std_theta : float stanadrd deviation of the angular data in same units as passed in. """ if degrees: thetar = circ_mod_pi(np.radians(np.squeeze(theta))) else: thetar = circ_mod_pi(np.squeeze(theta)) std_theta = np.nanstd(thetar) if degrees: std_theta = np.degrees(std_theta) return std_theta
[docs]def circ_corr(theta1, theta2, degrees=True): """ Validation Metric (Circular): Correlation Coefficient :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular correlation coefficient (c_corr) :rtype: float """ if degrees: theta1r = np.radians(np.squeeze(theta1), dtype='double') theta2r = np.radians(np.squeeze(theta2), dtype='double') else: theta1r = np.double(np.squeeze(theta1)) theta2r = np.double(np.squeeze(theta2)) cmplx1 = pol2cmplx(np.ones_like(theta1r, dtype=np.double), theta1r) cmplx2 = pol2cmplx(np.ones_like(theta2r, dtype=np.double), theta2r) c_corr = np.abs(np.cov(cmplx1,cmplx2)[0,1])/(np.std(cmplx1)*np.std(cmplx2)) return c_corr
[docs]def circ_mean_bias(theta1, theta2, degrees=True): """ Validation Metric (Circular): Mean Bias :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular mean bias (mb) :rtype: float """ theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: dtheta = circ_mod_pi(np.radians(theta1 - theta2)) else: dtheta = circ_mod_pi(theta1 - theta2) circ_mb = np.nanmean(dtheta) if degrees: circ_mb = np.degrees(circ_mb) return circ_mb
[docs]def circ_norm_mean_bias(theta1, theta2, degrees=True): """ Validation Metric (Circular): Normalised mean Bias :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular normalised mean bias (nmb) as a percentage :rtype: float """ # Returns value as percentage theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: circ_nmb = circ_mean_bias(theta1, theta2, degrees)/np.double(180.0) else: circ_nmb = circ_mean_bias(theta1, theta2, degrees)/(np.pi) circ_nmb = circ_nmb * 100.0 return circ_nmb
[docs]def circ_mean_abs_error(theta1, theta2, degrees=True): """ Validation Metric (Circular): Mean absolute error :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular mean absolute error (mae) :rtype: float """ theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: dtheta = circ_mod_pi(np.radians(theta1 - theta2)) else: dtheta = circ_mod_pi(theta1 - theta2) circ_mae = np.nanmean(np.abs(dtheta)) if degrees: circ_mae = np.degrees(circ_mae) return circ_mae
[docs]def circ_norm_ma_error(theta1, theta2, degrees=True): """ Validation Metric (Circular): Normalised mean absolute error :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular normalised mean absolute error (mae) as a percentage. :rtype: float """ # Returns value as percentage theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/np.double(180.0) else: circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/(np.pi) circ_nmae = circ_nmae * 100.0 return circ_nmae
[docs]def circ_rms_error(theta1, theta2, degrees=True): """ Validation Metric (Circular): Root-mean-square error :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular RMS error (circ_rmse) :rtype: float """ theta1 = np.squeeze(theta1) theta2 = np.squeeze(theta2) if degrees: dtheta = circ_mod_pi(np.radians(theta1 - theta2)) else: dtheta = circ_mod_pi(theta1 - theta2) circ_rmse = np.sqrt(np.nanmean(np.square(dtheta))) if degrees: circ_rmse = np.degrees(circ_rmse) return circ_rmse
[docs]def circ_norm_rmse(theta1, theta2, degrees=True): """ Validation Metric (Circular): Normalised root-mean-square error :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular normalised RMS erro (circ)nrmse as a percentage :rtype: float """ # Returns value as percentage theta1 = np.squeeze(theta1) theta2 = np.squeeze(theta2) if degrees: dtheta = circ_mod_pi(np.radians(theta1 - theta2)) else: dtheta = circ_mod_pi(theta1 - theta2) num = np.sqrt(np.nanmean(np.square(dtheta))) denom = np.pi circ_nrmse = (num / denom) * 100.0 return circ_nrmse
[docs]def circ_scatter_index(theta1, theta2, degrees=True): """ Validation Metric (Circular): Scatter index :param numpy.array,float theta1: vector of angular data (modelled) :param numpy.array,float theta1: vector of angualr data (observed) :param bool degrees: angular data units flag. The default is degrees = True :return: circular scatter index (circ_si) :rtype: float """ # Returns value as percentage theta1 = np.double(np.squeeze(theta1)) theta2 = np.double(np.squeeze(theta2)) if degrees: theta1 = np.radians(theta1) theta2 = np.radians(theta2) theta1 = circ_mod_pi(theta1) theta2 = circ_mod_pi(theta2) mtheta1 = np.nanmean(theta1) mtheta2 = np.nanmean(theta2) dtheta1 = circ_mod_pi(theta1-mtheta1) dtheta2 = circ_mod_pi(theta2-mtheta2) num = np.nanmean(np.square(circ_mod_pi(dtheta1-dtheta2))) denom = np.square(np.pi) circ_si = np.sqrt(num/denom) * 100.0 return circ_si
# =========================== Data Extraction =============================
[docs]def extract_model(rscdfile, mVarName: str): """ Extract model output for a specified variable """ rscd = MData.ww3(rscdfile) wmtw = MDB.temporalCoverage(rscd.time_start, rscd.time_end) rscd_t = rscd.getVar('time') if (mVarName == 'fp') or (mVarName == 'f02'): rscd_var = 1.0/rscd.getVar(mVarName) else: rscd_var = rscd.getVar(mVarName) # if mVarName == 'dir': # theta = rscd_var.copy()*np.pi/180.0 return rscd, rscd_t, rscd_var, wmtw
[docs]def extract_obs(buoyfile, buoyDataFmt: str, varOptions: list): """ Extract *in situ* buoy data for a specified data foramt and variable """ if buoyDataFmt == 'InSituTAC': buoy = OData.wavebuoy(buoyfile) bVarName = getVarName(buoy, varOptions) elif buoyDataFmt == 'SeaDataNet': buoy = OData.emecwb(buoyfile) bVarName = getVarName(buoy, varOptions) else: print('Unknown file format: '+buoyDataFmt) bVarName = '' if bVarName != '': varIndx = getSurfaceIndx(buoy, buoyDataFmt) buoy_t, buoy_var = buoy.getQCTimeseries(bVarName, varIndx) else: varIndx = None buoy_t = [] buoy_var = [] return bVarName, buoy, buoy_t, buoy_var
# ========================== Data Interpolators ===========================
[docs]def time_interpolate_obs(t_obs, t_var, t_model): """ Interpolation of observation data onto the model output timestamps """ intrp_var = np.empty_like(t_model, dtype='float') intrp_var[:] = np.nan for i_ref, t_ref in enumerate(t_model): idx = np.sort(np.argsort(abs(t_obs-t_ref))[0:2]) o_diff = abs(np.diff(t_obs[idx]))*24.0 m_diff = np.min(abs(t_obs[idx]-t_ref))*24.0 if (o_diff < 1.5) and (m_diff < 1.0): if t_ref < t_obs[idx[0]]: intrp_var[i_ref] = t_var[idx[0]] elif t_ref > t_obs[idx[1]]: intrp_var[i_ref] = t_var[idx[1]] else: fnc = interp1d(t_obs[idx], t_var[idx], kind='nearest', bounds_error=False) intrp_var[i_ref] = fnc(t_ref) return intrp_var
# ============================ Analysis Tools =============================
[docs]def initialize_results(): """ Initialise a dictionary for collecting validation results """ results = {} results.update({'MODEL': []}) results.update({'OBSERVATION': []}) results.update({'YEAR': []}) results.update({'MONTH': []}) results.update({'PARAM': []}) results.update({'R': []}) results.update({'MB': []}) results.update({'NMB': []}) results.update({'MAE': []}) results.update({'NMAE': []}) results.update({'RMSE': []}) results.update({'NRMSE': []}) results.update({'SI': []}) results.update({'NSMPL': []}) return results
[docs]def calculate_circular_metrics(model, obs, nsampl, degrees=True): """ Calculate circlar validation metrics for a pair of modelled and observation time series """ R = circ_corr(model, obs, degrees) mb = circ_mean_bias(model, obs, degrees) nmb = circ_norm_mean_bias(model, obs, degrees) mae = circ_mean_abs_error(model, obs, degrees) nmae = circ_norm_ma_error(model, obs, degrees) rmse = circ_rms_error(model, obs, degrees) nrmse = circ_norm_rmse(model, obs, degrees) si = circ_scatter_index(model, obs, degrees) metrics = {} metrics['R'] = R metrics['MB'] = mb metrics['NMB'] = nmb metrics['MAE'] = mae metrics['NMAE'] = nmae metrics['RMSE'] = rmse metrics['NRMSE'] = nrmse metrics['SI'] = si metrics['NSMPL'] = nsampl return metrics
[docs]def calculate_metrics(model, obs, nsampl): """ Calculate standard validation metrics for a pair of modelled and observation time series """ R = corr_coef(model, obs) mb = mean_bias(model, obs) nmb = norm_mean_bias(model, obs) mae = mean_absolute_error(model, obs) nmae = norm_ma_error(model, obs) rmse = rms_error(model, obs) nrmse = norm_rmse(model, obs) si = scatter_index(model, obs) metrics = {} metrics['R'] = R metrics['MB'] = mb metrics['NMB'] = nmb metrics['MAE'] = mae metrics['NMAE'] = nmae metrics['RMSE'] = rmse metrics['NRMSE'] = nrmse metrics['SI'] = si metrics['NSMPL'] = nsampl return metrics
[docs]def results_record(modelFileName, obsFileName, obsVarName, year, month, metrics): """ Create results recod from calculated validation metrics """ results = {} results['MODEL'] = modelFileName results['BUOY'] = obsFileName results['YEAR'] = year results['MONTH'] = month results['PARAM'] = obsVarName for key in metrics.keys(): results[key] = metrics[key] return results
[docs]def store_valid_results(store, results): """ Store validation results record in the results dictionary """ store['MODEL'].append(results['MODEL']) store['OBSERVATION'].append(results['BUOY']) store['YEAR'].append(results['YEAR']) store['MONTH'].append(results['MONTH']) store['PARAM'].append(results['PARAM']) store['R'].append(results['R']) store['MB'].append(results['MB']) store['NMB'].append(results['NMB']) store['MAE'].append(results['MAE']) store['NMAE'].append(results['NMAE']) store['RMSE'].append(results['RMSE']) store['NRMSE'].append(results['NRMSE']) store['SI'].append(results['SI']) store['NSMPL'].append(results['NSMPL']) return
[docs]def display_tabulated_results(store, mVarName): """ Display validation results as a table on the standard output """ print('Validation Statistics for', mVarName, '\n') fmtstr = "{:<60}{:<25}{:^7}{:^7}{:^7}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}" for row in zip(*([key] + (value) for key, value in store.items())): print(fmtstr.format(*row)) return
[docs]def save_tabulated_results(store, platform, mVarName, outpath): """ Save tabulated validation results as an ASCII file """ outtxtname = platform+'_VALIDATION_STATS_'+mVarName+'.txt' outtxtfile = os.path.join(outpath,outtxtname) with open(outtxtfile,'w') as ofile: ofile.write('Validation Statistics for '+mVarName+'\n') fmtstr = "{:<60}{:<25}{:^7}{:^7}{:^7}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}" for row in zip(*([key] + (value) for key, value in store.items())): ofile.write(fmtstr.format(*row)+'\n') outmatname = platform+'_VALIDATION_STATS_'+mVarName+'.mat' outmatfile = os.path.join(outpath,outmatname) saveFlg = sio.savemat(outmatfile,store) return saveFlg
[docs]def save_cleaned_ts_data(t, rscd, buoy, platform, mVarName, outpath, year, month): """ Save cleaned matched model and buoy timeseries as a matlab \\*.mat binary file """ yrMnthStr = '_'+str(year)+'_'+str(month).zfill(2) outdatname = platform+'_VALIDATION_TS_'+mVarName+yrMnthStr+'.mat' data = {} data['platform'] = platform data['var'] = mVarName data['year'] = year data['month'] = month data['time'] = t data['rscd'] = rscd data['buoy'] = buoy outdatfile = os.path.join(outpath, outdatname) saveFlg = sio.savemat(outdatfile, data) return saveFlg
[docs]def load_binary_results(platform, mVarName, datapath): """ load binary results from a matlab file """ matname = platform+'_VALIDATION_STATS_'+mVarName+'.mat' matfile = os.path.join(datapath,matname) data = sio.loadmat(matfile) store = initialize_results() for key in data.keys(): if key[0] != '_': if len(data[key].shape) == 1: store[key] = data[key].tolist() else: store[key] = data[key][0].tolist() return store
[docs]def process_record(rec, buoyFmt: str, platform: str, mVarName: str, varOptions: list, pVarName: str, results_dir: str, angularData=False, plot_results=False): """ Process a matched model/buoy data record """ rscdpath = copy.copy(rec[0]) rscdfname = copy.copy(rec[1]) rscdfile = os.path.join(rscdpath, rscdfname).replace('\\', '/') rscd, rscd_t, rscd_var, wmtw = extract_model(rscdfile, mVarName) #minNSampl = len(rscd_t)//2 minNSampl = 200 print(rscdfname) buoypath = copy.copy(rec[2]) buoyfname = copy.copy(rec[3]) buoyfile = os.path.join(buoypath, buoyfname).replace('\\', '/') bVarName, buoy, buoy_t, buoy_var = extract_obs(buoyfile, buoyFmt, varOptions) print('Buoy file = ', buoyfname) print('Buoy sampling period = ', buoy.sample_period) year = copy.copy(rec[4]) month = copy.copy(rec[5]) valid_results = None if bVarName != '': print('Variable name = ', bVarName) overlaps, tindx = wmtw.timeOverlap(buoy_t) if (tindx[0] is not None) & (tindx[1] is not None): ngood = len(np.where(buoy_var[tindx[0]:(tindx[1]+1)] != np.nan)[0]) else: ngood = 0 if overlaps & (ngood >= minNSampl): t0 = np.where(buoy_t >= rscd_t[0])[0][0] t1 = np.where(buoy_t <= rscd_t[-1])[0][-1] print('Number of good data points = ', ngood) print('Points in RSCD time series = ', len(rscd_t)) print('Points in Buoy time series = ', len(buoy_t[t0:(t1+1)])) print('Start times (model, in situ): ', rscd_t[0], buoy_t[t0]) print('End times (model, in situ): ', rscd_t[-1], buoy_t[t1]) model = [] obs = [] sample_period = np.median(np.diff(buoy_t[t0:(t1+1)]))*24.0 #if buoy.sample_period != 1.0: if sample_period != 1.0: # Get common indices max_dt = np.max(np.diff(buoy_t[t0:(t1+1)])) print('Maximum dt in obs =', max_dt*24., 'hours') # Interpolate obs onto model times t_mod = rscd_t.copy() v_mod = rscd_var.copy() t_obs = buoy_t[t0:(t1+1)].copy() v_obs = buoy_var[t0:(t1+1)].copy() v_obs = time_interpolate_obs(t_obs, v_obs, t_mod) # Plot time series if plot_results: fig = plot_time_series(t_mod, v_mod, t_mod, v_obs, bVarName, buoy.platform, pVarName, rscdfname, year, month, [rscd_t[0], rscd_t[-1]]) plotname = ''.join( ('RSCD_', buoy.platform, '_', pVarName,'_', str(year),'_', str(month).zfill(2), '_TS.png')) plotfile = os.path.join(results_dir,plotname) fig.savefig(plotfile) plot_close(fig) # Select common points between timeseries nans, x = np.isnan(v_obs), lambda z: z.nonzero()[0] indices = x(~nans) # Calculate validation metrics model = v_mod[indices].copy() obs = v_obs[indices].copy() nsampl = len(indices) save_cleaned_ts_data(t_mod, v_mod, v_obs, platform, mVarName, results_dir, year, month ) elif buoy.sample_period == 1.0: # Get common indices both = set(list(rscd_t)).intersection(list(buoy_t)) if len(both) > 0: # Interpolate obs onto model times t_mod = rscd_t.copy() v_mod = rscd_var.copy() t_obs = buoy_t[t0:(t1+1)].copy() v_obs = buoy_var[t0:(t1+1)].copy() # Plot time series if plot_results: fig = plot_time_series(t_mod, v_mod, t_obs, v_obs, bVarName, buoy.platform, pVarName, rscdfname, year, month, [rscd_t[0], rscd_t[-1]]) plotname = ''.join( ('RSCD_', buoy.platform, '_', pVarName,'_', str(year),'_', str(month).zfill(2), '_TS.png')) plotfile = os.path.join(results_dir,plotname) fig.savefig(plotfile) plot_close(fig) # Get common data indices nans, x = np.isnan(v_obs), lambda z: z.nonzero()[0] indices = x(~nans) # Select common points between timeseries model = v_mod[indices].copy() obs = v_obs[indices].copy() nsampl = len(both) save_cleaned_ts_data(t_mod, v_mod, v_obs, platform, mVarName, results_dir, year, month ) # Calculate validation metrics if (len(model) > minNSampl) & (len(obs) > minNSampl): if angularData: metrics = calculate_circular_metrics(model, obs, nsampl) else: metrics = calculate_metrics(model, obs, nsampl) valid_results = results_record(rscdfname, buoyfname, bVarName, year, month, metrics) # Plot correlation if plot_results: fig = plot_correlation(obs, model, bVarName, buoy.platform, pVarName, rscdfname, year, month, metrics) plotname = ''.join( ('RSCD_', buoy.platform, '_', pVarName,'_', str(year),'_', str(month).zfill(2), '_CORR.png')) plotfile = os.path.join(results_dir,plotname) fig.savefig(plotfile) plot_close(fig) else: print('No common indices found for metrics calculations.') print() else: print('Insuffient valid data for statistics. (# Good = ', ngood, ')') print() else: print('No variable match with ', varOptions) print('Buoy variables: ', buoy.file['vars']) print() return valid_results
[docs]def validate_records(records, dataFmt: str, platform: str, mVarName: str, varOptions: list, pVarName, results_dir, plot_results=False): """ Process a set of matched model/buoy data records """ # Create results dictionary results_store = initialize_results() if mVarName == 'dir': angularData=True else: angularData=False n_valid_results = 0 for rec in records: result = process_record(rec, dataFmt, platform, mVarName, varOptions, pVarName, results_dir, angularData, plot_results) if result is not None: store_valid_results(results_store, result) n_valid_results += 1 return n_valid_results, results_store