Source code for Validation

# -*- coding: utf-8 -*-
"""
This module contains the functions for calculating parameter based validation statistics for 
a set of matchup records. 

:Dependencies [External]: os, scipy, numpy, copy
:Dependencies [Internal]: waveval.ModelData, waveval.ObsData, waveval.MatchUpDatabase, waveval.Geometry

"""
# ----------------------------------------------------------------------------
#   IMPORTS
# ----------------------------------------------------------------------------
# Standard Python Dependencies
import os
import scipy.io as sio
import copy
import numpy as np
from scipy.interpolate import interp1d
# Non-Standard Python Dependencies
# Local Module Dependencies
from waveval import ModelData as MData
from waveval import ObsData as OData
from waveval import MatchUpDatabase as MDB
from waveval.Geometry import pol2cmplx
from waveval.Graphics import plot_time_series, plot_correlation, plot_close
# Other Dependencies

# ----------------------------------------------------------------------------
#   GLOBAL VARIABLES
# ----------------------------------------------------------------------------


# ----------------------------------------------------------------------------
#   CLASS DEFINITIONS
# ----------------------------------------------------------------------------


# ----------------------------------------------------------------------------
#   FUNCTION DEFINITIONS
# ----------------------------------------------------------------------------
# ======================== Data Selection =================================
[docs]def getVarName(record, Options: list):
    """
    Get variable names based on processsing options list
    """
    hasVar = [False]*len(Options)
    for indx, vn in enumerate(Options):
        hasVar[indx] = record.hasVar(vn)
    varIndx = np.where(hasVar)[0]
    if len(varIndx) != 0:
        varName = Options[varIndx[0]]
    else:
        varName = ''
    return varName


[docs]def getSurfaceIndx(record, dataFmt):
    """
    Get index of surface layer in depth data
    """
    if dataFmt == 'InSituTAC':
        depth = record.getVar('DEPH')
    elif dataFmt == 'SeaDataNet':
        depth = record.getVar('DEPTH')
    dims = depth.shape
    if len(dims) <= 1:
        surfIndx = [0]
    else:
        surfIndx = np.where(depth[0, :] == 0.0)[0]
    return surfIndx[0]


# ========================== Statistics ===================================
[docs]def corr_coef(model, obs):
    """
    Validation Metric: Correlation Coefficient
    
    :param numpy.array, float model: model data time series
    :param numpy.array, floatobs: observation time series mapped onto model time stamps
    
    :return: correlation coefficient (R)
    :rtype: float
    """
    m = np.squeeze(model)
    o = np.squeeze(obs)
    num = np.nansum((m-np.nanmean(m))*(o-np.nanmean(o)))
    msqr = np.nansum(np.square(m-np.nanmean(m)))
    osqr = np.nansum(np.square(o-np.nanmean(o)))
    denom = np.sqrt(msqr*osqr)
    R = num/denom
    return R


[docs]def bias(model, obs):
    """
    Validation Metric: Bias
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: bias (b)
    :rtype: float
    """
    m = np.squeeze(model)
    o = np.squeeze(obs)
    b = np.nanmean(m)-np.nanmean(o)
    return b


[docs]def mean_bias(model, obs):
    """
    Validation Metric: Mean Bias
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: mb
    :rtype: float
    """
    m = np.squeeze(model)
    o = np.squeeze(obs)
    mb = np.nanmean(m-o)
    return mb


[docs]def mean_absolute_error(model, obs):
    """
    Validation Metric: Mean absolute error
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: mae
    :rtype: float
    """
    m = np.squeeze(model)
    o = np.squeeze(obs)
    mae = np.nanmean(np.abs(m-o))
    return mae


[docs]def norm_ma_error(model, obs):
    """
    Validation Metric: Normalised mean absolute error
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: nmae
    :rtype: float
    """
    m = np.squeeze(model)
    o = np.squeeze(obs)
    nmae = np.nansum(np.abs(m-o))/np.nansum(o)
    nmae = nmae * 100.0
    return nmae


[docs]def norm_mean_bias(model, obs):
    """
    Validation Metric: Normalised mean bias
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: nmb
    :rtype: float
    """
    # Returns value as percentage
    m = np.squeeze(model)
    o = np.squeeze(obs)
    nmb = np.nansum(m-o)/np.nansum(o)
    nmb = nmb * 100.0
    return nmb


[docs]def rms_error(model, obs):
    """
    Validation Metric: Root-mean-square error
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: rmse
    :rtype: float
    """
    m = np.squeeze(model)
    o = np.squeeze(obs)
    N = min(len(model != np.nan), len(obs != np.nan))
    rmse = np.sqrt(np.nansum(np.square(m-o))/N)
    return rmse


[docs]def norm_rmse(model, obs):
    """
    Validation Metric: Normalised root-mean-square error
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: nrmse
    :rtype: float
    """
    # Returns value as percentage
    o = np.squeeze(obs)
    nrmse = rms_error(model, obs) / np.sqrt(np.nanmean(np.square(o)))
    nrmse = nrmse * 100.0
    return nrmse


[docs]def scatter_index(model, obs):
    """
    Validation Metric: Scatter index
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: si
    :rtype: float
    """
    # Returns value as percentage
    m = np.squeeze(model)
    o = np.squeeze(obs)
    num = np.nansum(np.square((m-np.nanmean(m))-(o-np.nanmean(o))))
    denom = np.nansum(np.square(o))
    si = np.sqrt(num/denom)
    si = si * 100.0
    return si


[docs]def reliability_index(model, obs):
    """
    Validation Metric: Reliability index
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: ri
    :rtype: float
    """
    m = np.squeeze(model)
    o = np.squeeze(obs)
    N = min(len(model != np.nan), len(obs != np.nan))
    ri = np.exp(np.sqrt(np.nansum(np.square(np.log(o/m)))/N))
    return ri


[docs]def skill_score(model, obs):
    """
    Validation Metric: Skill score
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: ss
    :rtype: float
    """
    m = np.squeeze(model)
    o = np.squeeze(obs)
    num = np.nansum(np.square(np.abs(m-o)))
    mean_o = np.nanmean(o)
    denom = np.nansum(np.square(np.abs(m-mean_o)+np.abs(o-mean_o)))
    ss = 1.0 - np.sqrt(num/denom)
    return ss


[docs]def model_efficiency(model, obs):
    """
    Validation Metric: Model efficiency
    
    :param numpy.array, float model: model data time series
    :param numpy.array, float obs: observation time series mapped onto model time stamps
    
    :return: mef
    :rtype: float
    """
    m = np.squeeze(model)
    o = np.squeeze(obs)
    mean_m = np.nanmean(m)
    mean_o = np.nanmean(o)
    denom = np.nansum(np.square(o-mean_o))
    num = denom-np.nansum(np.square(m-mean_m))
    mef = num/denom
    return mef


# ====================== Circular Statistics ===============================
[docs]def circ_mod_pi(theta):
    """
    Fix angular range in radians to -\\pi <= \\theta <= \\pi

    :param numpy.array, float theta: vector of angular values in radians
    
    :return: theta in range -\\pi to \\pi
    :rtype: numpy.array, float
    """
    # This function requires theta in radians
    if theta.size > 1:
        indx = np.where(theta < -np.pi)[0]
        theta[indx] = theta[indx]+2.0*np.pi
        indx = np.where(theta > np.pi)[0]
        theta[indx] = theta[indx]-2.0*np.pi
    else:
        if theta < -np.pi:
            theta += 2.0*np.pi
        elif theta > np.pi:
            theta -= 2.0*np.pi
    return theta

[docs]def circ_mean(theta, degrees=True):
    """
    Calculate mean of angular data for range (-180 to 180)

    Parameters
    ----------
    theta : numpy.array, float
        vector of angular values.
    degrees : bool, optional
        angular data units flag. The default is degrees = True.

    Returns
    -------
    mean_theta : float
        mean of the angular data in same units as passed in.

    """
    if degrees:
        thetar =  circ_mod_pi(np.radians(np.squeeze(theta)))
    else:
        thetar = circ_mod_pi(np.squeeze(theta))
    mean_theta = np.nanmean(thetar)
    if degrees:
        mean_theta = np.degrees(mean_theta)
    return mean_theta


[docs]def circ_std(theta, degrees=True):
    """
    Calculate standard deviation of angular data for range (-180 to 180)

    Parameters
    ----------
    theta : numpy.array, float
        vector of angular values.
    degrees : bool, optional
        angular data units flag. The default is degrees = True.

    Returns
    -------
    std_theta : float
        stanadrd deviation of the angular data in same units as passed in.

    """
    if degrees:
        thetar =  circ_mod_pi(np.radians(np.squeeze(theta)))
    else:
        thetar = circ_mod_pi(np.squeeze(theta))
    std_theta = np.nanstd(thetar)
    if degrees:
        std_theta = np.degrees(std_theta)
    return std_theta


[docs]def circ_corr(theta1, theta2, degrees=True):
    """
    Validation Metric (Circular): Correlation Coefficient

    :param numpy.array,float theta1: vector of angular data (modelled)
    :param numpy.array,float theta1: vector of angualr data (observed)
    :param bool degrees: angular data units flag. The default is degrees = True
    
    :return: circular correlation coefficient (c_corr)
    :rtype: float
    """
    if degrees:
        theta1r = np.radians(np.squeeze(theta1), dtype='double')
        theta2r = np.radians(np.squeeze(theta2), dtype='double')
    else:
        theta1r = np.double(np.squeeze(theta1))
        theta2r = np.double(np.squeeze(theta2))
    cmplx1 = pol2cmplx(np.ones_like(theta1r, dtype=np.double), theta1r)
    cmplx2 = pol2cmplx(np.ones_like(theta2r, dtype=np.double), theta2r)
    c_corr = np.abs(np.cov(cmplx1,cmplx2)[0,1])/(np.std(cmplx1)*np.std(cmplx2))
    return c_corr


[docs]def circ_mean_bias(theta1, theta2, degrees=True):
    """
    Validation Metric (Circular): Mean Bias

    :param numpy.array,float theta1: vector of angular data (modelled)
    :param numpy.array,float theta1: vector of angualr data (observed)
    :param bool degrees: angular data units flag. The default is degrees = True
    
    :return: circular mean bias (mb)
    :rtype: float
    """
    theta1 = np.double(np.squeeze(theta1))
    theta2 = np.double(np.squeeze(theta2))
    if degrees:
        dtheta = circ_mod_pi(np.radians(theta1 - theta2))
    else:
        dtheta = circ_mod_pi(theta1 - theta2)
    circ_mb = np.nanmean(dtheta)
    if degrees:
        circ_mb = np.degrees(circ_mb)
    return circ_mb


[docs]def circ_norm_mean_bias(theta1, theta2, degrees=True):
    """
    Validation Metric (Circular): Normalised mean Bias

    :param numpy.array,float theta1: vector of angular data (modelled)
    :param numpy.array,float theta1: vector of angualr data (observed)
    :param bool degrees: angular data units flag. The default is degrees = True
    
    :return: circular normalised mean bias (nmb) as a percentage
    :rtype: float
    """
    # Returns value as percentage
    theta1 = np.double(np.squeeze(theta1))
    theta2 = np.double(np.squeeze(theta2))
    if degrees:
        circ_nmb = circ_mean_bias(theta1, theta2, degrees)/np.double(180.0)
    else:
        circ_nmb = circ_mean_bias(theta1, theta2, degrees)/(np.pi)
    circ_nmb = circ_nmb * 100.0
    return circ_nmb


[docs]def circ_mean_abs_error(theta1, theta2, degrees=True):
    """
    Validation Metric (Circular): Mean absolute error

    :param numpy.array,float theta1: vector of angular data (modelled)
    :param numpy.array,float theta1: vector of angualr data (observed)
    :param bool degrees: angular data units flag. The default is degrees = True
    
    :return: circular mean absolute error (mae)
    :rtype: float
    """
    theta1 = np.double(np.squeeze(theta1))
    theta2 = np.double(np.squeeze(theta2))
    if degrees:
        dtheta = circ_mod_pi(np.radians(theta1 - theta2))
    else:
        dtheta = circ_mod_pi(theta1 - theta2)
    circ_mae = np.nanmean(np.abs(dtheta))
    if degrees:
        circ_mae = np.degrees(circ_mae)
    return circ_mae


[docs]def circ_norm_ma_error(theta1, theta2, degrees=True):
    """
    Validation Metric (Circular): Normalised mean absolute error

    :param numpy.array,float theta1: vector of angular data (modelled)
    :param numpy.array,float theta1: vector of angualr data (observed)
    :param bool degrees: angular data units flag. The default is degrees = True
    
    :return: circular normalised mean absolute error (mae) as a percentage.
    :rtype: float
    """
    # Returns value as percentage
    theta1 = np.double(np.squeeze(theta1))
    theta2 = np.double(np.squeeze(theta2))
    if degrees:
        circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/np.double(180.0)
    else:
        circ_nmae = circ_mean_abs_error(theta1, theta2, degrees)/(np.pi)
    circ_nmae = circ_nmae * 100.0
    return circ_nmae


[docs]def circ_rms_error(theta1, theta2, degrees=True):
    """
    Validation Metric (Circular): Root-mean-square error

    :param numpy.array,float theta1: vector of angular data (modelled)
    :param numpy.array,float theta1: vector of angualr data (observed)
    :param bool degrees: angular data units flag. The default is degrees = True
    
    :return: circular RMS error (circ_rmse)
    :rtype: float
    """
    theta1 = np.squeeze(theta1)
    theta2 = np.squeeze(theta2)
    if degrees:
        dtheta = circ_mod_pi(np.radians(theta1 - theta2))
    else:
        dtheta = circ_mod_pi(theta1 - theta2)
    circ_rmse = np.sqrt(np.nanmean(np.square(dtheta)))
    if degrees:
        circ_rmse = np.degrees(circ_rmse)
    return circ_rmse


[docs]def circ_norm_rmse(theta1, theta2, degrees=True):
    """
    Validation Metric (Circular): Normalised root-mean-square error

    :param numpy.array,float theta1: vector of angular data (modelled)
    :param numpy.array,float theta1: vector of angualr data (observed)
    :param bool degrees: angular data units flag. The default is degrees = True
    
    :return: circular normalised RMS erro (circ)nrmse as a percentage
    :rtype: float
    """
    # Returns value as percentage
    theta1 = np.squeeze(theta1)
    theta2 = np.squeeze(theta2)
    if degrees:
        dtheta = circ_mod_pi(np.radians(theta1 - theta2))
    else:
        dtheta = circ_mod_pi(theta1 - theta2)
    num = np.sqrt(np.nanmean(np.square(dtheta)))
    denom  = np.pi
    circ_nrmse = (num / denom) * 100.0
    return circ_nrmse


[docs]def circ_scatter_index(theta1, theta2, degrees=True):
    """
    Validation Metric (Circular): Scatter index

    :param numpy.array,float theta1: vector of angular data (modelled)
    :param numpy.array,float theta1: vector of angualr data (observed)
    :param bool degrees: angular data units flag. The default is degrees = True
    
    :return: circular scatter index (circ_si)
    :rtype: float
    """
    # Returns value as percentage
    theta1 = np.double(np.squeeze(theta1))
    theta2 = np.double(np.squeeze(theta2))
    if degrees:
        theta1 = np.radians(theta1)
        theta2 = np.radians(theta2)
    theta1 = circ_mod_pi(theta1)
    theta2 = circ_mod_pi(theta2)
    mtheta1 = np.nanmean(theta1)
    mtheta2 = np.nanmean(theta2)
    dtheta1 = circ_mod_pi(theta1-mtheta1)
    dtheta2 = circ_mod_pi(theta2-mtheta2)
    num = np.nanmean(np.square(circ_mod_pi(dtheta1-dtheta2)))
    denom = np.square(np.pi)
    circ_si = np.sqrt(num/denom) * 100.0
    return circ_si


# =========================== Data Extraction =============================
[docs]def extract_model(rscdfile, mVarName: str):
    """
    Extract model output for a specified variable
    """
    rscd = MData.ww3(rscdfile)
    wmtw = MDB.temporalCoverage(rscd.time_start, rscd.time_end)
    rscd_t = rscd.getVar('time')
    if (mVarName == 'fp') or (mVarName == 'f02'):
        rscd_var = 1.0/rscd.getVar(mVarName)
    else:
        rscd_var = rscd.getVar(mVarName)
    # if mVarName == 'dir':
    #     theta = rscd_var.copy()*np.pi/180.0
    return rscd, rscd_t, rscd_var, wmtw


[docs]def extract_obs(buoyfile, buoyDataFmt: str, varOptions: list):
    """
    Extract *in situ* buoy data for a specified data foramt and variable
    """
    if buoyDataFmt == 'InSituTAC':
        buoy = OData.wavebuoy(buoyfile)
        bVarName = getVarName(buoy, varOptions)
    elif buoyDataFmt == 'SeaDataNet':
        buoy = OData.emecwb(buoyfile)
        bVarName = getVarName(buoy, varOptions)
    else:
        print('Unknown file format: '+buoyDataFmt)
        bVarName = ''
    if bVarName != '':
        varIndx = getSurfaceIndx(buoy, buoyDataFmt)
        buoy_t, buoy_var = buoy.getQCTimeseries(bVarName, varIndx)
    else:
        varIndx = None
        buoy_t = []
        buoy_var = []
    return bVarName, buoy, buoy_t, buoy_var


# ========================== Data Interpolators ===========================
[docs]def time_interpolate_obs(t_obs, t_var, t_model):
    """
    Interpolation of observation data onto the model output timestamps
    """
    intrp_var = np.empty_like(t_model, dtype='float')
    intrp_var[:] = np.nan
    for i_ref, t_ref in enumerate(t_model):
        idx = np.sort(np.argsort(abs(t_obs-t_ref))[0:2])
        o_diff = abs(np.diff(t_obs[idx]))*24.0
        m_diff = np.min(abs(t_obs[idx]-t_ref))*24.0
        if (o_diff < 1.5) and (m_diff < 1.0):
            if t_ref < t_obs[idx[0]]:
                intrp_var[i_ref] = t_var[idx[0]]
            elif t_ref > t_obs[idx[1]]:
                intrp_var[i_ref] = t_var[idx[1]]
            else:
                fnc = interp1d(t_obs[idx], t_var[idx], 
                               kind='nearest', bounds_error=False)
                intrp_var[i_ref] = fnc(t_ref)
    return intrp_var


# ============================ Analysis Tools =============================
[docs]def initialize_results():
    """
    Initialise a dictionary for collecting validation results
    """
    results = {}
    results.update({'MODEL': []})
    results.update({'OBSERVATION': []})
    results.update({'YEAR': []})
    results.update({'MONTH': []})
    results.update({'PARAM': []})
    results.update({'R': []})
    results.update({'MB': []})
    results.update({'NMB': []})
    results.update({'MAE': []})
    results.update({'NMAE': []})
    results.update({'RMSE': []})
    results.update({'NRMSE': []})
    results.update({'SI': []})
    results.update({'NSMPL': []})
    return results


[docs]def calculate_circular_metrics(model, obs, nsampl, degrees=True):
    """
    Calculate circlar validation metrics for a pair of modelled and observation time series
    """
    R = circ_corr(model, obs, degrees)
    mb = circ_mean_bias(model, obs, degrees)
    nmb = circ_norm_mean_bias(model, obs, degrees)
    mae = circ_mean_abs_error(model, obs, degrees)
    nmae = circ_norm_ma_error(model, obs, degrees)
    rmse = circ_rms_error(model, obs, degrees)
    nrmse = circ_norm_rmse(model, obs, degrees)
    si = circ_scatter_index(model, obs, degrees)

    metrics = {}
    metrics['R'] = R
    metrics['MB'] = mb
    metrics['NMB'] = nmb
    metrics['MAE'] = mae
    metrics['NMAE'] = nmae
    metrics['RMSE'] = rmse
    metrics['NRMSE'] = nrmse
    metrics['SI'] = si
    metrics['NSMPL'] = nsampl
    return metrics


[docs]def calculate_metrics(model, obs, nsampl):
    """
    Calculate standard validation metrics for a pair of modelled and observation time series
    """
    R = corr_coef(model, obs)
    mb = mean_bias(model, obs)
    nmb = norm_mean_bias(model, obs)
    mae = mean_absolute_error(model, obs)
    nmae = norm_ma_error(model, obs)
    rmse = rms_error(model, obs)
    nrmse = norm_rmse(model, obs)
    si = scatter_index(model, obs)

    metrics = {}
    metrics['R'] = R
    metrics['MB'] = mb
    metrics['NMB'] = nmb
    metrics['MAE'] = mae
    metrics['NMAE'] = nmae
    metrics['RMSE'] = rmse
    metrics['NRMSE'] = nrmse
    metrics['SI'] = si
    metrics['NSMPL'] = nsampl
    return metrics


[docs]def results_record(modelFileName, obsFileName, obsVarName,
                   year, month, metrics):
    """
    Create results recod from calculated validation metrics
    """
    results = {}
    results['MODEL'] = modelFileName
    results['BUOY'] = obsFileName
    results['YEAR'] = year
    results['MONTH'] = month
    results['PARAM'] = obsVarName
    for key in metrics.keys():
        results[key] = metrics[key]
    return results


[docs]def store_valid_results(store, results):
    """
    Store validation results record in the results dictionary
    """
    store['MODEL'].append(results['MODEL'])
    store['OBSERVATION'].append(results['BUOY'])
    store['YEAR'].append(results['YEAR'])
    store['MONTH'].append(results['MONTH'])
    store['PARAM'].append(results['PARAM'])
    store['R'].append(results['R'])
    store['MB'].append(results['MB'])
    store['NMB'].append(results['NMB'])
    store['MAE'].append(results['MAE'])
    store['NMAE'].append(results['NMAE'])
    store['RMSE'].append(results['RMSE'])
    store['NRMSE'].append(results['NRMSE'])
    store['SI'].append(results['SI'])
    store['NSMPL'].append(results['NSMPL'])
    return


[docs]def display_tabulated_results(store, mVarName):
    """
    Display validation results as a table on the standard output
    """
    print('Validation Statistics for', mVarName, '\n')
    fmtstr = "{:<60}{:<25}{:^7}{:^7}{:^7}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}"
    for row in zip(*([key] + (value) for key, value in store.items())):
        print(fmtstr.format(*row))
    return


[docs]def save_tabulated_results(store, platform, mVarName, outpath):
    """
    Save tabulated validation results as an ASCII file
    """
    outtxtname = platform+'_VALIDATION_STATS_'+mVarName+'.txt'
    outtxtfile = os.path.join(outpath,outtxtname)
    with open(outtxtfile,'w') as ofile:
        ofile.write('Validation Statistics for '+mVarName+'\n')
        fmtstr = "{:<60}{:<25}{:^7}{:^7}{:^7}{:^9.3}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9.4}{:^9}"
        for row in zip(*([key] + (value) for key, value in store.items())):
            ofile.write(fmtstr.format(*row)+'\n')
    outmatname = platform+'_VALIDATION_STATS_'+mVarName+'.mat'
    outmatfile = os.path.join(outpath,outmatname)
    saveFlg = sio.savemat(outmatfile,store)
    return saveFlg

[docs]def save_cleaned_ts_data(t, rscd, buoy, platform, mVarName, outpath, year, month):
    """
    Save cleaned matched model and buoy timeseries as a matlab \\*.mat binary file
    """
    yrMnthStr = '_'+str(year)+'_'+str(month).zfill(2)
    outdatname = platform+'_VALIDATION_TS_'+mVarName+yrMnthStr+'.mat'
    data = {}
    data['platform'] = platform
    data['var'] = mVarName
    data['year'] = year
    data['month'] = month
    data['time'] = t
    data['rscd'] = rscd
    data['buoy'] = buoy
    outdatfile = os.path.join(outpath, outdatname)
    saveFlg = sio.savemat(outdatfile, data)
    return saveFlg

[docs]def load_binary_results(platform, mVarName, datapath):
    """
    load binary results from a matlab file
    """
    matname = platform+'_VALIDATION_STATS_'+mVarName+'.mat'
    matfile = os.path.join(datapath,matname)
    data = sio.loadmat(matfile)
    store = initialize_results()
    for key in data.keys():
        if key[0] != '_':
            if len(data[key].shape) == 1:
                store[key] = data[key].tolist()
            else:
                store[key] = data[key][0].tolist()
    return store

[docs]def process_record(rec,
                   buoyFmt: str, platform: str,
                   mVarName: str, varOptions: list,
                   pVarName: str, results_dir: str, 
                   angularData=False, plot_results=False):
    """
    Process a matched model/buoy data record
    """
    rscdpath = copy.copy(rec[0])
    rscdfname = copy.copy(rec[1])
    rscdfile = os.path.join(rscdpath, rscdfname).replace('\\', '/')
    rscd, rscd_t, rscd_var, wmtw = extract_model(rscdfile, mVarName)
    #minNSampl = len(rscd_t)//2
    minNSampl = 200
    print(rscdfname)

    buoypath = copy.copy(rec[2])
    buoyfname = copy.copy(rec[3])
    buoyfile = os.path.join(buoypath, buoyfname).replace('\\', '/')
    bVarName, buoy, buoy_t, buoy_var = extract_obs(buoyfile, buoyFmt, varOptions)
    print('Buoy file = ', buoyfname)
    print('Buoy sampling period = ', buoy.sample_period)

    year = copy.copy(rec[4])
    month = copy.copy(rec[5])

    valid_results = None

    if bVarName != '':
        print('Variable name = ', bVarName)
        overlaps, tindx = wmtw.timeOverlap(buoy_t)
        if (tindx[0] is not None) & (tindx[1] is not None):
            ngood = len(np.where(buoy_var[tindx[0]:(tindx[1]+1)] != np.nan)[0])
        else:
            ngood = 0
        if overlaps & (ngood >= minNSampl):
            t0 = np.where(buoy_t >= rscd_t[0])[0][0]
            t1 = np.where(buoy_t <= rscd_t[-1])[0][-1]
            print('Number of good data points = ', ngood)
            print('Points in RSCD time series = ', len(rscd_t))
            print('Points in Buoy time series = ', len(buoy_t[t0:(t1+1)]))
            print('Start times (model, in situ): ', rscd_t[0], buoy_t[t0])
            print('End times (model, in situ): ', rscd_t[-1], buoy_t[t1])
            model = []
            obs = []
            sample_period = np.median(np.diff(buoy_t[t0:(t1+1)]))*24.0
            #if buoy.sample_period != 1.0:
            if sample_period != 1.0:
                # Get common indices
                max_dt = np.max(np.diff(buoy_t[t0:(t1+1)]))
                print('Maximum dt in obs =', max_dt*24., 'hours')
                # Interpolate obs onto model times
                t_mod = rscd_t.copy()
                v_mod = rscd_var.copy()
                t_obs = buoy_t[t0:(t1+1)].copy()
                v_obs = buoy_var[t0:(t1+1)].copy()
                v_obs = time_interpolate_obs(t_obs, v_obs, t_mod)
                # Plot time series
                if plot_results:
                    fig = plot_time_series(t_mod, v_mod, t_mod, v_obs,
                                     bVarName, buoy.platform,
                                     pVarName, rscdfname,
                                     year, month,
                                     [rscd_t[0], rscd_t[-1]])
                    plotname = ''.join(
                                  ('RSCD_', buoy.platform, '_',
                                   pVarName,'_',
                                   str(year),'_',
                                   str(month).zfill(2),
                                   '_TS.png'))
                    plotfile = os.path.join(results_dir,plotname)
                    fig.savefig(plotfile)
                    plot_close(fig)
                # Select common points between timeseries
                nans, x = np.isnan(v_obs), lambda z: z.nonzero()[0]
                indices = x(~nans)
                # Calculate validation metrics
                model = v_mod[indices].copy()
                obs = v_obs[indices].copy()
                nsampl = len(indices)
                save_cleaned_ts_data(t_mod, v_mod, v_obs, platform, mVarName, results_dir, year, month )
            elif buoy.sample_period == 1.0:
                # Get common indices
                both = set(list(rscd_t)).intersection(list(buoy_t))
                if len(both) > 0:
                    # Interpolate obs onto model times
                    t_mod = rscd_t.copy()
                    v_mod = rscd_var.copy()
                    t_obs = buoy_t[t0:(t1+1)].copy()
                    v_obs = buoy_var[t0:(t1+1)].copy()
                    # Plot time series
                    if plot_results:
                        fig = plot_time_series(t_mod, v_mod, t_obs, v_obs,
                                         bVarName, buoy.platform,
                                         pVarName, rscdfname,
                                         year, month,
                                         [rscd_t[0], rscd_t[-1]])
                        plotname = ''.join(
                                      ('RSCD_', buoy.platform, '_',
                                       pVarName,'_',
                                       str(year),'_',
                                       str(month).zfill(2),
                                       '_TS.png'))
                        plotfile = os.path.join(results_dir,plotname)
                        fig.savefig(plotfile)
                        plot_close(fig)
                    # Get common data indices
                    nans, x = np.isnan(v_obs), lambda z: z.nonzero()[0]
                    indices = x(~nans)
                    # Select common points between timeseries
                    model = v_mod[indices].copy()
                    obs = v_obs[indices].copy()
                    nsampl = len(both)
                    save_cleaned_ts_data(t_mod, v_mod, v_obs, platform, mVarName, results_dir, year, month )
            # Calculate validation metrics
            if (len(model) > minNSampl) & (len(obs) > minNSampl):
                if angularData:
                    metrics = calculate_circular_metrics(model, obs, nsampl)
                else:
                    metrics = calculate_metrics(model, obs, nsampl)
                valid_results = results_record(rscdfname, buoyfname,
                                               bVarName,
                                               year, month, metrics)
                # Plot correlation
                if plot_results:
                    fig = plot_correlation(obs, model, bVarName,
                                     buoy.platform,
                                     pVarName, rscdfname,
                                     year, month,
                                     metrics)
                    plotname = ''.join(
                                  ('RSCD_', buoy.platform, '_',
                                   pVarName,'_',
                                   str(year),'_',
                                   str(month).zfill(2),
                                   '_CORR.png'))
                    plotfile = os.path.join(results_dir,plotname)
                    fig.savefig(plotfile)
                    plot_close(fig)
            else:
                print('No common indices found for metrics calculations.')
            print()
        else:
            print('Insuffient valid data for statistics.  (# Good = ',
                  ngood, ')')
            print()
    else:
        print('No variable match with ', varOptions)
        print('Buoy variables: ', buoy.file['vars'])
        print()
    return valid_results


[docs]def validate_records(records, dataFmt: str, platform: str,
                     mVarName: str, varOptions: list,
                     pVarName,
                     results_dir,
                     plot_results=False):
    """
    Process a set of matched model/buoy data records
    """
    # Create results dictionary
    results_store = initialize_results()

    if mVarName == 'dir':
        angularData=True
    else:
        angularData=False

    n_valid_results = 0
    for rec in records:
        result = process_record(rec, dataFmt, platform,
                                mVarName, varOptions,
                                pVarName, results_dir,
                                angularData, plot_results)
        if result is not None:
            store_valid_results(results_store, result)
            n_valid_results += 1
    return n_valid_results, results_store