# Source code for WaveStats

# -*- coding: utf-8 -*-
"""
This module contains functions for post-processing the validation statistics.

:Dependencies [Standard]: os
:Dependencies [External]: scipy, numpy
:Dependencies [Internal]: None

"""
# ----------------------------------------------------------------------------
#   IMPORTS
# ----------------------------------------------------------------------------
# Standard Python Dependencies
import os
# Non-Standard Python Dependencies
import numpy as np
import scipy.io as sio
# Local Module Dependencies
# Other Dependencies

# ----------------------------------------------------------------------------
#   GLOBAL VARIABLES
# ----------------------------------------------------------------------------
# Three-letter month labels indexed by (month number - 1); used as row labels
# when the monthly statistics are printed.
mStr = ['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC']

# ----------------------------------------------------------------------------
#   CLASS DEFINITIONS
# ----------------------------------------------------------------------------


# ----------------------------------------------------------------------------
# Function Definitions
# ----------------------------------------------------------------------------
# Global statistics
def get_metric_stats_global(dpath, files, metric: str):
    """
    Compute the pooled mean and standard deviation of one metric over files.

    Each file is read with ``scipy.io.loadmat`` and the first row of the
    ``metric`` variable is used.  Values greater than 500.0 are treated as
    missing and NaN values are ignored.  The standard deviation is the
    population form ``sqrt(E[x^2] - E[x]^2)`` accumulated over all files.
    A summary line followed by a blank line is printed to standard output.

    :param dpath: Directory that contains the ``.mat`` files.
    :param files: Iterable of file names located in ``dpath``.
    :param metric: Name of the variable to read from each file.
    :returns: Tuple ``(mean, stddev)``.  Both are NaN when no valid value
        was found (including an empty ``files``).
    """
    total = 0.0
    total_sq = 0.0
    count = 0

    for fname in files:
        stat = sio.loadmat(os.path.join(dpath, fname))
        # Cast to float so NaN can mark missing values even when the data
        # were stored with an integer type.
        values = np.array(stat[metric][0], dtype=float)
        values[values > 500.0] = np.nan
        valid = values[~np.isnan(values)]
        count += valid.size
        total += valid.sum()
        total_sq += np.square(valid).sum()

    if count == 0:
        # Nothing to average: report NaN instead of dividing by zero.
        V_glbl = np.nan
        sigV_glbl = np.nan
    else:
        V_glbl = total / count
        # Round-off can push the variance marginally below zero; clamp it so
        # the square root does not turn a valid result into NaN.
        variance = (total_sq / count) - np.square(V_glbl)
        sigV_glbl = np.sqrt(max(variance, 0.0))

    print('ALL '+str(V_glbl)+' +/- '+str(sigV_glbl))
    print('')
    return V_glbl, sigV_glbl
def get_parameter_stats_global(dpath: str, param: str, metricStr: list):
    """
    Print global statistics for every requested metric of one parameter.

    The files considered are the entries of ``dpath`` whose name ends with
    ``param + '.mat'``.  For each metric a ``METRIC: <name>`` header is
    printed and ``get_metric_stats_global`` is called.

    :param dpath: Directory that contains the ``.mat`` files.
    :param param: Parameter name used as the file-name suffix (before
        ``.mat``).
    :param metricStr: List of metric names to process, in order.
    :returns: Tuple ``(mean, stddev)`` of the LAST metric in ``metricStr``
        only; ``(nan, nan)`` when ``metricStr`` is empty.
    """
    files = [f for f in os.listdir(dpath) if f.endswith(param+'.mat')]

    # Defaults keep the return well-defined when no metric is requested
    # (previously this path raised UnboundLocalError).
    V_glbl = np.nan
    sigV_glbl = np.nan

    for metric in metricStr:
        print('METRIC: '+metric)
        V_glbl, sigV_glbl = get_metric_stats_global(dpath, files, metric)

    return V_glbl, sigV_glbl
# Statistics by year
def get_metric_stats_by_year(dpath, files, metric: str, yr: int):
    """
    Compute yearly and per-month mean/standard deviation of one metric.

    Each file is read with ``scipy.io.loadmat``.  Records whose ``YEAR``
    entry equals ``yr`` are selected, values greater than 500.0 are treated
    as missing and NaN values are ignored.  The same filtered values feed
    both the yearly and the monthly accumulators, and the month of each
    record is taken from the ``MONTH`` entry at the same record position.
    Standard deviations are the population form ``sqrt(E[x^2] - E[x]^2)``.
    The yearly summary and one line per month are printed.

    NOTE(review): assumes ``YEAR`` and ``MONTH`` hold one ``int()``-convertible
    entry per record, aligned with the first row of ``metric`` -- confirm
    against the code that writes these files.

    :param dpath: Directory that contains the ``.mat`` files.
    :param files: Iterable of file names located in ``dpath``.
    :param metric: Name of the variable to read from each file.
    :param yr: Year to select.
    :returns: Tuple ``(V_year, sigV_year, V_mnth, sigV_mnth)``; the last two
        are arrays of length 12.  Entries without valid data are NaN.
    """
    year = int(yr)
    sum_year = 0.0
    sumsq_year = 0.0
    n_year = 0
    sum_mnth = np.zeros((12,), dtype=float)
    sumsq_mnth = np.zeros((12,), dtype=float)
    n_mnth = np.zeros((12,), dtype=int)

    for fname in files:
        stat = sio.loadmat(os.path.join(dpath, fname))

        # Positions of the records belonging to the requested year.
        years = np.array([int(y) for y in np.ravel(stat['YEAR'])], dtype=int)
        rec_idx = np.flatnonzero(years == year)

        # Select values and months with the SAME record positions so the two
        # stay aligned (the month lookup used to scan only the first records
        # of the file, missing data that was not stored at the start).
        values = np.array(stat[metric][0], dtype=float)[rec_idx]
        months = np.array(
            [int(m) for m in np.ravel(stat['MONTH'])[rec_idx]], dtype=int
        ) - 1

        # Drop missing-value sentinels and NaNs before any accumulation so
        # the monthly figures agree with the yearly ones.
        values[values > 500.0] = np.nan
        keep = ~np.isnan(values)
        values = values[keep]
        months = months[keep]

        n_year += values.size
        sum_year += values.sum()
        sumsq_year += np.square(values).sum()

        # np.add.at accumulates correctly when a month index repeats.
        np.add.at(sum_mnth, months, values)
        np.add.at(sumsq_mnth, months, np.square(values))
        np.add.at(n_mnth, months, 1)

    if n_year == 0:
        # No valid record for this year: report NaN instead of dividing by 0.
        V_year = np.nan
        sigV_year = np.nan
    else:
        V_year = sum_year / n_year
        # Clamp tiny negative variances caused by round-off.
        sigV_year = np.sqrt(max((sumsq_year / n_year) - np.square(V_year), 0.0))
    print('ALL '+str(V_year)+' +/- '+str(sigV_year))
    print('')

    # Months without data yield 0/0 -> NaN; silence the expected warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        V_mnth = sum_mnth / n_mnth
        var_mnth = (sumsq_mnth / n_mnth) - np.square(V_mnth)
        sigV_mnth = np.sqrt(np.maximum(var_mnth, 0.0))
    for im in range(12):
        print(mStr[im]+' '+str(V_mnth[im])+' +/- '+str(sigV_mnth[im]))

    return V_year, sigV_year, V_mnth, sigV_mnth
def get_parameter_stats_by_year(dpath: str, param: str, metricStr: list, year: int):
    """
    Print yearly and monthly statistics for every requested metric.

    The files considered are the entries of ``dpath`` whose name ends with
    ``param + '.mat'``.  For each metric a ``METRIC: <name>`` header is
    printed and ``get_metric_stats_by_year`` is called.

    :param dpath: Directory that contains the ``.mat`` files.
    :param param: Parameter name used as the file-name suffix (before
        ``.mat``).
    :param metricStr: List of metric names to process, in order.
    :param year: Year passed on to ``get_metric_stats_by_year``.
    :returns: Tuple ``(V_year, sigV_year, V_mnth, sigV_mnth)`` of the LAST
        metric in ``metricStr`` only.  When ``metricStr`` is empty the
        scalars are NaN and the monthly entries are length-12 NaN arrays.
    """
    files = [f for f in os.listdir(dpath) if f.endswith(param+'.mat')]

    # Defaults keep the return well-defined when no metric is requested
    # (previously this path raised UnboundLocalError).
    V_year = np.nan
    sigV_year = np.nan
    V_mnth = np.full((12,), np.nan)
    sigV_mnth = np.full((12,), np.nan)

    for metric in metricStr:
        print('METRIC: '+metric)
        V_year, sigV_year, V_mnth, sigV_mnth = get_metric_stats_by_year(
            dpath, files, metric, year
        )

    return V_year, sigV_year, V_mnth, sigV_mnth
# Statistics by buoy
def get_metric_stats_by_buoy(dpath, files, metric: str):
    """
    Summarise one metric separately for every file (one file per platform).

    Each file is read with ``scipy.io.loadmat`` and the first row of the
    ``metric`` variable is used.  Values greater than 500.0 are treated as
    missing and NaN values are ignored.  The platform identifier is the part
    of the file name before the first underscore.

    :param dpath: Directory that contains the ``.mat`` files.
    :param files: Iterable of file names located in ``dpath``.
    :param metric: Name of the variable to read from each file.
    :returns: Dict of parallel lists with keys ``platform``, ``nyears``
        (number of distinct ``YEAR`` values in the file), ``nrecs`` (number
        of valid values), ``min``, ``max``, ``mean`` and ``stddev`` (sample
        standard deviation, ``ddof=1``).  Statistics that cannot be computed
        (no valid value, or fewer than two for ``stddev``) are NaN.
    """
    metricStats = {
        key: []
        for key in ('platform', 'nyears', 'nrecs', 'min', 'max', 'mean', 'stddev')
    }

    for fname in files:
        # Load data from binary file
        platform = fname.split('_')[0]
        stat = sio.loadmat(os.path.join(dpath, fname))
        nyears = len({int(y) for y in np.ravel(stat['YEAR'])})

        # Cast to float so NaN can mark missing values even when the data
        # were stored with an integer type.
        values = np.array(stat[metric][0], dtype=float)
        # Filter out missing data values
        values[values > 500.0] = np.nan
        nrecs = int(np.sum(~np.isnan(values)))

        # Guard the reductions: with no valid data they would warn or raise.
        if nrecs > 0:
            minV = np.nanmin(values)
            maxV = np.nanmax(values)
            avgV = np.nanmean(values)
        else:
            minV = maxV = avgV = np.nan
        # The sample standard deviation needs at least two valid values.
        sigV = np.nanstd(values, ddof=1) if nrecs > 1 else np.nan

        # Load validation statistics
        metricStats['platform'].append(platform)
        metricStats['nyears'].append(nyears)
        metricStats['nrecs'].append(nrecs)
        metricStats['min'].append(minV)
        metricStats['max'].append(maxV)
        metricStats['mean'].append(avgV)
        metricStats['stddev'].append(sigV)

    return metricStats
def get_parameter_stats_by_buoy(dpath: str, param: str, metric: str):
    """
    Collect per-file statistics of one metric for one parameter.

    Selects the entries of ``dpath`` whose name ends with ``param + '.mat'``
    and forwards them to ``get_metric_stats_by_buoy``.

    :param dpath: Directory that contains the ``.mat`` files.
    :param param: Parameter name used as the file-name suffix (before
        ``.mat``).
    :param metric: Name of the metric passed on to
        ``get_metric_stats_by_buoy``.
    :returns: Whatever ``get_metric_stats_by_buoy`` returns for the
        selected files.
    """
    suffix = param+'.mat'
    matching = []
    for entry in os.listdir(dpath):
        if entry.endswith(suffix):
            matching.append(entry)
    return get_metric_stats_by_buoy(dpath, matching, metric)
[docs]def test_for_failure(meanV, metric:str): v = np.abs(meanV) fail = np.zeros((len(v),)) if metric == 'R': fail[v < 0.7] = 1 elif metric == 'NMB': fail[v > 20.0] = 1 elif metric == 'NMAE': fail[v > 20.0] = 1 elif metric == 'NRMSE': fail[v > 20.0] = 1 elif metric == 'SI': fail[v > 30.0] = 1 return fail
def get_buoy_locations(records):
    """
    Regroup selected fields of each record into parallel lists.

    For every record, the elements at positions 1, 3, 8, 9 and 10 are
    appended to the lists stored under ``rscdfile``, ``buoyfile``,
    ``platform``, ``latitude`` and ``longitude`` respectively.

    :param records: Iterable of indexable records with at least 11 elements.
    :returns: Dict mapping each of the five keys to a list holding one entry
        per record, in input order.
    """
    # (output key, position within a record), in output order.
    columns = (
        ('rscdfile', 1),
        ('buoyfile', 3),
        ('platform', 8),
        ('latitude', 9),
        ('longitude', 10),
    )
    buoylocs = {key: [] for key, _ in columns}
    for rec in records:
        for key, position in columns:
            buoylocs[key].append(rec[position])
    return buoylocs