# -*- coding: utf-8 -*-
"""
This module contains functions for post-processing the validation statistics.
:Dependencies [External]: os, scipy, numpy
:Dependencies [Internal]:
"""
# ----------------------------------------------------------------------------
# IMPORTS
# ----------------------------------------------------------------------------
# Standard Python Dependencies
import os
# Non-Standard Python Dependencies
import numpy as np
import scipy.io as sio
# Local Module Dependencies
# Other Dependencies
# ----------------------------------------------------------------------------
# GLOBAL VARIABLES
# ----------------------------------------------------------------------------
# Three-letter month labels (index 0 = January) used when printing the
# per-month statistics.
mStr = ['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC']
# ----------------------------------------------------------------------------
# CLASS DEFINITIONS
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# Function Definitions
# ----------------------------------------------------------------------------
# Global statistics
def get_metric_stats_global(dpath, files, metric: str):
    """
    Compute the mean and standard deviation of a metric pooled over all files.

    Each file is read with ``scipy.io.loadmat`` and the first row of
    ``stat[metric]`` is used as the record series.  Values greater than 500.0
    are treated as missing data and ignored, as are NaNs.  The pooled mean and
    the population standard deviation (``sqrt(E[x^2] - E[x]^2)``) are printed
    and returned.

    :param dpath: directory that contains the statistics files
    :param files: iterable of ``.mat`` file names located in ``dpath``
    :param metric: name of the variable to read from every file
    :returns: tuple ``(mean, stddev)``; both are NaN when there is no valid
        record at all (including an empty ``files`` list)
    """
    total = 0.0
    total_sq = 0.0
    count = 0
    for fname in files:
        stat = sio.loadmat(os.path.join(dpath, fname))
        # Work on a float copy so that missing values can be flagged as NaN
        # even when the metric was stored with an integer type.
        values = np.array(stat[metric][0], dtype=float)
        values[values > 500.0] = np.nan
        valid = values[~np.isnan(values)]
        count += int(valid.size)
        total += np.sum(valid)
        total_sq += np.sum(np.square(valid))
    if count > 0:
        V_glbl = total / count
        # Rounding can push the variance marginally below zero; clamp it so
        # the square root never yields a spurious NaN.
        variance = (total_sq / count) - np.square(V_glbl)
        sigV_glbl = np.sqrt(max(variance, 0.0))
    else:
        V_glbl = np.nan
        sigV_glbl = np.nan
    print('ALL '+str(V_glbl)+' +/- '+str(sigV_glbl))
    print('')
    return V_glbl, sigV_glbl
def get_parameter_stats_global(dpath: str, param: str, metricStr: list):
    """
    Print the pooled statistics of every requested metric for one parameter.

    The files considered are those in ``dpath`` whose name ends with
    ``param + '.mat'``; they are processed in sorted order so that the
    result does not depend on the directory listing order.

    :param dpath: directory that contains the statistics files
    :param param: parameter suffix used to select the files
    :param metricStr: list of metric names to evaluate
    :returns: tuple ``(mean, stddev)`` of the LAST metric in ``metricStr``;
        ``(nan, nan)`` when ``metricStr`` is empty
    """
    files = sorted(f for f in os.listdir(dpath) if f.endswith(param+'.mat'))
    # Defaults keep the return statement valid when no metric is requested.
    V_glbl, sigV_glbl = np.nan, np.nan
    for metric in metricStr:
        print('METRIC: '+metric)
        V_glbl, sigV_glbl = get_metric_stats_global(dpath, files, metric)
    return V_glbl, sigV_glbl
# Statistics by year
def get_metric_stats_by_year(dpath, files, metric: str, yr: int):
    """
    Compute yearly and per-month statistics of a metric pooled over all files.

    Each file is read with ``scipy.io.loadmat``.  Records are selected where
    ``stat['YEAR']`` equals ``str(yr)``; the matching entries of the first row
    of ``stat[metric]`` are the values.  Values greater than 500.0 are treated
    as missing data and ignored, as are NaNs, for both the yearly and the
    monthly statistics.  ``stat['MONTH']`` is converted with ``int()`` and is
    taken to be 1-based (1 = January).

    Standard deviations are population values (``sqrt(E[x^2] - E[x]^2)``).
    The yearly and the twelve monthly results are printed.

    :param dpath: directory that contains the statistics files
    :param files: iterable of ``.mat`` file names located in ``dpath``
    :param metric: name of the variable to read from every file
    :param yr: year to select
    :returns: tuple ``(V_year, sigV_year, V_mnth, sigV_mnth)`` where the last
        two are arrays of length 12; entries without valid data are NaN
    """
    year_label = str(yr)
    sum_year = 0.0
    sumsq_year = 0.0
    n_year = 0
    sum_mnth = np.zeros((12,), dtype=float)
    sumsq_mnth = np.zeros((12,), dtype=float)
    n_mnth = np.zeros((12,), dtype=int)
    for fname in files:
        stat = sio.loadmat(os.path.join(dpath, fname))
        indx = np.where(stat['YEAR'] == year_label)[0]
        # Float copy so missing values can be flagged as NaN for any dtype.
        values = np.array(stat[metric][0][indx], dtype=float)
        values[values > 500.0] = np.nan
        valid = ~np.isnan(values)
        if not np.any(valid):
            continue
        good = values[valid]
        good_sq = np.square(good)
        # Month index of exactly the records that entered ``good``: the same
        # year selection and the same missing-data filter as the yearly sums.
        midx = np.array([int(m) for m in stat['MONTH'][indx[valid]]],
                        dtype=int) - 1
        n_year += int(good.size)
        sum_year += np.sum(good)
        sumsq_year += np.sum(good_sq)
        # ufunc.at accumulates correctly when a month index repeats.
        np.add.at(sum_mnth, midx, good)
        np.add.at(sumsq_mnth, midx, good_sq)
        np.add.at(n_mnth, midx, 1)
    if n_year > 0:
        V_year = sum_year / n_year
        # Clamp tiny negative variances caused by rounding.
        sigV_year = np.sqrt(max((sumsq_year/n_year)-np.square(V_year), 0.0))
    else:
        V_year = np.nan
        sigV_year = np.nan
    print('ALL '+str(V_year)+' +/- '+str(sigV_year))
    print('')
    # Months without any valid record stay NaN instead of dividing by zero.
    has_data = n_mnth > 0
    V_mnth = np.full((12,), np.nan)
    sigV_mnth = np.full((12,), np.nan)
    V_mnth[has_data] = sum_mnth[has_data] / n_mnth[has_data]
    var_mnth = (sumsq_mnth[has_data]/n_mnth[has_data]) - np.square(V_mnth[has_data])
    sigV_mnth[has_data] = np.sqrt(np.maximum(var_mnth, 0.0))
    for im in range(12):
        print(mStr[im]+' '+str(V_mnth[im])+' +/- '+str(sigV_mnth[im]))
    return V_year, sigV_year, V_mnth, sigV_mnth
def get_parameter_stats_by_year(dpath: str, param: str, metricStr: list, year: int):
    """
    Print yearly and monthly statistics of every requested metric.

    The files considered are those in ``dpath`` whose name ends with
    ``param + '.mat'``; they are processed in sorted order so that the
    result does not depend on the directory listing order.

    :param dpath: directory that contains the statistics files
    :param param: parameter suffix used to select the files
    :param metricStr: list of metric names to evaluate
    :param year: year to select
    :returns: tuple ``(V_year, sigV_year, V_mnth, sigV_mnth)`` of the LAST
        metric in ``metricStr``; all NaN (the monthly entries as arrays of
        length 12) when ``metricStr`` is empty
    """
    files = sorted(f for f in os.listdir(dpath) if f.endswith(param+'.mat'))
    # Defaults keep the return statement valid when no metric is requested.
    V_year, sigV_year = np.nan, np.nan
    V_mnth = np.full((12,), np.nan)
    sigV_mnth = np.full((12,), np.nan)
    for metric in metricStr:
        print('METRIC: '+metric)
        V_year, sigV_year, V_mnth, sigV_mnth = get_metric_stats_by_year(
            dpath, files, metric, year)
    return V_year, sigV_year, V_mnth, sigV_mnth
# Statistics by buoy
def get_metric_stats_by_buoy(dpath, files, metric: str):
    """
    Summarise a metric separately for every file (one file per platform).

    The platform name is the part of the file name before the first ``'_'``.
    Each file is read with ``scipy.io.loadmat`` and the first row of
    ``stat[metric]`` is used as the record series.  Values greater than 500.0
    are treated as missing data and ignored, as are NaNs.

    :param dpath: directory that contains the statistics files
    :param files: iterable of ``.mat`` file names located in ``dpath``
    :param metric: name of the variable to read from every file
    :returns: dict of parallel lists with the keys ``platform``, ``nyears``
        (number of distinct values in ``stat['YEAR']``), ``nrecs`` (number of
        valid records), ``min``, ``max``, ``mean`` and ``stddev`` (sample
        standard deviation, ``ddof=1``).  Statistics that cannot be computed
        because there are too few valid records are NaN.
    """
    metricStats = {
        'platform': [],
        'nyears': [],
        'nrecs': [],
        'min': [],
        'max': [],
        'mean': [],
        'stddev': [],
    }
    for fname in files:
        # Load data from binary file
        pltfrm = fname.split('_')[0]
        stat = sio.loadmat(os.path.join(dpath, fname))
        nyrs = len({int(y) for y in stat['YEAR']})
        # Float copy so missing values can be flagged as NaN for any dtype.
        values = np.array(stat[metric][0], dtype=float)
        # Filter out missing data values
        values[values > 500.0] = np.nan
        valid = values[~np.isnan(values)]
        if valid.size > 0:
            minV = np.min(valid)
            maxV = np.max(valid)
            avgV = np.mean(valid)
        else:
            minV = maxV = avgV = np.nan
        # The sample standard deviation needs at least two valid records.
        sigV = np.std(valid, ddof=1) if valid.size > 1 else np.nan
        # Load validation statistics
        metricStats['platform'].append(pltfrm)
        metricStats['nyears'].append(nyrs)
        metricStats['nrecs'].append(int(valid.size))
        metricStats['min'].append(minV)
        metricStats['max'].append(maxV)
        metricStats['mean'].append(avgV)
        metricStats['stddev'].append(sigV)
    return metricStats
def get_parameter_stats_by_buoy(dpath: str, param: str, metric: str):
    """
    Summarise one metric per platform for all files of a parameter.

    The files considered are those in ``dpath`` whose name ends with
    ``param + '.mat'``.  They are processed in sorted order so that the order
    of the entries in the returned lists is reproducible and does not depend
    on the directory listing order.

    :param dpath: directory that contains the statistics files
    :param param: parameter suffix used to select the files
    :param metric: name of the metric to summarise
    :returns: the dict of parallel lists produced by
        ``get_metric_stats_by_buoy``
    """
    files = sorted(f for f in os.listdir(dpath) if f.endswith(param+'.mat'))
    return get_metric_stats_by_buoy(dpath, files, metric)
def test_for_failure(meanV, metric: str):
    """
    Flag the entries of ``meanV`` that fail the acceptance limit of a metric.

    The comparison is made on absolute values.  For ``'R'`` an entry fails
    when it is below 0.7; for ``'NMB'``, ``'NMAE'`` and ``'NRMSE'`` it fails
    when it exceeds 20.0; for ``'SI'`` when it exceeds 30.0.  Any other
    metric name flags nothing.  NaN entries never compare true and are
    therefore not flagged.

    :param meanV: scalar or array-like of metric values
    :param metric: metric name selecting the limit
    :returns: float array shaped like ``meanV`` (length 1 for a scalar) with
        1 where the entry fails and 0 elsewhere
    """
    # atleast_1d lets a single value be tested as well as a sequence.
    v = np.atleast_1d(np.abs(meanV))
    fail = np.zeros(v.shape)
    if metric == 'R':
        # Correlation is the only metric where SMALL values are failures.
        fail[v < 0.7] = 1
    else:
        upper_limits = {'NMB': 20.0, 'NMAE': 20.0, 'NRMSE': 20.0, 'SI': 30.0}
        limit = upper_limits.get(metric)
        if limit is not None:
            fail[v > limit] = 1
    return fail


# The name matches pytest's ``test_*`` discovery pattern; opt out so that
# importing this helper into a test module does not make pytest try to run
# it as a test (its arguments would be looked up as fixtures).
test_for_failure.__test__ = False
def get_buoy_locations(records):
    """
    Collect selected fields of every record into parallel lists.

    Each record must be indexable; positions 1, 3, 8, 9 and 10 are copied to
    the lists stored under ``rscdfile``, ``buoyfile``, ``platform``,
    ``latitude`` and ``longitude`` respectively.

    :param records: iterable of indexable records with at least 11 fields
    :returns: dict mapping each of the five keys to a list with one entry per
        record, in input order (empty lists when ``records`` is empty)
    """
    # (output key, position of the field inside a record)
    fields = (
        ('rscdfile', 1),
        ('buoyfile', 3),
        ('platform', 8),
        ('latitude', 9),
        ('longitude', 10),
    )
    buoylocs = {key: [] for key, _ in fields}
    for rec in records:
        for key, pos in fields:
            buoylocs[key].append(rec[pos])
    return buoylocs