Commit c5c6c30e authored by Christine Verbeke

Implementation of SPARX proton flux metrics

parent 675c126c
@@ -5,26 +5,55 @@ from inout.data import *
from goes.metrics import *
#Be careful about the units used in the output files of the models!

#SNEF model
#model_type=1
#SPARX model
model_type=2

if (model_type==1):
    goes_2M_path = ["/Users/christinev/Desktop/metrics_validation/SW_metrics/output/SNEF_2M_output.txt",
                    "/Users/christinev/Desktop/metrics_validation/SW_metrics/output/SNEF_2M_output_1.txt"]
    goes_800K_path = ["/Users/christinev/Desktop/metrics_validation/SW_metrics/output/SNEF_800K_output.txt",
                      "/Users/christinev/Desktop/metrics_validation/SW_metrics/output/SNEF_800K_output_1.txt"]
    goes_data = get_goes_data()
    goes_data.get_goes_electron_2M(goes_2M_path)
    goes_data.get_goes_electron_800K(goes_800K_path)

    goes_calculator = electron_goes_metrics_calculator()
    goes_calculator.set_goes_2M_information(goes_data.goes_2M_data, model_name="SNEF")
    goes_calculator.calculate_goes_2M_metrics()
    goes_calculator.plot_goes_2M_metrics('/Users/christinev/Desktop/metrics_validation/SW_metrics/output/', filename="goes_2M_metrics")
    goes_calculator.goes_2M_metrics_to_json('/Users/christinev/Desktop/metrics_validation/SW_metrics/output/', filename="goes_2M_metrics")

    goes_calculator.set_goes_800K_information(goes_data.goes_800K_data, model_name="SNEF")
    goes_calculator.calculate_goes_800K_metrics()
    goes_calculator.plot_goes_800K_metrics('/Users/christinev/Desktop/metrics_validation/SW_metrics/output/', filename="goes_800K_metrics")
    goes_calculator.goes_800K_metrics_to_json('/Users/christinev/Desktop/metrics_validation/SW_metrics/output/', filename="goes_800K_metrics")
if (model_type==2):
    # NOTE: For now, the time ranges should correspond to actual timestamps of the SPARX model!
    # Model values also cannot be zero, so select time ranges in which all fluxes are non-zero.
    SPARX_model_data = ["/Users/christinev/Desktop/VerAndVal/sw_validation_metrics/output/forecast_data_for_plotting_SPARX.csv",
                        "/Users/christinev/Desktop/VerAndVal/sw_validation_metrics/output/forecast_data_for_plotting_SPARX.csv"]
    time_range_path = '/Users/christinev/Desktop/VerAndVal/sw_validation_metrics/output/time_ranges_ex3.txt'
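    # The time ranges file lists one interval per line as start and end timestamps separated by
    # whitespace (see the time-range example file added in this commit), e.g.:
    #   2014-09-11T18:46:00 2014-09-12T18:06:00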
    goes_data = get_goes_data()
    goes_data.get_time_ranges(time_range_path)
    goes_data.get_goes_proton_obs_data()
    goes_data.get_goes_proton_sim_data(SPARX_model_data)

    goes_calculator = proton_goes_metrics_calculator()
    goes_calculator.set_goes_information(goes_data.goes_time_ranges, goes_data.full_proton_data, goes_data.goes_proton_sim_data, model_name="SPARX")
    goes_calculator.calculate_proton_goes_metrics()
    goes_calculator.plot_goes_metrics('/Users/christinev/Desktop/VerAndVal/sw_validation_metrics/output/', filename="goes_metrics")
    goes_calculator.goes_metrics_to_json('/Users/christinev/Desktop/VerAndVal/sw_validation_metrics/output/', filename="goes_metrics")
@@ -17,6 +17,8 @@ import seaborn as sns
from tabulate import tabulate
from sklearn.metrics import confusion_matrix
from matplotlib.gridspec import GridSpec
from datetime import datetime
from datetime import timedelta
# TO DO: use the scikit-learn package for ME, MAE, etc.
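# A possible sketch for the TO DO above (an assumption, not part of this commit): scikit-learn
# provides MAE and MSE for two aligned 1-D arrays of observed and simulated values, e.g.
#   from sklearn.metrics import mean_absolute_error, mean_squared_error
#   mae  = mean_absolute_error(obs_values, sim_values)   # obs_values/sim_values are hypothetical names
#   rmse = np.sqrt(mean_squared_error(obs_values, sim_values))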
@@ -25,7 +27,7 @@ from matplotlib.gridspec import GridSpec
class electron_goes_metrics_calculator(object):
    """
    Class that provides electron flux metrics from the provided simulation and observation data.
    Currently supports SNEF model data
    """
    # def __init__(self):
@@ -72,6 +74,8 @@ class electron_goes_metrics_calculator(object):
    def plot_goes_2M_metrics(self, output_dir, filename='goes_2M_metrics'):
        fig, axs = plt.subplots(2, 2)
        plt.tight_layout()
        axs[0, 0].hist(self.goes_2M_PercentageError, color="Gray")
        axs[0, 1].hist(self.goes_2M_AbsolutePercentageError, color="Gray")
        axs[1, 0].hist(self.goes_2M_logeQ, color="Gray")
@@ -156,6 +160,8 @@ class electron_goes_metrics_calculator(object):
    def plot_goes_800K_metrics(self, output_dir, filename='goes_800K_metrics'):
        fig, axs = plt.subplots(2, 2)
        plt.tight_layout()
        axs[0, 0].hist(self.goes_800K_PercentageError, color="Gray")
        axs[0, 1].hist(self.goes_800K_AbsolutePercentageError, color="Gray")
        axs[1, 0].hist(self.goes_800K_logeQ, color="Gray")
@@ -200,5 +206,184 @@ class electron_goes_metrics_calculator(object):
class proton_goes_metrics_calculator(object):
    """
    Class that provides proton flux metrics from the provided simulation and observation data.
    Currently supports SPARX model data
    """
    # def __init__(self):

    def set_goes_information(self, goes_time_ranges, goes_obs_data, goes_sim_data, model_name):
        self.goes_obs_data = goes_obs_data
        self.goes_sim_data = goes_sim_data
        self.goes_time_ranges = goes_time_ranges
        self.model_name = model_name
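    # The data stored above are used by calculate_proton_goes_metrics below: goes_time_ranges is a
    # DataFrame with 'date start' and 'date end' columns; goes_obs_data and goes_sim_data are
    # per-interval, time-indexed DataFrames with columns 'ZPGT10E'/'ZPGT60E' (observations) and
    # '10MeV'/'60MeV' (simulation); model_name is the label written to the JSON output.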
    def calculate_proton_goes_metrics(self):
        if (len(self.goes_obs_data) != len(self.goes_sim_data)):
            print('Length of the given time ranges does not correspond to the number of proton flux simulation files.')
            exit()
        for index, dates in self.goes_time_ranges.iterrows():
            # print(self.goes_sim_data[index].loc['2014-09-10 17:46:00':'2014-09-10 18:06:00'])
            start_date = dates['date start'].strftime('%Y-%m-%d %H:%M:%S')
            end_date = dates['date end'].strftime('%Y-%m-%d %H:%M:%S')
            end_date_obs = dates['date end'].strftime('%Y-%m-%d %H:%M:%S')
            selected_sim_proton_data = self.goes_sim_data[index].loc[start_date:end_date]
            selected_obs_proton_data = self.goes_obs_data[index].loc[start_date:end_date_obs]
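            # If the simulation selection contains one more sample than the observation selection,
            # the block below extends the observation window by one 10-minute step and re-indexes
            # it onto the simulation timestamps so that both series align element-wise.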
            if (len(selected_sim_proton_data) == len(selected_obs_proton_data) + 1):
                end_date_obs = (dates['date end'] + timedelta(minutes=10)).strftime('%Y-%m-%d %H:%M:%S')
                selected_obs_proton_data = self.goes_obs_data[index].loc[start_date:end_date_obs].copy()
                selected_obs_proton_data['new_date'] = selected_sim_proton_data.index
                selected_obs_proton_data.set_index("new_date", inplace=True)
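            # On the first interval the accumulation attributes (error_goes_10M, obs_goes_10M, ...)
            # do not exist yet, so the try block raises AttributeError and the except branch
            # initialises them; for the following intervals the per-interval series are appended
            # with pd.concat.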
            try:
                self.error_goes_10M = pd.concat([self.error_goes_10M, (selected_sim_proton_data['10MeV'] - selected_obs_proton_data['ZPGT10E'])])
                self.obs_goes_10M = pd.concat([self.obs_goes_10M, selected_obs_proton_data['ZPGT10E']])
                self.sim_goes_10M = pd.concat([self.sim_goes_10M, selected_sim_proton_data['10MeV']])
                self.error_goes_60M = pd.concat([self.error_goes_60M, (selected_sim_proton_data['60MeV'] - selected_obs_proton_data['ZPGT60E'])])
                self.obs_goes_60M = pd.concat([self.obs_goes_60M, selected_obs_proton_data['ZPGT60E']])
                self.sim_goes_60M = pd.concat([self.sim_goes_60M, selected_sim_proton_data['60MeV']])
            except AttributeError:
                self.error_goes_10M = (selected_sim_proton_data['10MeV'] - selected_obs_proton_data['ZPGT10E'])
                self.obs_goes_10M = selected_obs_proton_data['ZPGT10E']
                self.sim_goes_10M = selected_sim_proton_data['10MeV']
                self.error_goes_60M = (selected_sim_proton_data['60MeV'] - selected_obs_proton_data['ZPGT60E'])
                self.obs_goes_60M = selected_obs_proton_data['ZPGT60E']
                self.sim_goes_60M = selected_sim_proton_data['60MeV']
        # RMSE = sqrt[ 1/n * sum( error**2 ) ]
        self.goes_10M_RootMeanSquareError = np.sqrt(sum(j * j for j in self.error_goes_10M) / len(self.error_goes_10M))
        # Percentage Error = (model - obs) / obs
        self.goes_10M_PercentageError = [error / obs for error, obs in zip(self.error_goes_10M, self.obs_goes_10M)]
        # Absolute Percentage Error
        self.goes_10M_AbsolutePercentageError = [abs(error / obs) for error, obs in zip(self.error_goes_10M, self.obs_goes_10M)]
        self.goes_10M_logeQ = [np.log(model / obs) for model, obs in zip(self.sim_goes_10M, self.obs_goes_10M)]
        self.goes_10M_SymmetricAccuracy = [abs(logeQ) for logeQ in self.goes_10M_logeQ]
        median = statistics.median(self.goes_10M_SymmetricAccuracy)
        # Median Symmetric Accuracy
        self.goes_10M_MedianSymmetricAccuracy = 100.0 * (np.exp(median) - 1.0)
        # Mean Absolute Percentage Error (MAPE)
        self.goes_10M_MeanAbsolutePercentageError = statistics.mean(self.goes_10M_AbsolutePercentageError)
        MdlQ = statistics.mean(self.goes_10M_logeQ)
        self.goes_10M_SymmetricSignedPercentageBias = 100 * np.sign(MdlQ) * (np.exp(abs(MdlQ)) - 1.0)
        # print("RMSE", self.goes_10M_RootMeanSquareError)
        # print("MSA", self.goes_10M_MedianSymmetricAccuracy)
        # print("MAPE", self.goes_10M_MeanAbsolutePercentageError)
        # print("SSPB", self.goes_10M_SymmetricSignedPercentageBias)
        # RMSE = sqrt[ 1/n * sum( error**2 ) ]
        self.goes_60M_RootMeanSquareError = np.sqrt(sum(j * j for j in self.error_goes_60M) / len(self.error_goes_60M))
        # Percentage Error = (model - obs) / obs
        self.goes_60M_PercentageError = [error / obs for error, obs in zip(self.error_goes_60M, self.obs_goes_60M)]
        # Absolute Percentage Error
        self.goes_60M_AbsolutePercentageError = [abs(error / obs) for error, obs in zip(self.error_goes_60M, self.obs_goes_60M)]
        self.goes_60M_logeQ = [np.log(model / obs) for model, obs in zip(self.sim_goes_60M, self.obs_goes_60M)]
        self.goes_60M_SymmetricAccuracy = [abs(logeQ) for logeQ in self.goes_60M_logeQ]
        median = statistics.median(self.goes_60M_SymmetricAccuracy)
        # Median Symmetric Accuracy
        self.goes_60M_MedianSymmetricAccuracy = 100.0 * (np.exp(median) - 1.0)
        # Mean Absolute Percentage Error (MAPE)
        self.goes_60M_MeanAbsolutePercentageError = statistics.mean(self.goes_60M_AbsolutePercentageError)
        MdlQ = statistics.mean(self.goes_60M_logeQ)
        self.goes_60M_SymmetricSignedPercentageBias = 100 * np.sign(MdlQ) * (np.exp(abs(MdlQ)) - 1.0)
        # print("RMSE", self.goes_60M_RootMeanSquareError)
        # print("MSA", self.goes_60M_MedianSymmetricAccuracy)
        # print("MAPE", self.goes_60M_MeanAbsolutePercentageError)
        # print("SSPB", self.goes_60M_SymmetricSignedPercentageBias)
    def plot_goes_metrics(self, output_dir, filename='goes_metrics'):
        fig, axs = plt.subplots(2, 2)
        plt.tight_layout()
        axs[0, 0].hist(self.goes_10M_PercentageError, color="Gray")
        axs[0, 1].hist(self.goes_10M_AbsolutePercentageError, color="Gray")
        axs[1, 0].hist(self.goes_10M_logeQ, color="Gray")
        axs[1, 1].hist(self.goes_10M_SymmetricAccuracy, color="Gray")
        axs[0, 0].set(xlabel='Percentage Error', ylabel='Count')
        axs[0, 1].set(xlabel='Absolute Percentage Error', ylabel='Count')
        axs[1, 0].set(xlabel='log_e(model/obs)', ylabel='Count')
        axs[1, 1].set(xlabel='Symmetric Accuracy', ylabel='Count')
        axs[0, 0].axvline(x=statistics.median(self.goes_10M_PercentageError))
        axs[0, 0].axvline(x=statistics.mean(self.goes_10M_PercentageError), linestyle='--')
        axs[0, 1].axvline(x=statistics.median(self.goes_10M_AbsolutePercentageError), label='Median')
        axs[0, 1].axvline(x=statistics.mean(self.goes_10M_AbsolutePercentageError), linestyle='--', label='Mean')
        axs[1, 0].axvline(x=statistics.median(self.goes_10M_logeQ))
        axs[1, 0].axvline(x=statistics.mean(self.goes_10M_logeQ), linestyle='--')
        axs[1, 1].axvline(x=statistics.median(self.goes_10M_SymmetricAccuracy))
        axs[1, 1].axvline(x=statistics.mean(self.goes_10M_SymmetricAccuracy), linestyle='--')
        axs[0, 1].legend(loc='upper right')
        plt.savefig(output_dir + filename + "_10M", dpi=200, bbox_inches='tight')
        plt.close('all')
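        # Second figure: the same four histograms and median/mean markers for the >60 MeV channel.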
        fig, axs = plt.subplots(2, 2)
        plt.tight_layout()
        axs[0, 0].hist(self.goes_60M_PercentageError, color="Gray")
        axs[0, 1].hist(self.goes_60M_AbsolutePercentageError, color="Gray")
        axs[1, 0].hist(self.goes_60M_logeQ, color="Gray")
        axs[1, 1].hist(self.goes_60M_SymmetricAccuracy, color="Gray")
        axs[0, 0].set(xlabel='Percentage Error', ylabel='Count')
        axs[0, 1].set(xlabel='Absolute Percentage Error', ylabel='Count')
        axs[1, 0].set(xlabel='log_e(model/obs)', ylabel='Count')
        axs[1, 1].set(xlabel='Symmetric Accuracy', ylabel='Count')
        axs[0, 0].axvline(x=statistics.median(self.goes_60M_PercentageError))
        axs[0, 0].axvline(x=statistics.mean(self.goes_60M_PercentageError), linestyle='--')
        axs[0, 1].axvline(x=statistics.median(self.goes_60M_AbsolutePercentageError), label='Median')
        axs[0, 1].axvline(x=statistics.mean(self.goes_60M_AbsolutePercentageError), linestyle='--', label='Mean')
        axs[1, 0].axvline(x=statistics.median(self.goes_60M_logeQ))
        axs[1, 0].axvline(x=statistics.mean(self.goes_60M_logeQ), linestyle='--')
        axs[1, 1].axvline(x=statistics.median(self.goes_60M_SymmetricAccuracy))
        axs[1, 1].axvline(x=statistics.mean(self.goes_60M_SymmetricAccuracy), linestyle='--')
        axs[0, 1].legend(loc='upper right')
        plt.savefig(output_dir + filename + "_60M", dpi=200, bbox_inches='tight')
        plt.close('all')
    def goes_metrics_to_json(self, output_dir, filename="goes_metrics"):
        # TO DO?: add additional information about the considered intervals
        json_data = {
            "metric_type": "goes_10M",
            "model_name": self.model_name,
            "name": filename,
            "Number of points": len(self.error_goes_10M),
            "Root Mean Square Error": self.goes_10M_RootMeanSquareError,
            "Median Symmetric Accuracy": self.goes_10M_MedianSymmetricAccuracy,
            "Mean Absolute Percentage Error": self.goes_10M_MeanAbsolutePercentageError,
            "Symmetric Signed Percentage Bias": self.goes_10M_SymmetricSignedPercentageBias
        }
        with open(output_dir + filename + "_10M.json", 'w', encoding='utf-8') as f:
            json.dump(json_data, f, ensure_ascii=False, indent=4)
        json_data = {
            "metric_type": "goes_60M",
            "model_name": self.model_name,
            "name": filename,
            "Number of points": len(self.error_goes_60M),
            "Root Mean Square Error": self.goes_60M_RootMeanSquareError,
            "Median Symmetric Accuracy": self.goes_60M_MedianSymmetricAccuracy,
            "Mean Absolute Percentage Error": self.goes_60M_MeanAbsolutePercentageError,
            "Symmetric Signed Percentage Bias": self.goes_60M_SymmetricSignedPercentageBias
        }
        with open(output_dir + filename + "_60M.json", 'w', encoding='utf-8') as f:
            json.dump(json_data, f, ensure_ascii=False, indent=4)
\ No newline at end of file
#Time start Time end
2014-09-11T18:46:00 2014-09-12T18:06:00
2014-09-11T18:26:00 2014-09-11T22:06:00