#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 12 17:46:50 2025

Statistics and long term variation of the cfgnew parameters.

Cfgnew pars are loaded from calfiles for the entire mission span.

Outliers:
    Z-scores are computed for each parameter over all its elements.
    The parameter values corresponding to the top 100 z-scores are removed.

Computes:

    yearly values:
        parYmean    yearly average
        parYstd     yearly standard deviation

    mission values:
        parStd      mission standard deviation
        parMYstd    mean yearly std dev (from parYmean)
        parMean     mission average
        parTrend    mission trend

The spin axis offset_sc parYmean is replaced with yearly values from the calibration
(Zoffsettest.py). The corresponding parMean is computed from the (changed) parYmean.
The spin axis offset_sc parYstd is similarly replaced together with the corresponding parMYstd.

The trends are computed as the difference between the mean of the last
2*nwy+1 yearly averages and the mean of the first 2*nwy+1 valid yearly averages
(nwy=1, i.e. 3-year windows), divided by the number of years between the window centres.
The trends in the final table are however expressed in decadal changes.
The trends of parameters which show a full mission variation less than
3 mean yearly std devs (parMYstd) are set to zero.

The values in the printed table are rounded to 1 digit after comma for offsets and
4 digits for the other parameters.

@author: dragos
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import sys
import re
from scipy import stats
import copy
from pathlib import Path
Cpath=str(Path(__file__).parent.parent)+'/modules/'
if not Cpath in sys.path: sys.path.append(Cpath)
import Cluster.load as load
# import Cluster.calibration as cal
import Cluster.config as cfg

# analysed interval; alternatives: cfg.FIRSTDAY as first day,
# a fixed date such as '2010-01-10' as last day
dbeg = '2001-01-01'
dend = cfg.LASTDAY
# calfile version; alternative: 'last'
version = '04'
# read the collected parameters from the pickle file / write them to it
fromPickle = True
toPickle = False


# daily time axis from dbeg up to (not including) dend
db = np.datetime64(dbeg)
de = np.datetime64(dend)
days = np.arange(db, de, np.timedelta64(1, 'D'))

# pickle file name: <FGMroot>archive/calfiles/cfgnew/cfgnewALL_<first>_to_<last>_V<version>.pkl
interval = '_to_'.join(np.datetime_as_string(_d, unit='D') for _d in (db, de))
file = f"{cfg.PATHS['FGMroot']}archive/calfiles/cfgnew/cfgnewALL_{interval}_V{version}.pkl"

# parLT maps 'C1'..'C4' to a daily DataFrame with one column per parameter
# name in cfg.FGMNAMES.
if fromPickle:
    # NOTE(review): pickle.load can execute arbitrary code; only load cache
    # files that were written by this script (toPickle=True).
    with open(file, 'rb') as f: parLT=pickle.load(f)
else:
    # collect the daily cfgnew parameters of every spacecraft from the calfiles
    inputDir=None#cfg.PATHS['tmpDir']#
    parLT={'C'+_: pd.DataFrame(columns=cfg.FGMNAMES.keys(), index=days) for _ in '1234'}
    for sc in '1234':
        for day in days:
            cfgnewFile, V =load.cfgnew_file_name(day=day, spacecraft=sc, debug=True,
                                             version=version, archive=True,
                                             inputDir=inputDir)
            cfgnewPars=load.cfgnew_pars(cfgnewFile, GUInames=True)
            # when cfgnewPars['file'] is falsy (presumably: no calfile for that
            # day) the cells keep the scalar NaN the DataFrame was created with
            if cfgnewPars['file']:
                for par in parLT['C'+sc].columns:
                    parLT['C'+sc].loc[day,par]=cfgnewPars[par]

        # right shape for missing pars
        for parname in cfg.FGMNAMES.keys():
            nanshape=(3,3) if 'Matrix' in parname else (1,3)
            # A cell counts as missing when it is still a scalar placeholder
            # (np.shape(x) == (), which the former test np.shape(x)[0] could
            # not index) or an empty sequence.  Every missing cell gets its own
            # NaN array so that later in-place edits cannot leak between days.
            parLT['C'+sc][parname]=parLT['C'+sc][parname].apply(
                lambda x: np.full(nanshape, np.nan)
                if np.ndim(x) == 0 or np.shape(x)[0] == 0 else x)

    if toPickle:
        with open(file, 'wb') as f: pickle.dump(parLT, f)

#%% stats

# remove outliers
# For every spacecraft and parameter the absolute z-scores of all
# (non-constant) elements are pooled; the days holding the outN largest scores
# are blanked (set to NaN) for the whole parameter.
# outI: dates of the removed days per spacecraft and parameter
outI={'C'+_: {_p:None for _p in cfg.FGMNAMES.keys()} for _ in '1234'}
# daily index
didx=parLT['C1'].index
# number of largest z-scores removed per parameter
outN=100
for sc in '1234':
    for parname in cfg.FGMNAMES.keys():
        columns=(0,1,2)
        rows=(0,1,2) if 'Matrix' in parname else (0,)
        zparts_=[]
        for r in rows:
            for c in columns:
                par_=parLT['C'+sc][parname].map(lambda _:_[r,c]).astype(float)
                # (nearly) constant elements are skipped
                if par_.std() > 1.e-6:
                    # nan_policy='omit': with the default 'propagate' a single
                    # missing day turns every z-score of the element into NaN
                    zsc=np.abs(stats.zscore(par_.to_numpy(), nan_policy='omit'))
                    zparts_.append(pd.DataFrame({'date':didx,'zscore':zsc}))
        if not zparts_: continue
        # missing days have no z-score and must not compete for the top ranks
        pzs_=pd.concat(zparts_).dropna(subset=['zscore'])
        nout_=min(outN, len(pzs_))
        if nout_ == 0: continue
        top_=np.argpartition(pzs_['zscore'].to_numpy(), -nout_)[-nout_:]
        # several elements may flag the same day: keep every date once
        outI['C'+sc][parname]=np.unique(pzs_['date'].to_numpy()[top_])
        parLT['C'+sc].loc[outI['C'+sc][parname],parname]*=np.nan


# NaN-filled result containers.
# Multiplying the object cells by np.nan creates a new NaN array in every cell
# (assuming the cells hold numpy arrays), so the containers below share no
# array with parLT or with each other and no deep copy of the whole data set
# is required.
# yearly index
yidx=parLT['C1'].resample('YS').apply(lambda x: np.nan).index
# yearly std
parYstd={'C'+_: parLT['C'+_].loc[yidx]*np.nan for _ in '1234'}
# mission std
parStd={'C'+_: parYstd['C'+_].iloc[-1]*np.nan for _ in '1234'}
# mission mean yearly std
parMYstd={'C'+_: parYstd['C'+_].iloc[-1]*np.nan for _ in '1234'}
# mission mean yearly std at least as large as range resol for offsets ...
parMYstdRes={'C'+_: parYstd['C'+_].iloc[-1]*np.nan for _ in '1234'}
# yearly mean
parYmean={'C'+_: parYstd['C'+_].loc[yidx]*np.nan for _ in '1234'}
# mission mean
parMean={'C'+_: parYstd['C'+_].iloc[-1]*np.nan for _ in '1234'}


def zerotoone(x):
    """Return 1 if *x* equals zero, otherwise return *x* unchanged.

    Used to protect divisions against zero denominators.
    """
    if x == 0:
        return 1
    return x


# element-wise version for arrays; np.vectorize derives the output dtype from
# the result of the first element
vzerotoone=np.vectorize(zerotoone)


# Fill the statistics containers for every spacecraft and parameter:
#   parMean / parYmean   mission and yearly mean
#   parStd  / parYstd    mission and yearly standard deviation (per element)
#   parMYstd             mean of the non-zero yearly standard deviations
#   parMYstdRes          copy of parMYstd, modified below for the trend test
for sc in '1234':
    print('C'+sc)
    for parname in cfg.FGMNAMES.keys():
        columns=(0,1,2)
        rows=(0,1,2) if 'Matrix' in parname else (0,)

        # offsets enter the statistics rounded to 3 decimals
        if 'offset' in parname:
            parLT['C'+sc][parname]=parLT['C'+sc][parname].apply(lambda x: np.round(x,3))

        # days whose element sum is not NaN
        idxok=parLT['C'+sc].loc[:,parname].apply(lambda x: np.sum(x)).dropna().index
        parMean['C'+sc][parname]=parLT['C'+sc].loc[:,parname][idxok].mean()
        parYmean['C'+sc][parname]=parLT['C'+sc].loc[:,parname][idxok].resample('YS').mean()
        # Scalar entries of the yearly mean are replaced by NaN arrays of the
        # parameter shape.  Each year gets its own copy: the spin axis offsets
        # are later written into these arrays in place, and a shared array
        # would make all such years show the value written last.
        nanarr=np.full_like(parYmean['C'+sc][parname].sum(), np.nan)
        parYmean['C'+sc][parname]=(parYmean['C'+sc][parname].apply(
            lambda x: nanarr.copy() if np.isscalar(x) else x))

        # element-wise mission and yearly standard deviation
        for r in rows:
            for c in columns:
                par_=parLT['C'+sc].loc[:,parname].map(lambda _:_[r,c] if len(_) else np.nan)
                par_=par_.astype(float)
                parStd['C'+sc][parname][r,c]=par_.std()
                pYstd_=par_.resample('YS').std()
                for year in yidx:
                    # the cell holds an array object, which is edited in place
                    parYstd['C'+sc].loc[:,parname].loc[year][r,c]=pYstd_.loc[year]
        # years whose yearly std contains no NaN element
        idxok=parYstd['C'+sc].loc[:,parname].apply(lambda x: np.sum(x)).dropna().index

        # mean excluding zeroes
        # Numerator and denominator use the same (NaN-free) years; counting
        # over all years would also count NaN entries (NaN != 0 is True) and
        # bias the mean low.
        ystdok_=parYstd['C'+sc].loc[:,parname][idxok]
        if len(idxok):
            nFin=ystdok_.apply(lambda x: (x!=0).astype(int)).sum()
            nFin=vzerotoone(nFin)
            parMYstd['C'+sc][parname]=ystdok_.sum()/nFin
        else:
            # no usable year: zero array of the parameter shape
            nFin=np.ones_like(parStd['C'+sc][parname], dtype=int)
            parMYstd['C'+sc][parname]=np.zeros_like(parStd['C'+sc][parname], dtype=float)
        parMYstdRes['C'+sc][parname]=parMYstd['C'+sc][parname].copy()


        # parMYstdRes is the scale a trend is later compared with; inflating
        # it suppresses the trend of the corresponding elements
        # relax trend condition for offsets
        if 'offset_r' in parname: parMYstdRes['C'+sc][parname]*=1.4
        # no range 7 trend
        if '_r7' in parname: parMYstdRes['C'+sc][parname]+=100
        # no offset r2 trend
        if parname == 'offset_r2': parMYstdRes['C'+sc][parname]+=100
        # no trend for upper right corner of matrix
        if 'Matrix' in parname:
            parMYstdRes['C'+sc][parname][0,]=100
            parMYstdRes['C'+sc][parname][1,1:]=100

# replace spin axis offset statistics with those derived from calibration
# (yearly values read from <tmpDir>C<n>_offZ_yearly.csv, columns Oz and MAD)
saOff={'C'+_: pd.read_csv(cfg.PATHS['tmpDir']+'C'+_+'_offZ_yearly.csv',
                             index_col=0, parse_dates=[0]) for _ in '1234'}
for sc in '1234':
    cal_=saOff['C'+sc]
    # change labels to year start to match resampled yearly index
    cal_.index=cal_.index-pd.DateOffset(months=2)
    cal_['Oz']=cal_['Oz']+cfg.ETALONOZ['C'+sc]

    ymean_=parYmean['C'+sc]['offset_sc']
    ystd_=parYstd['C'+sc]['offset_sc']
    for year in cal_.index:
        # the cells hold array objects; element [0,0] is overwritten in place
        ymean_[year][0,0]=cal_['Oz'][year]
        ystd_[year][0,0]=cal_['MAD'][year]

    # mission values follow from the replaced yearly values
    # NOTE(review): parMYstdRes (used for the trend threshold) was copied from
    # parMYstd earlier and is not refreshed here - confirm this is intended.
    parMYstd['C'+sc]['offset_sc'][0,0]=ystd_.mean()[0,0]
    parMean['C'+sc]['offset_sc'][0,0]=ymean_.mean()[0,0]

# trends per year over the mission:
# (mean of the last nwin_ yearly means - mean of the first nwin_ yearly means)
# divided by the number of years between the two window centres
nwy=1
nwin_=2*nwy+1
dy=yidx[-1].year-yidx[0].year-2*nwy
parTrend={}
for sc in '1234':
    ymean_=parYmean['C'+sc]
    parTrend['C'+sc]=(ymean_.iloc[-nwin_:].sum()/nwin_
                      -ymean_.iloc[:nwin_].sum()/nwin_)/dy

hasTrend={}
for sc in '1234':
    ymean_=parYmean['C'+sc]
    # ranges 5-7: the first window starts after cfg.FIRSTdayInRng[r]
    for r in '567':
        iok=ymean_.index[ymean_.index > cfg.FIRSTdayInRng[r]]
        dyr_=iok[-1].year-iok[0].year-2*nwy
        for prefix_ in ('offset_r', 'Matrix_r'):
            col_=ymean_[prefix_+r]
            parTrend['C'+sc][prefix_+r]=(col_.iloc[-nwin_:].sum()/nwin_
                                         -col_.loc[iok[:nwin_]].sum()/nwin_)/dyr_

    # remove too small trends
    # (overall (mission) change not larger than 3 times parMYstdRes)
    # NOTE(review): the change is evaluated over the full mission span dy also
    # for ranges 5-7, whose trends were derived over a shorter span - confirm.
    hasTrend['C'+sc]=(np.abs(parTrend['C'+sc]*dy)/parMYstdRes['C'+sc]).apply(
        lambda x: (x>3).astype(int))
    parTrend['C'+sc]=parTrend['C'+sc]*hasTrend['C'+sc]


#%% plot all

# legend suffixes for the matrix elements (on a new line below the element name)
_mtx_tex=((r"$1/G_x$", r'$-\theta_x\cos{\varphi_x}$', r'$-\theta_x\sin{\varphi_x}$'),
          (r'$-\theta_y$', r'$1/G_y$', r'$-\varphi_y$'),
          (r'$-\theta_z$', r'$-(\varphi_y-\Delta\varphi_{yz})$', r'$1/(G_y-\Delta G_{yz})$'))
Mlab=[['\n'+_ for _ in _row] for _row in _mtx_tex]

# matrices get the labels above, all other parameters an empty suffix
lb={_:Mlab if 'Matrix' in _ else [['']*3]*3 for _ in cfg.FGMNAMES.keys()}

# first day of every year covered by the daily axis
years=np.unique(np.array([np.datetime64(f"{_.year}-01-01")
                          for _ in days.astype(object)]))

# one figure per parameter for the selected spacecraft, one panel per element:
# daily values as dots, yearly mean +- yearly std as error bars at mid-year
sc='1'
idx=parLT['C'+sc].index
figs={}
for parname in cfg.FGMNAMES.keys():
    columns=(0,1,2)
    rows=(0,1,2) if 'Matrix' in parname else (0,)
    print(parname)
    figs[parname]=plt.figure()
    # parameters of ranges above 4 are blanked before cfg.FIRSTdayInRng
    rng=parname[-1]
    late_=rng.isdigit() and int(rng) > 4
    for r in rows:
        for c in columns:
            par_=parLT['C'+sc].loc[:,parname].map(lambda _:_[r,c]).astype(float)
            pYmean=parYmean['C'+sc].loc[:,parname].map(lambda _:_[r,c])
            pYstd=parYstd['C'+sc].loc[:,parname].map(lambda _:_[r,c])

            if late_:
                par_[idx < cfg.FIRSTdayInRng[rng]]=np.nan
                pYmean[years <= cfg.FIRSTdayInRng[rng]]=np.nan

            # 3x3 grid for matrices, three stacked panels otherwise
            ax=plt.subplot(len(columns), len(rows), r*3+c+1)
            ax.plot(days, par_,
                    label=f"C{sc} {parname} {'xyz'[r]}{'xyz'[c]}{lb[parname][r][c]}",
                    marker='.', linestyle='', alpha=0.1)
            ax.errorbar(pYmean.index+pd.DateOffset(months=6), pYmean, yerr=pYstd,
                        marker='o', linestyle='', alpha=0.9)

            # shade every second year
            for y0_, y1_ in zip(years[::2], years[1::2]):
                ax.axvspan(y0_, y1_, alpha=0.05, color='black')
            ax.set_xlim(db,de)
            if parname == 'offset_r2':
                ax.set_ylim(par_.iloc[0]-1, par_.iloc[0]+1)
            ax.legend(loc='lower right' if 'Matrix' in parname else 'upper right')
            # x label on the lowest panels only
            if r == 2 or (c == 2 and 'Matrix' not in parname):
                ax.set_xlabel('date')
            if r == 0 and 'offset' in parname:
                ax.set_ylabel('nT')
            if r == 0 and 'Angle' in parname:
                ax.set_ylabel('deg')


#%% save figs

# Write every figure of the "plot all" cell to <tmpDir>cfgnewPlots/C<sc>_<par>.pdf
# (sc is the spacecraft selected in that cell) and close it afterwards.
page_size=(16.5, 11.7) # landscape a3 size in inches...
plot_dir=cfg.PATHS['tmpDir']+'cfgnewPlots/'
# savefig does not create missing directories
Path(plot_dir).mkdir(parents=True, exist_ok=True)

for par, fig_ in figs.items():
    fname=plot_dir+'C'+sc+'_'+par+'.pdf'
    fig_.set_figwidth(page_size[0])
    fig_.set_figheight(page_size[1])
    fig_.savefig(fname)
    plt.close(fig_)
    print('saved: '+fname)


#%% latex tables

# Prepare the tabulated quantities: trends become decadal, offsets are rounded
# to 1 decimal and all other parameters to 4 decimals.
offcols=[_ for _ in cfg.FGMNAMES.keys() if 'offset' in _]
for sc in '1234':
    # decadal trends:
    parTrend['C'+sc]*=10
    # round
    for tab_ in (parTrend, parMean, parMYstd):
        raw_=tab_['C'+sc]
        rounded_=raw_.apply(lambda x: np.round(x,4))
        # offsets are rounded from the unrounded values: rounding the 4-decimal
        # result once more can flip the last digit (e.g. 0.04996 -> 0.05 -> 0.1)
        rounded_[offcols]=raw_[offcols].apply(lambda x: np.round(x,1))
        tab_['C'+sc]=rounded_


# output file and fixed text fragments of the LaTeX statistics tables
LaTeXfile=cfg.PATHS['tmpDir']+'parStatsTables.tex'

# document preamble (ends with a newline) and closing line
docheader='\n'.join([
    r'\documentclass[a4paper]{article}',
    r'\usepackage{booktabs}',
    r'\usepackage{amsmath}',
    r'\usepackage[dvipsnames]{xcolor}',
    r'\usepackage{rotating}',
    r'\usepackage{siunitx}',
    r'\usepackage{mathtools}',
    r'%\newrobustcmd{\LG}{\color{lightgray}}',
    r'\begin{document}',
    '',
])
docfooter=r'\end{document}'

# closes the tabular, the sidewaystable and the group opened by the table header
tabfooter='\n'.join([
    r'\bottomrule',
    r'\end{tabular}',
    r'\end{sidewaystable}',
    '}',
])

# names of the range matrices
mtxcols=[name for name in cfg.FGMNAMES.keys() if 'Matrix_r' in name]

# numpy array2string formatters: 4 decimals in general, 1 decimal for offsets
formatter={'float_kind':lambda x: f"{x:10.4f} "}
formatterOff={'float_kind':lambda x: f"{x:10.1f} "}

# delimiters of one matrix cell
begMtx=r'$\begin{bmatrix*}[S]'
endMtx=r'\end{bmatrix*}$'

# siunitx settings printed in front of every offset matrix
sisetupOff=(r'\sisetup{round-precision=1}'+'\n'
            +r'\sisetup{table-format=-3.2}'+'\n')

def _matrix_lines(arr, nrows, fmt):
    """Return the first *nrows* rows of *arr* as LaTeX matrix lines.

    Every row is formatted with np.array2string (comma separated, formatter
    *fmt*), entries that print as zero are prefixed with \\color{lightgray}
    and the line is terminated with a LaTeX line break.
    """
    lines=[]
    for r in range(nrows):
        txt=np.array2string(arr[r], separator=",", formatter=fmt)[1:-1]
        txt=re.sub(r' (-?0\.0+) ', r'\\color{lightgray}\1', txt)
        lines.append(txt+r'\\')
    return lines


def _write_parameter(f, key, name, nrows, fmt, setup=None):
    """Write the table row of parameter *name* of spacecraft *key* to *f*.

    The row holds the parameter name followed by three matrices: mission
    average, mean yearly standard deviation and decadal trend.  *setup*, when
    given, is printed in front of every matrix.
    """
    print(r'\midrule', file=f)
    # raw string: '\_' in a normal literal is an invalid escape sequence
    print(name.replace('_', r'\_')+r' &', file=f)
    tables=(parMean[key], parMYstd[key], parTrend[key])
    for i, tab in enumerate(tables):
        last=(i == len(tables)-1)
        if setup is not None: print(setup, file=f)
        print(begMtx, file=f)
        for line in _matrix_lines(tab[name], nrows, fmt):
            print(line, file=f)
        # the last cell also ends the table row
        print(endMtx+(r'\\' if last else ''), file=f)
        if not last: print(r'&', file=f)


# one sideways table per spacecraft, all written in a single pass
with open(LaTeXfile, 'w') as f:
    print(docheader, file=f)

    for sc in '1234':

        tabheader=r"""{
    \sisetup{round-mode=places,
             round-precision=4,
             table-format=-1.5}
    \begin{sidewaystable}\small
    \caption{"""+'Cluster '+sc+r""" FGM calibration parameters statistics
             over the entire mission lifetime. Offsets are expressed in nT,
             angles in degrees, matrix elements are adimensional factors.}
    \label{tab.C"""+sc+r"""stats}
    \medskip
    \catcode`,=4
    \begin{tabular}{lccc}
    \toprule
    parameter	& mission average & mean yearly standard deviation & decadal trend 	\\
    """
        print(tabheader, file=f)

        # single-row parameters: offsets (1 decimal, own siunitx setup) and angles
        for off in [*offcols,'Angle_xyz']:
            if 'Angle' in off:
                _write_parameter(f, 'C'+sc, off, 1, formatter)
            else:
                _write_parameter(f, 'C'+sc, off, 1, formatterOff, sisetupOff)

        # 3x3 range matrices
        for mtx in mtxcols:
            _write_parameter(f, 'C'+sc, mtx, 3, formatter)

        print(tabfooter, file=f)

    print(docfooter, file=f)

#%% Supp Info figs

# Write the LaTeX figure environments of the supplementary information:
# one section per spacecraft and one figure per parameter listed in pars.
FigFile=cfg.PATHS['tmpDir']+'SIfigs.tex'

bFig=r"""
\begin{figure}[!htb]
    \centering
    \includegraphics[width=\textwidth]{parStats/plots/cropped/"""
eFig=r"""\end{figure}"""

pars=['Angle_xyz', 'offset_sc',
      'offset_r3', 'offset_r4', 'offset_r5', 'offset_r6', 'offset_r7',
      'Matrix_r2', 'Matrix_r3', 'Matrix_r4', 'Matrix_r5', 'Matrix_r6', 'Matrix_r7']

with open(FigFile, 'w') as f:
    for sc in '1234':
        print(r"\section{Cluster "+sc+"}",file=f)
        for par in pars:
            # figure width: 1.2\textwidth for the angles, 1.4\textwidth otherwise
            if "Angle" in par:
                bFig_=bFig.replace(r'\text',r'1.2\text')
            else:
                bFig_=bFig.replace(r'\text',r'1.4\text')
            print(bFig_+'C'+sc+'_'+par+r"-crop.pdf.jpg}", file=f)
            # raw string: '\_' in a normal literal is an invalid escape sequence
            print(r"    \caption{Cluster "+sc+" "+par.replace('_',r'\_')+r"}", file=f)
            print(r"    \label{fig.SI.C"+sc+par+r"}", file=f)
            print(eFig, file=f)



#%% plot selected

# Selected elements, written as '<spacecraft> <parameter> <element>': the
# element is one letter (row 0) for vector parameters and two letters
# (row, column) for matrices.
pars=['C1 offset_sc y','C1 offset_sc z']
# alternative selection:
# pars=['C'+_n+' '+_p for _n in '1234' for _p in
#        ['offset_sc '+_c for _c in 'xyz']
#       +['Angle_xyz '+_c for _c in 'xyz']
#       +['Matrix_r'+_r+' zy' for _r in '2345']
#       +['Matrix_r'+_r+' zz' for _r in '2345']]


# index of an element letter; the blank in front of a single letter gives row 0
xn={' ':0, 'x':0,'y':1,'z':2}

to_plot=pd.DataFrame(columns=pars, index=days)

for par in pars:
    sc=par[:2]
    x=xn[par[-2]]
    y=xn[par[-1]]
    # the parameter name occupies characters 3..11 of the label
    series_=parLT[sc][par[3:12]]
    values_=[]
    for day_ in days:
        cell_=series_.loc[day_]
        # cells of size 1 are plotted as NaN
        values_.append(float(np.nan if np.size(cell_) == 1 else cell_[x,y]))
    to_plot[par]=values_

# optional despiking against the interpolated monthly median:
# to_plot[np.abs(to_plot.resample('ME').median().resample('D').interpolate(method='cubic')
#                -to_plot) > .5]=np.nan

# one figure per selected element
for parname in to_plot.columns:
    print(parname)
    fig=plt.figure()
    to_plot[parname].plot(subplots=True, marker='o', linestyle='-', alpha=0.3)
    ax=plt.gca()
    ax.set_title(parname)

# to_plot.plot(subplots=True, marker='o', linestyle='', alpha=0.1)
