#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug  6 13:38:39 2025

Module for loading magnetic field related Cluster data.

Exported functions
------------------

data_file_name() :
    Returns the names of raw data files.

cfgnew_file_name() :
    Returns the name of the cfgnew calibration parameters file.

fgmcal_file_name() :
    Returns the name of the fgmcal file.

cfgnew_pars() :
    Loads the cfgnew calibration parameters from file.

mag() :
    Loads the (daily) calibrated magnetic field data.

@author: dragos
"""
import numpy as np
import pandas as pd
import os
import glob
# import shutil
import subprocess
from io import StringIO
from warnings import warn
from . import config as cfg


def data_file_name(days, spacecraft=1, mode='n', debug=False, version='last'):
    """
    Returns the names of raw data files.

    The files are searched with the glob pattern
    ``<FGMroot>/data/raw/ESTEC/cluster<sc>/<mode>sd_<sc>/<yymmdd>f<mode>.?<V><sc>``
    where ``<V>`` is the one character data version. ``FGMroot`` is read from
    the FGMROOT environment variable, falling back to cfg.PATHS['FGMroot'].

    Parameters
    ----------
    days : scalar or array of datetime64 compatible dates.
    spacecraft : integer or string, optional
        Spacecraft number (1 to 4). The default is 1.
    mode : one character string, optional
        'b' for burst science, 'n' for normal science and '?' for both.
        The default is 'n'.
    debug : boolean, optional
        If set, print the selected file names.
    version : string, optional
        The (one character) data version or 'last'. With 'last', the highest
        version found is selected independently for each day.
        The default is 'last'.

    Returns
    -------
    files : list
        Data file names, in the order of the input days. Days without any
        matching file contribute nothing.

    """

    sc = str(spacecraft)

    days = np.array(days)
    if not days.ndim:
        days = days.reshape(1)
    # 'yymmdd' strings, as used in the raw data file names
    dayStr = [np.datetime_as_string(np.datetime64(d), unit='D')[2:].replace('-', '')
              for d in days]

    FGMroot = os.environ.get('FGMROOT') or cfg.PATHS['FGMroot']

    path = FGMroot+'/data/raw/ESTEC/cluster'+sc+'/'+mode+'sd_'+sc+'/'

    files = []
    for day in dayStr:
        prefix = path+day+'f'+mode+'.'
        if version == 'last':
            candidates = glob.glob(prefix+'??'+sc)
            if not candidates:
                # Nothing for this day. The version must be resolved per day,
                # otherwise a missing day shifts the versions of the others.
                continue
            # the version is the character just before the spacecraft digit
            dayVersion = max(name[-2:-1] for name in candidates)
        else:
            # the same explicit version applies to every requested day
            dayVersion = str(version)
        files.extend(glob.glob(prefix+'?'+dayVersion+sc))

    if debug:
        print('Input data files:')
        print(files)
    return files



def cfgnew_file_name(day, spacecraft=1, debug=False, version='last',
                     archive=True, inputDir=None, count=None, maxV=99):
    """
    Returns the name of the cfgnew calibration parameters file. If no file is
    found for the requested day, try previous day, until a file is found.
    The files are searched either in the archive directory,
    in cfg.PATHS['FGMpath'] or in "inputDir", depending on the options.

    Parameters
    ----------
    day : datetime64 compatible scalar
    spacecraft : integer or string, optional
        Spacecraft number (1 to 4). The default is 1.
    debug : boolean, optional
    version : string or integer, optional
        The calfile version. Last two characters before the .cfgnew extension,
        or 'last'. Other values are converted to a zero padded two character
        string. The default is 'last'.
    archive : boolean, optional
        If set, search the archive directory below FGMroot (FGMROOT
        environment variable or cfg.PATHS['FGMroot']). The default is True.
    inputDir : string or None, optional
        The directory where the cfgnew files reside. If passed then the archive
        kw is ignored.
    count : one element integer list, optional
        Counter of the days stepped back. The default is None (internal
        counter starting at zero). If set to a (one element list) variable,
        indicates if the file name corresponds to the input day
        (count[0] == 0). The counter is only incremented, so it must be
        reset to zero before each function call.
    maxV : integer or string, optional
        Highest version accepted when version is 'last'. The default is 99.

    Raises
    ------
    Exception
        If more than 1000 days were stepped back (counter), or
        if day is before cfg.FIRSTDAY.

    Returns
    -------
    tuple of string
        (cfgnew file name,  version). Both are empty strings if the search
        reaches cfg.FIRSTDAY.

    """

    if count is None:
        count = [0]

    sc = str(spacecraft)
    day = np.datetime64(day)
    maxV = str(maxV).zfill(2)

    # Iterate instead of recursing: the 1000 steps limit below could never be
    # reached by recursion within the default interpreter recursion limit.
    while True:
        if day == cfg.FIRSTDAY:
            return '', ''
        if day < cfg.FIRSTDAY:
            raise Exception('No cfgnew files before '+str(cfg.FIRSTDAY))
        if count[0] > 1000:
            raise Exception('Too many calls to cfgnew_file_name')

        if debug:
            print('count: '+str(count[0]))

        # 'YYYY-MM-DD', independent of the datetime64 unit of "day"
        iso = np.datetime_as_string(day, unit='D')
        year, month, dayStr = iso[2:4], iso[5:7], iso[8:10]

        if inputDir and archive:
            warn('"inputDir" given, setting "archive" to False')
            archive = False

        if archive:
            FGMroot = os.environ.get('FGMROOT') or cfg.PATHS['FGMroot']
            # os.path.join works with or without a trailing separator
            path = os.path.join(FGMroot, 'archive', 'calfiles', 'cfgnew',
                                year+'_'+month)
        else:
            path = inputDir if inputDir else cfg.PATHS['FGMpath']

        stem = os.path.join(path, 'c'+sc+'_'+year+month+dayStr+'_????r2_V')
        allFiles = glob.glob(stem+'??.cfgnew')

        files = []
        V = ''
        if allFiles:
            if version == 'last':
                # two version characters right before the '.cfgnew' extension
                allowed = sorted(v for v in (f[-9:-7] for f in allFiles)
                                 if v <= maxV)
                V = allowed[-1] if allowed else ''
            else:
                V = str(version).zfill(2)
            if V:
                files = glob.glob(stem+V+'.cfgnew')

        if files:
            if len(files) >= 2:
                print('WARNING: multiple cfgnew files for single day:')
                print(files)
                # sort by modify time, the most recent file is returned
                files = sorted(files, key=os.path.getmtime)

            if debug:
                print('Calibration cfgnew file(s):')
                print(files)

            return files[-1], V

        print('WARNING: no cfgnew file found for '
              +np.datetime_as_string(day, unit='D')+'.')
        day = day-np.timedelta64(1, 'D')
        count[0] += 1


def fgmcal_file_name(day, spacecraft=1, debug=False, version='last',
                     archive=True):
    """
    Returns the name of the fgmcal file.

    The file is searched either in the archive directory below FGMroot
    (FGMROOT environment variable or cfg.PATHS['FGMroot']) or in
    cfg.PATHS['FGMpath'], depending on the "archive" option.

    Parameters
    ----------
    day : datetime64 compatible scalar
    spacecraft : integer or string, optional
        Spacecraft number (1 to 4). The default is 1.
    debug : boolean, optional
        If set, print the search pattern and the resulting file name.
    version : string or integer, optional
        The calfile version. Last two characters before the .fgmcal extension,
        or 'last' for the highest version found. The default is 'last'.
    archive : boolean, optional
        The default is True.

    Raises
    ------
    Exception
        if day is before cfg.FIRSTDAY.

    Returns
    -------
    file : string
        fgmcal file name, or an empty string (with a warning) if the file
        does not exist.
    V : string
        version. Empty if version is 'last' and no file was found.

    """

    sc = str(spacecraft)
    day = np.datetime64(day, 'D')
    if day < cfg.FIRSTDAY:
        raise Exception('No fgmcal files before '+str(cfg.FIRSTDAY))

    date = day.astype(object)
    year = str(date.year)
    month = str(date.month).zfill(2)
    dayStr = str(date.day).zfill(2)

    if archive:
        FGMroot = os.environ.get('FGMROOT') or cfg.PATHS['FGMroot']
        # os.path.join works with or without a trailing separator in FGMroot
        path = os.path.join(FGMroot, 'archive', 'calfiles', 'fgmcal',
                            year, month)
    else:
        path = cfg.PATHS['FGMpath']

    stem = os.path.join(path, 'C'+sc+'_'+year+month+dayStr+'_V')

    if version == 'last':
        filesGlob = stem+'??.fgmcal'
        if debug:
            print('pattern: ', filesGlob)
        allFiles = glob.glob(filesGlob)
        # two version characters right before the '.fgmcal' extension
        V = max(f[-9:-7] for f in allFiles) if allFiles else ''
    else:
        V = str(version).zfill(2)

    file = stem+V+'.fgmcal'

    if debug:
        print('filename: '+file)

    if not os.path.isfile(file):
        file = ''
        warn('no fgmcal file found for '+str(day)+' version V'+V)

    return file, V


def cfgnew_pars(cfgnewfile, GUInames=False):
    """
    Loads the cfgnew calibration parameters from file.

    Parameters
    ----------
    cfgnewfile : string
        cfgnew file name. If empty, every parameter is returned as an empty
        array.
    GUInames : boolean, optional
        If set, the output keys are the GUI parameter names, otherwise are
        the parameter names as they appear in the file (without the '=' char).
        The default is False.

    Returns
    -------
    cfgnewPars : dictionary
        The cfgnew calibration parameters, one array with the 'x', 'y', 'z'
        columns per parameter name, plus the key 'file' holding "cfgnewfile".

    """
    fileNames = cfg.FGMNAMES.values()

    if cfgnewfile:
        # the version is the two characters before the '.cfgnew' extension;
        # it selects how many rows are read from the file
        V = cfgnewfile[-9:-7]
        rows = 28 if V < '03' else 32
        # raw string: '\s' is not a valid escape in a normal string literal
        table = pd.read_csv(cfgnewfile, sep=r'\s+', header=None, comment='#',
                            names=['name', 'x', 'y', 'z'], nrows=rows)
        table['name'] = [_.replace('=', '') for _ in list(table['name'])]
        cfgnewPars = {_: table[table['name'] == _][list('xyz')].to_numpy()
                      for _ in fileNames}
    else:
        cfgnewPars = {_: np.array([[]]) for _ in fileNames}

    if GUInames:
        # re-key: cfg.FGMNAMES maps GUI parameter names to file names
        cfgnewPars = {gui: cfgnewPars[name]
                      for gui, name in cfg.FGMNAMES.items()}
    cfgnewPars['file'] = cfgnewfile
    return cfgnewPars


def mag(dbeg, dend, spacecraft=1, mode='n', rng='all', coord= 'gse', FGMpath=None,
        archive=True, debug=False, dataversion='last', full=False, fgmcalfile=None):
    """
    Loads the (daily) calibrated magnetic field data.

    The raw data files of all days touched by the interval are piped through
    the external programs ddsmrg, ddscut, fgmtel, fgmcut, fgmcal, fgmhrt and
    fgmvec (found in cfg.PATHS['exePath']) and the text output is parsed.
    The FGMPATH environment variable of the current process is set as a side
    effect.

    Parameters
    ----------
    dbeg : datetime64 compatible date
        Beginning of the interval.
    dend : datetime64 compatible date
        End of the interval.
    spacecraft : integer or string (1 to 4), optional
        The spacecraft number. The default is 1.
    mode : one character string, optional
        The instrument mode. 'b' for burst mode, 'n' for normal mode, '?' for both.
        The default is 'n'.
    rng : string or integer, optional
        The instrument range (2 to 7). The default is 'all'.
    coord : string, optional
        Output reference frame.
          'sr'    for spin-reference system,
          'scs'   for spacecraft-sun system,
          'gse'   for geocentric solar ecliptic system (default),
          'gsm'   for geocentric solar magnetospheric system,
          'sm'    for solar magnetic system, or
          'j2k'   for geocentric equatorial inertial system of epoch J2000.
        The default is 'gse'.
    FGMpath : string or None, optional
        The path to the fgmcal calibration files, exported as the FGMPATH
        environment variable. If None (or empty), FGMroot+'/data/dcalf/' is
        used. Ignored if 'archive' is 'True'. The default is None.
    archive : boolean, optional
        If True, use the archived calibration parameters (FGMPATH is set to
        the archive directory of the year and month of dbeg).
        The default is True.
    debug : boolean, optional
    dataversion : string, optional
        Version passed to data_file_name(). The default is 'last'.
    full : boolean
        If set include extra columns:
            'frq' - acquisition frequency,
            'TMmode' - ?,
            'range' - instrument range ,
            'ibob' - IB or OB sensor,
            'flag' - quality flag
    fgmcalfile : string, optional
        Full path to specific fgmcal file. The default is None.

    Returns
    -------
    B: DataFrame of calibrated magnetic field. Column names: 'Bx','By','Bz'
       (plus the extra columns if "full" is set), indexed by time. An empty
       DataFrame is returned if no data could be loaded.

    """

    sc = str(spacecraft)
    rng = str(rng)

    db = np.datetime64(dbeg, 'us')
    de = np.datetime64(dend, 'us')

    # One entry for each calendar day touched by [db, de). Stepping from the
    # exact start time instead would skip the last day whenever the interval
    # crosses midnight but starts later in the day than it ends.
    oneDay = np.timedelta64(1, 'D')
    if de > db:
        firstDay = db.astype('datetime64[D]')
        lastDay = (de-np.timedelta64(1, 'us')).astype('datetime64[D]')
        days = np.arange(firstDay, lastDay+oneDay, oneDay)
    else:
        days = np.array([], dtype='datetime64[D]')

    FGMroot = os.environ.get('FGMROOT') or cfg.PATHS['FGMroot']

    if archive:
        Yb = str(db.astype(object).year)
        Mb = str(db.astype(object).month).zfill(2)
        FGMpath = FGMroot+'/archive/calfiles/fgmcal/'+Yb+'/'+Mb+'/'
        os.environ['FGMPATH'] = FGMpath
    else:
        os.environ['FGMPATH'] = FGMpath if FGMpath else FGMroot+'/data/dcalf/'

    dataFiles = data_file_name(days, spacecraft=sc, mode=mode,
                               version=dataversion, debug=debug)

    fgmcalOption = '-c '+fgmcalfile if fgmcalfile else ''
    fgmcutOption = '' if rng == 'all' else '-r ' + rng

    if full:
        fgmvecOption = ' -f -a -r'
        columns = ['Bx', 'By', 'Bz', 'frq', 'TMmode', 'range', 'ibob', 'flag']
    else:
        fgmvecOption = ' -r'
        columns = ['Bx', 'By', 'Bz']

    if not dataFiles:
        # nothing to feed into the pipeline
        warn('No raw data files found, failed to load Cluster data')
        return pd.DataFrame(columns=columns)

    # unix time (because of bug in ddscut/fgmcut)
    dbUT = str(pd.Timestamp(db).timestamp())
    deUT = str(pd.Timestamp(de).timestamp())
    exePath = cfg.PATHS['exePath']
    # NOTE(review): the command is a shell string built from file names and
    # options; arguments containing spaces or shell metacharacters are not
    # escaped.
    command = (exePath+'ddsmrg ' + ' '.join(dataFiles)            + ' | ' +
               exePath+'ddscut ' + '-b ' + dbUT + ' -e ' + deUT   + ' | ' +
               exePath+'fgmtel '                                  + ' | ' +
               exePath+'fgmcut ' + fgmcutOption                   + ' | ' +
               exePath+'fgmcal ' + fgmcalOption                   + ' | ' +
               exePath+'fgmhrt ' + '-s ' + coord                  + ' | ' +
               exePath+'fgmvec ' + fgmvecOption)

    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    if debug:
        print(result.stderr)
        print('fgmcal file:', fgmcalfile)
    # NOTE(review): the exit status of a shell pipeline is normally that of
    # its last command only - confirm that this is sufficient here.
    if result.returncode:
        print(result.stderr)
        warn('Failed to load Cluster data')
        return pd.DataFrame(columns=columns)
    if not result.stdout.strip():
        # no output lines: nothing to parse
        return pd.DataFrame(columns=columns)

    B = pd.read_csv(StringIO(result.stdout), sep=r'\s+', index_col=0,
                    parse_dates=[0], names=['dtime', *columns])
    if not B.empty:
        # check if parse_dates failed
        if isinstance(B.index[0], str):
            B.index = B.index.map(_fix_str2time)
            # discard the lines whose time could not be converted
            B = B[B.index.notna()]
        B.index = B.index.tz_localize(None)

    return B

def _fix_str2time(s):
    """
    Converts a time string to datetime64.

    Returns NaT (instead of raising) if the string cannot be converted.
    """
    try:
        return np.datetime64(s)
    except (ValueError, TypeError, OverflowError):
        # only conversion failures; KeyboardInterrupt etc. must propagate
        return np.datetime64('NaT')
