"""
PEMtk fitting base classes
Development for fitting class, take pemtk.dataClass as base and add some subselection & fitting methods.
10/05/21 v1b added fitting routines using ePSproc and lmfit libraries.
Adapted from dev code notebook (functional forms), still needs some tidying up & wrapping for class.
For dev code see http://127.0.0.1:8888/lab/tree/dev/PEMtk/fitting/fitting_routines_dev_PEMtk_300421.ipynb
13/10/20 v1
TODO:
- Clean up/finalise data scheme. Currently mix of dictionary style, self.data[dataset][datatype] and class attribs self.datatype. Better to go with former for flexibility, or latter for using as simple base class to wrap later? Former works with some existing plotting fns., but complicated.
- afblmMatEfit() defaults similarly messy, although basically working.
- More analysis tools for fitting & results to add, currently just self.fit() to run.
"""
import copy
from datetime import datetime as dt # Import datetime.datetime for now() function
import pandas as pd
import numpy as np
import xarray as xr
try:
from lmfit import Parameters, Minimizer, report_fit
except ImportError:
print("*** lmfit not found: data functions available, but not fitting.")
from pemtk.data.dataClasses import dataClass
from pemtk.util.env import isnotebook
from epsproc import matEleSelector, multiDimXrToPD, setADMs, setPolGeoms
from epsproc.geomFunc import afblmXprod, mfblmXprod
from epsproc.sphPlot import plotTypeSelector
from epsproc.util.setMatE import setMatE
# Set HTML output style for Xarray in notebooks (optional), may also depend on version of Jupyter notebook or lab, or Xr
# See http://xarray.pydata.org/en/stable/generated/xarray.set_options.html
# if isnotebook():
# xr.set_options(display_style = 'html')
# def lmmuListStrReformat(lmmuList):
# """Convert list of tuple labels to short str format"""
# # Parameter names [a-z_][a-z0-9_]*, so replace - sign with
# return ['_'.join(str(ele).replace('-','n') for ele in sub) for sub in lmmuList]
# Now moved to ._util, should move to ..util?
from ._util import lmmuListStrReformat
from ._stats import setPoissWeights
# Plotters - testing import routines here, include modules & flags
# from epsproc.plot import hvPlotters # Should probably use this!
# from pemtk.util.hvPlotters import setPlotDefaults # Pkg version, opts function only (as per tmo-dev)
# print(hvFlag) # Fails
from pemtk.util import hvPlotters # Imports full module, include setup routines.
# print(hvPlotters.hvFlag) # OK
# print(hvFlag) # Fails
# print(hvPlotters.hv)
# Optional Seaborn import; some plotting functions depend on it.
# FIX: snsFlag was previously only assigned in the except branch, so a
# successful import left it undefined and __init__ raised NameError.
try:
    import seaborn as sns
    snsFlag = True
except ImportError:
    print("*** Seaborn not found: some plotting functions may be unavailable.")
    snsFlag = False
[docs]class pemtkFit(dataClass):
"""
Class prototype for pemtkFit class. Dev version builds on dataClass, and adds some basic subselection & fitting capabilities.
"""
from ._aggUtil import aggToXR, setAggMatE
from ._analysis import (analyseFits, fitHist, fitsReport, classifyFits, corrPlot, paramPlot, paramsReport, paramsCompare, paramFidelity,
phaseCorrection, _mergePFLong, _setData, _setWide) #, scopeTest
from ._conv import pdConv, pdConvRef, pdConvSetFit
from ._filters import thresFits, _subsetFromXS, getFitInds
from ._io import processedToHDF5, writeFitData, loadFitData, _pickleData, _writePDData, _writeXRData, setTimeStampedFileName
from ._parallel import multiFit
from ._plotters import BLMfitPlot, lmPlotFit, BLMsetPlot, hvSave
# from ._stats import setPoissWeights
from ._sym import symCheck
from ._util import setClassArgs, _setDefaultFits, _getFitInds
# from pemtk.util import hvPlotters # Imports full module, include setup routines.
def __init__(self, matE = None, data = None, ADM = None, backend = 'afblmXprod', **kwargs):
    """
    Initialise pemtkFit object.

    Parameters
    ----------
    matE : optional, default = None
        Matrix elements; stored as class attrib via setClassArgs, not otherwise processed here.
    data : optional, default = None
        Data for fitting; stored as class attrib via setClassArgs.
    ADM : optional, default = None
        Alignment parameters (ADMs); stored as class attrib via setClassArgs.
    backend : str or callable, optional, default = 'afblmXprod'
        Fitting model backend, see self.backends().
    **kwargs
        Passed to the base dataClass __init__.
    """
    self.__notebook__ = isnotebook()  # Flag for notebook-aware output (display vs. print).
    self.setClassArgs(locals())  # Set attribs from passed args
    super().__init__(**kwargs)  # Run rest of __init__ from base class - MAY BE PROBLEMATIC WITH ARB **kwargs here, should set separate dict for this?

    # Dict of selection options, keyed per dataType; populated by self.setSubset().
    # May want to propagate back to base class.
    self.selOpts = {'matE':{},
                    'slices':{}}

    # Fit options dict; the backend function handle is set by self.backends().
    # 01/09/22 - changed self.backends to set directly instead of by return.
    self.fitOpts = {}
    self.backends(backend=backend)

    # Use existing .data structure (under self.subKey) to hold the data subset
    # for fitting, for compatibility with existing functions.
    self.subKey = 'subset'
    self.data[self.subKey] = {}  # Set empty to ensure setMatEFit will always run, even on blank init.

    # Init fit params to empty defaults.
    # Currently set to push current fit to base structure, and keep all fits in .data[N] dicts.
    self.lmmu = None      # Parameter-name mappings, set by setMatEFit().
    self.basis = None     # Cached basis set, set on first afblmMatEfit() run.
    self.fitData = None
    self.fitInd = -1      # Fit counter; per-fit results stored at self.data[fitInd].

    # Set dict for file handling.
    # Currently just a log - appends to list on file IO.
    self.files = {'filesIn':[], 'filesOut':[], 'dataPaths':[], 'filesOutFailed':[]}

    # Quick hack for optional Holoviews import - bind to self so that methods
    # imported from sub-modules (e.g. _analysis.py) can access the handles;
    # module-level names are only in scope for methods defined in this file.
    if hvPlotters.hvFlag:
        self.hv = hvPlotters.hv
        self.setPlotDefaults = hvPlotters.setPlotDefaults
        self.setPlotDefaults()
    else:
        self.hv = False
        self.setPlotDefaults = None

    # Same for Seaborn.
    if snsFlag:
        self.sns = sns
    else:
        self.sns = False
# def setFitSubset(self, thres = None, selDims = None, thresDims = None, sq = True, drop = True):
# """
# Threshold and subselect on matrix elements.
#
# Wrapper for :py:func:`epsproc.matEleSelector`.
#
# """
#
# self.subset = matEleSelector(self.dsMatE, thres = thres, inds = selDims, dims = thresDims, sq = sq, drop = drop)
def backends(self, backend = None):
    """
    Set backends (model functions) for fitting & select for use.

    Parameters
    ----------
    backend : str, callable or None, optional, default = None
        - str: select and set a preset backend by name (e.g. 'afblmXprod', 'mfblmXprod').
        - callable: set the passed function directly as the backend.
        - None: return the dict of available preset backends.

    Settings are pushed to `self.backend` (name) and self.fitOpts['backend'] (function handle).

    01/09/22 v2, modified to set directly, rather than by return.
    22/08/22 v1
    """
    # Preset backend functions, keyed by 2-char category prefix ('af' or 'mf').
    backDict = {'af':{'name':'afblm','afblmXprod':afblmXprod, 'keyDim':'t'},
                'mf':{'name':'mfblm','mfblmXprod':mfblmXprod, 'keyDim':'pol'}}

    # No selection passed - just return the available presets.
    if backend is None:
        return backDict

    if isinstance(backend, str):
        if self.verbose['sub']:
            print(f"* Setting fitting backend to '{backend}'")

        self.backend = backend  # Human-readable name.
        # Lookup by category prefix then full name; unknown names raise KeyError.
        self.fitOpts['backend'] = backDict[backend[0:2]][backend]

    elif callable(backend):
        # FIX: was a bare `except:`; only a missing __name__ attribute is
        # expected here, so catch AttributeError specifically.
        try:
            self.backend = backend.__name__  # Human-readable name.
        except AttributeError:
            self.backend = backend

        if self.verbose['sub']:
            print(f"* Setting fitting backend to '{self.backend}'")  # Use name form here

        self.fitOpts['backend'] = backend
# ************* Set & select data
def setData(self, keyExpt = None, keyData = None):
    """
    Set (experimental) data into the master data structure under self.data[keyExpt]['AFBLM'].

    Parameters
    ----------
    keyExpt : str, optional, default = None
        Destination key in self.data.
    keyData : str or Xarray, optional, default = None
        If a string, copy AFBLM data from the existing dataset self.data[keyData].
        Otherwise, store the passed object directly.

    This basically assumes that the expt. provides AFBLMs; computational data
    can also be set for testing by passing an existing dataset key.
    """
    # Resolve source: existing dataset (by key) or the passed object itself.
    source = self.data[keyData]['AFBLM'] if isinstance(keyData, str) else keyData
    self.data[keyExpt] = {'AFBLM': source}
def setWeights(self, wConfig = None, keyExpt = None, keyData = None, **kwargs):
    """
    Set fitting weights for/from data; follows self.setData() with extra options.

    Sets self.data[keyExpt]['weights'] from an existing dataset if keyData is a
    string, or from keyData as passed otherwise.

    Parameters
    ----------
    wConfig : optional, str, default = None
        Additional handling for weights.
        - 'poission' (sic), set Poissonian weights matching the data dims via setPoissWeights().
        - 'errors', set weights as 1/(self.data[keyExpt]['weights']**2).

    Note: assumes self.data[keyExpt] already exists (e.g. via setData()).
    """
    # Resolve source weights, as per setData().
    weightsIn = self.data[keyData]['weights'] if isinstance(keyData, str) else keyData
    self.data[keyExpt]['weights'] = weightsIn

    # Optional post-processing of the stored weights.
    if wConfig == 'poission':
        self.data[keyExpt]['weights'] = setPoissWeights(1.0, self.data[keyExpt]['AFBLM'].shape)
    elif wConfig == 'errors':
        self.data[keyExpt]['weights'] = 1/(self.data[keyExpt]['weights']**2)
def setMatE(self, **kwargs):
    """Thin wrapper for ep.setMatE.setMatE(), pass args & set returns to self.data['matE']"""
    # Note: inside this method the bare name `setMatE` resolves to the
    # module-level ePSproc import, not to this method (class scope is not
    # visible in method bodies).
    self.data['matE'] = {'matE': setMatE(**kwargs)}
def setADMs(self, **kwargs):
    """Thin wrapper for ep.setADMs(), pass args & set returns to self.data['ADM']"""
    # Build ADMs via the module-level ePSproc helper and store in the main data structure.
    self.data['ADM'] = {'ADM': setADMs(**kwargs)}
def setPolGeoms(self, **kwargs):
    """Thin wrapper for ep.setPolGeoms(), pass args & set returns to self.data['pol']"""
    # Force 'Labels' as the indexing dim (the helper's return is indexed by 'Euler').
    polGeoms = setPolGeoms(**kwargs).swap_dims({'Euler':'Labels'})
    self.data['pol'] = {'pol': polGeoms}
# def setSubset(self, thres = None, selDims = None, thresDims = None, sq = True, drop = True):
def setSubset(self, dataKey, dataType, sliceParams = None, subKey = None, resetSelectors = False, **kwargs):
    """
    Threshold and subselect on matrix elements.

    Wrapper for :py:func:`epsproc.Esubset` and :py:func:`epsproc.matEleSelector`, to handle data array slicing & subselection from params dict.

    Parameters
    ----------
    dataKey : str
        Source dataset key in self.data.
    dataType : str
        Data type to subselect (e.g. 'matE', 'ADM', 'pol').
    sliceParams : dict, optional, default = None
        To additionally slice data, set dict of parameters {'sliceDim':[start, stop, step]}; passed to self.Esubset().
    subKey : str, optional, default = None
        Destination key; defaults to self.subKey. If passed, also updates self.subKey.
    resetSelectors : bool, optional, default = False
        If True, clear any previously-set selectors for this dataType.
    **kwargs
        Additional selectors, stored to self.selOpts[dataType] and passed to matEleSelector.

    Subselected elements are set to self.data[subKey][dataType] (uses existing .data structure for compatibility with existing functions!).

    To do: better slice handling - will likely have issues with different slices for different dataTypes in current form.
    """
    # Set/update the working subKey.
    if subKey is None:
        subKey = self.subKey
    else:
        self.subKey = subKey

    # Arg checks: init or reset selectors for this dataType, then merge passed kwargs.
    if (dataType not in self.selOpts.keys()) or resetSelectors:
        self.selOpts[dataType] = {}  # Set empty if missing or to reset.

    for key, value in kwargs.items():
        self.selOpts[dataType][key] = value  # Set any values if passed

    # Init data[subKey] and copy source data in.
    if subKey not in self.data.keys():
        self.data[subKey] = {}

    self.data[subKey][dataType] = self.data[dataKey][dataType]

    # Slice on multiple dims if specified, wrap Esubset for this.
    # Slices are applied sequentially, each acting on the output of the previous.
    # Note stacked dims may cause issues... see http://xarray.pydata.org/en/stable/indexing.html#multi-level-indexing
    # e.g. .unstack('LM').sel(**{'l':slice(1,3)}) is OK, .sel(LM={'l':slice(1,3), 'm':0}) is OK,
    # but .sel(**{'l':slice(1,3)}) on stacked dims is not.
    if sliceParams is not None:
        for key, value in sliceParams.items():
            self.selOpts['slices'][key] = value
            self.data[subKey][dataType] = self.Esubset(key = subKey, dataType = dataType, Erange = value, Etype = key)

    # Threshold & subselect with the stored per-dataType options.
    self.data[subKey][dataType] = matEleSelector(self.data[subKey][dataType], **self.selOpts[dataType])

    if self.verbose:
        print(f"Subselected from dataset '{dataKey}', dataType '{dataType}': {self.data[subKey][dataType].size} from {self.data[dataKey][dataType].size} points ({self.data[subKey][dataType].size/self.data[dataKey][dataType].size * 100 :.2f}%)")
# *********** Methods to setup fit
def setMatEFit(self, matE = None, paramsCons = 'auto', refPhase = 0, colDim = 'it', verbose = 1):
    """
    Convert an input Xarray into (mag,phase) array of matrix elements for fitting routine.

    Parameters
    ----------
    matE : Xarray
        Input set of matrix elements, used to set allowed (l,m,mu) and input parameters.
        If not passed, use self.data[self.subKey]['matE'].
    paramsCons : dict, optional, default = 'auto'
        Input dictionary of constraints (expressions) to be set for the parameters.
        See https://lmfit.github.io/lmfit-py/constraints.html
        If 'auto', parameters will be set via self.symCheck()
    refPhase : int or string, default = 0
        Set reference phase by integer index or name (string).
        If set to None (or other types) no reference phase will be set.
        NOTE(review): a bool refPhase would also match the int branch (bool is
        an int subclass) - confirm callers only pass int/str/None.
    colDim : dict, default = 'it'
        Quick hack to allow for restacking via ep.multiDimXrToPD, this will set to cols = 'it', then restack to 1D dataframe.
        This should always work for setting matE > fit parameters, but can be overridden if required.
        For ePS matrix elements the default should always work, although will drop degenerate cases (it>1), but shouldn't matter here.

    Sets
    ----
    self.params : lmfit Parameters object
        Set of fitting parameters (m_* magnitudes and p_* phases per matrix element).
    self.lmmu : dict
        List of states and mappings from states to fitting parameters (names & indexes).
    Both are also copied to self.data[self.subKey] for easy pickle save/load.

    29/06/21: Adapted to use 'it' on restack, then set to single-column with dummy dim. No selection methods, use self.setSubset() for this.
    """
    # Quick and ugly wrap args for class - should tidy up here!
    if matE is None:
        matE = self.data[self.subKey]['matE']

    params = Parameters()
    lmmu = {}  # Dict to hold param names & reindexing for abs/phase to complex mappings.

    # Convert matE to a 1-column Pandas DataFrame indexed by (l,m,mu,...) tuples.
    # (lmfit cannot take tuple or complex parameter names/values directly, hence
    # the flattening to named mag/phase pairs below.)
    pdTest = self.pdConvSetFit(matE = matE, colDim = colDim)  # Functional version.

    # Flatten to 1D values + index of (l,m,mu,...) tuple labels.
    dataNP = pdTest.to_numpy().flatten()
    lmmuList = pdTest.index  # This returns list of tuples
    lmmu['Index'] = lmmuList
    nMatE = lmmu['Index'].size

    if verbose:
        print(f"Set {nMatE} complex matrix elements to {2*nMatE} fitting params, see self.params for details.")

    # Convert list of labels to short str format.
    # Parameter names must match [a-z_][a-z0-9_]*, so '-' signs are replaced.
    lmmuListStr = lmmuListStrReformat(lmmuList)
    lmmu['Str'] = lmmuListStr

    # Generate (l,m) labels.
    lmList = lmmu['Index'].droplevel(['Cont','Targ','Total','mu','it'])  # NOTE this assumes dims, should change to slice?
    strLabels = [','.join(str(ele) for ele in sub) for sub in lmList]

    # Generate map {full names : lm labels}.
    lmmu['lmMap'] = dict(zip(lmmu['Str'], strLabels))

    # Set mapping of labels (name, [mag index, phase index], mag param name, phase param name).
    lmmu['map'] = [(item, [n, n+nMatE], 'm_' + item, 'p_' + item) for n, item in enumerate(lmmu['Str'])]

    # Set parameters with mapping: magnitudes (m_*) and phases (p_*) per element.
    [params.add(lmmu['map'][n][2], np.abs(item), min = 1e-4, max = 5.0) for n, item in enumerate(dataNP)]  # Set min == threshold? Not sure if this will cause issues later.
    [params.add(lmmu['map'][n][3], np.angle(item), min=-np.pi, max=np.pi) for n, item in enumerate(dataNP)]

    # Set any passed constraints (or derive them from symmetry with 'auto').
    if paramsCons is not None:
        if paramsCons == 'auto':
            paramsCons, _ = self.symCheck(pdTest = pdTest, colDim = colDim)

            if verbose:
                print("Auto-setting parameters.")

        for k,v in paramsCons.items():
            if k in params.keys():
                params[k].set(expr = v)
            else:
                print(f'*** Warning, parameter {k} not present, skipping constraint {k} = {v}')

    # Set ref phase: fix one phase parameter (by index or by full name).
    if isinstance(refPhase,int):
        params[lmmu['map'][0][3]].vary = False
    elif isinstance(refPhase,str):
        params[refPhase].vary = False

    self.params = params
    self.lmmu = lmmu

    # Set also to data dict for easy reference later (handy for quick pickle save/load).
    self.data[self.subKey]['params'] = self.params
    self.data[self.subKey]['lmmu'] = self.lmmu

    if self.verbose:
        if self.__notebook__:
            # NOTE(review): display() is the IPython builtin, only defined in
            # notebook sessions - guarded here by self.__notebook__.
            display(self.params)
        else:
            print(self.params)
def reconParams(self, params = None, lmmuList = None):
    """
    Convert lmfit parameters object > Xarray of complex matrix elements for tensor multiplication.

    Parameters
    ----------
    params : lmfit Parameters object, optional, default = None
        Fit parameters (m_*/p_* pairs); defaults to self.params.
    lmmuList : dict, optional, default = None
        Parameter-name mappings as set by setMatEFit(); defaults to self.lmmu.

    Returns
    -------
    Xarray
        Complex matrix elements, restacked to ('LM', 'Sym') dims.

    VERY UGLY! Should be a neater way to do this with existing Xarray and just
    replace/link values (i.e. pointers)? ... but it does work.
    """
    params = self.params if params is None else params
    lmmuList = self.lmmu if lmmuList is None else lmmuList

    # Rebuild complex values, mag * exp(i * phase), from the named parameter pairs.
    vals = params.valuesdict()
    complexVals = []
    for _, _, magKey, phaseKey in lmmuList['map']:
        complexVals.append(vals[magKey] * np.exp(vals[phaseKey] * 1j))

    # Reattach the original multi-index via Pandas, then convert back to Xarray
    # and restack to the (LM, Sym) dims expected by the calculation backends.
    reconXR = xr.DataArray(pd.DataFrame(complexVals, lmmuList['Index']))

    return reconXR.unstack().stack({'LM':['l','m'], 'Sym':['Cont','Targ','Total']}).squeeze(drop=True)
def randomizeParams(self):
    """Assign uniform random values in [0,1) to all free (vary=True) parameters in self.params."""
    for name in self.params:
        param = self.params[name]
        # Leave fixed parameters (e.g. the reference phase) untouched.
        if param.vary:
            param.value = np.random.random()
# ************ MAIN FITTING ROUTINES
# def afblmMatEfit(self, matE = None, lmmuList = None, data = None, basis = None, ADM = None, pol = None, selDims = {}, thres = None, thresDims = 'Eke', lmModelFlag = False, XSflag = True, **kwargs):
def afblmMatEfit(self, matE = None, data = None, lmmuList = None, basis = None, ADM = None, pol = None, resetBasis = False,
                 selDims = {}, thres = None, thresDims = 'Eke', lmModelFlag = False, XSflag = True,
                 weights = None, backend = None, debug = False, **kwargs):
    """
    Wrap :py:func:`epsproc.geomFunc.afblmXprod` (or other backend) for use with lmfit fitting routines.

    Parameters
    ----------
    matE : Xarray or lmfit Parameters object
        Matrix elements to use in calculation.
        For Parameters object, also require lmmuList to convert to Xarray.
        If not passed, use self.data[self.subKey]['matE'].
    data : Xarray, optional, default = None
        Data for fitting. If set, return residual; if not set, return model result.
    lmmuList : list, optional, default = None
        Mapping for parameters. Uses self.lmmu if not passed.
    basis : dict, optional, default = None
        Pre-computed basis set to use for calculations.
        If not set try to use self.basis, or passed set of ADMs.
        NOTE: currently defaults to self.basis if it exists, pass resetBasis=True to force overwrite.
    ADM : Xarray
        Set of ADMs (alignment parameters). Not required if basis is set.
    pol : Xarray
        Set of polarization geometries (Euler angles). Not required if basis is set.
        NOTE: currently NOT used for :py:func:`epsproc.geomFunc.afblmXprod`.
    resetBasis : bool, optional, default = False
        Force self.basis overwrite with updated values. NOT YET IMPLEMENTED.
    selDims = {}, thres = None, thresDims = 'Eke'
        Selectors passed to backend. TODO: should use global options here.
        NOTE(review): selDims uses a mutable default argument; it is only passed
        through here, but confirm backends do not mutate it.
    lmModelFlag : bool, optional, default = False
        Output option for flat results structure for lmfit testing.
    XSflag : bool, optional, default = True
        Use absolute cross-section (XS) in fitting? Passed to backends as `BLMRenorm` flag.
        If true, use passed B00(t) values in fit, and do not renormalise.
        If false, renorm by B00(t), i.e. all values will be set to unity (B00(t)=1).
    weights : int, Xarray or np.array, optional, default = None
        Weights to use for residual calculation.
        - If set, return np.sqrt(weights) * residual. (Must match size of data along key dimension(s).)
        - If None, try to use self.data[self.subKey]['weights']; if that is also
          missing an unweighted residual is returned.
        For bootstrap sampling see https://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Poisson_bootstrap
        and self.setWeights(). To use uncertainties from the data, set weights = 1/(sigma^2).
    backend : function or str, optional, default = None
        If None, uses the current self.fitOpts['backend'] (set at class init, see self.backends()).
        Supports backend = afblmXprod or mfblmXprod.
        NOTE - when passing a fn externally, it may need to be defined in base namespace,
        e.g. data.afblmMatEfit(backend = ep.mfblmXprod) is OK following `import epsproc as ep`.
    debug : bool, optional, default = False
        Print additional debug output for testing.
    **kwargs : optional
        Additional args passed to backends.

    Returns
    -------
    np.array (residual) when data is set; flat np.array when lmModelFlag is set;
    otherwise (BetaNormX, basis) tuple.

    NOTE:
    - some assumptions here, will probably need to run once to setup (with ADMs), then fit using basis returned.
    - Currently fitting abs matrix elements and renorm Betas.

    21/08/22: improved backend handling, working for AF and MF case.
    02/05/22: added weights options and updated docs.
    """
    # Update stored backend if one was passed, then always take the function
    # handle from self.fitOpts (so backend=None uses the current default).
    if backend is not None:
        self.backends(backend)

    backend = self.fitOpts['backend']

    if debug:
        print(f"Running fits with {backend.__name__}")

    # Quick and ugly wrap args for class - should tidy up here!
    if matE is None:
        matE = self.data[self.subKey]['matE']

    if lmmuList is None:
        lmmuList = self.lmmu

    # Resolve basis: prefer self.basis unless resetBasis is set; otherwise fall
    # back to ADMs/pol from the fitting subset for a full basis computation.
    if basis is None:
        if (not resetBasis) and (self.basis is not None):
            basis = self.basis
            # TODO: set other optional params here
        else:
            if (ADM is None) and ('ADM' in self.data[self.subKey].keys()):
                ADM = self.data[self.subKey]['ADM']

            if (pol is None) and ('pol' in self.data[self.subKey].keys()):
                pol = self.data[self.subKey]['pol']

    # Convert lmfit Parameters > complex matE Xarray if required.
    if isinstance(matE, Parameters):
        matE = self.reconParams(matE, lmmuList)

    # Set to use XS in fit (default), or renorm betas only (B00=1).
    # Note this is only used for the no-basis case, then carried in the basis dict.
    if XSflag:
        BLMRenorm = 0
    else:
        BLMRenorm = 1

    # Run BLM calculation; compute (and return) the basis if not already set.
    if basis is None:
        BetaNormX, basis = backend(matE, AKQS=ADM,  # FOR AF ONLY
                                   RX=pol,  # FOR MF ONLY - RX removed in ePSproc v1.3.0 for AF - not required/valid for AF calcs.
                                   thres = thres, selDims = selDims, thresDims=thresDims,
                                   basisReturn = 'ProductBasis', BLMRenorm = BLMRenorm, **kwargs)
    else:
        # Pass **basis here to allow for passing generically through fitting
        # routine and easy/flexible unpacking into the backend.
        BetaNormX = backend(matE, **basis,
                            thres = thres, selDims = selDims, thresDims=thresDims, basisReturn = 'BLM', **kwargs)

    # Weights: fall back to any stored per-subset weights if none passed.
    # If still None the unweighted residual branch is used below.
    if weights is None:
        if 'weights' in self.data[self.subKey].keys():
            weights = self.data[self.subKey]['weights']

    if data is not None:
        if weights is None:
            return (np.abs(BetaNormX - data)).values.flatten()  # Need to return NP array of residual for lmfit minimize fitting routine
        else:
            # Weighted residual, per lmfit convention sqrt(weights) * |model - data|.
            return (np.sqrt(weights) * np.abs(BetaNormX - data)).values.flatten()
    else:
        if lmModelFlag:
            return BetaNormX.values.flatten()  # Need to return NP array of model output for lmfit Model routines
        else:
            return BetaNormX, basis
# Wrap fit routine
def fit(self, fcn_args = None, fcn_kws = None, fitInd = None, keepSubset = False, **kwargs):
    """
    Wrapper to run lmfit.Minimizer, for details see https://lmfit.github.io/lmfit-py/fitting.html#lmfit.minimizer.Minimizer

    Uses preset self.params for parameters, and self.data[self.subKey] for data.

    Default case runs a Levenberg-Marquardt minimization (method='leastsq'), using :py:func:`scipy.optimize.least_squares`, see the
    `Scipy docs for more options <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.least_squares.html>`_, using
    the AF fitting model :py:func:`epsproc.geomFunc.afblmXprod` calculation routine. For MF fitting backend set fcn_kws['backend'] = ep.geomFunc.mfblmXprod

    Parameters
    -----------
    fcn_args : tuple, optional, default = None
        Positional arguments to pass to the fitting function.
        If None, will be set as (self.data[self.subKey]['AFBLM'], self.lmmu, self.basis)
    fcn_kws : dict, optional, default = None
        Keyword arguments to pass to the fitting function.
        If None, defaults to self.fitOpts (which carries the backend selection).
        For MF fitting backend set fcn_kws['backend'] = ep.geomFunc.mfblmXprod
    fitInd : int, optional, default = None
        If None, will use (and increment) self.fitInd.
        For parallel usage, supply explicit fitInd instead of using class var to ensure unique key per fit.
    keepSubset : bool, optional, default = False
        If True, keep a copy of self.data[self.subKey] in self.data[fitInd][self.subKey]
    **kwargs
        Passed to the lmfit Minimizer, for options see:
        - https://lmfit.github.io/lmfit-py/fitting.html
        - https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.least_squares.html

    Results are pushed to self.data[fitInd] ('AFBLM', 'residual', 'results'),
    and the last result is also set to self.result for quick checks.

    18/08/22: debugged for MF fitting case.
    07/09/21: updated for parallel use.
    """
    # Use next index if not specified (parallel callers should pass fitInd explicitly).
    if fitInd is None:
        self.fitInd += 1
        fitInd = self.fitInd

    if fcn_args is None:
        fcn_args = (self.data[self.subKey]['AFBLM'], self.lmmu, self.basis)

    # 21/08/22 - defaults now carried in self.fitOpts (currently backend selection only).
    # TODO: should handle updates here too.
    if fcn_kws is None:
        fcn_kws = self.fitOpts

    # Setup & run fit. afblmMatEfit returns the residual when data is passed
    # (first positional arg in fcn_args), as required by Minimizer.
    minner = Minimizer(self.afblmMatEfit, self.params, fcn_args=fcn_args, fcn_kws=fcn_kws, **kwargs)
    result = minner.minimize()

    # Recompute model from the final parameters: pass data=None to get the
    # (BetaNormX, basis) return rather than a residual.
    BetaNormX, _ = self.afblmMatEfit(result.params, None, *fcn_args[1:], **fcn_kws)  # DON'T pass data for BLM return.
    # TODO: may want to use all kwargs here for clarity.

    # Final residual against the fitted data.
    residual = self.afblmMatEfit(result.params, *fcn_args, **fcn_kws)

    #************ Push results to main data structure
    # Version with simple numerically-indexed results, one dict per fit.
    self.data[fitInd] = {}
    self.data[fitInd]['AFBLM'] = BetaNormX.copy()
    self.data[fitInd]['residual'] = residual.copy()
    self.data[fitInd]['results'] = result  # Full result object (not copied).
    self.result = self.data[fitInd]['results']  # Set result to self.result for quick checks.

    # Keep subset data? Useful in some cases.
    if keepSubset:
        self.data[fitInd][self.subKey] = self.data[self.subKey].copy()

    # Add some metadata. The try/except handles AF ('t' dim present) vs. MF
    # ('Labels' only) results; NOTE(review) a bare except also hides any other
    # missing-dim errors here.
    timeString = dt.now().strftime('%Y-%m-%d_%H-%M-%S')

    try:
        self.data[fitInd]['AFBLM'].attrs['jobLabel'] = f"Fit #{fitInd}, ({self.data[fitInd]['AFBLM']['t'].size} t, {self.data[fitInd]['AFBLM']['Labels'].size} pol) points, $\chi^2$={self.data[fitInd]['results'].chisqr}\n {timeString}"
    except:
        self.data[fitInd]['AFBLM'].attrs['jobLabel'] = f"Fit #{fitInd}, ({self.data[fitInd]['AFBLM']['Labels'].size} pol) points, $\chi^2$={self.data[fitInd]['results'].chisqr}\n {timeString}"