# -*- coding: utf-8 -*-
import numpy as np
import os
import collections
import configparser
import itertools
from delight.utils import approx_DL
from scipy.interpolate import interp1d
[docs]def parseParamFile(fileName, verbose=True, catFilesNeeded=True):
"""
Parser for configuration inputtype parameter files,
see examples for details. A bunch of them ar parsed.
"""
#print(f"\n\n\n using configfile: {fileName}")
config = configparser.ConfigParser()
if not os.path.isfile(fileName):
raise Exception(fileName+' : file not found')
config.read(fileName)
config.sections()
for secName in ['Bands', 'Training', 'Target', 'Other']:
if not config.has_section(secName):
raise Exception(secName+' not found in parameter file')
params = collections.OrderedDict()
params['rootDir'] = config.get('Other', 'rootDir')
if not os.path.isdir(params['rootDir']):
raise Exception(params['rootDir']+' is not a valid directory')
# Parsing Bands
params['bands_directory'] = config.get('Bands', 'directory')
if not os.path.isdir(params['bands_directory']):
raise Exception(params['bands_directory']+' is not a valid directory')
params['bandNames'] = config.get('Bands', 'Names').split(' ')
key= 'numCoefs'
if key in config['Bands']:
params['numCoefs'] = config.getint('Bands', 'numCoefs')
else:
params['numCoefs'] = 7
if 'bands_fmt' in config['Bands']:
params['bands_fmt'] = config.get('Bands', 'bands_fmt')
else:
params['bands_fmt'] = 'res'
if 'bands_verbose' in config['Bands']:
params['bands_verbose'] = config.getboolean('Bands','bands_verbose')
else:
params['bands_verbose'] = False
if 'bands_debug' in config['Bands']:
params['bands_debug'] = config.getboolean('Bands', 'bands_debug')
else:
params['bands_debug'] = False
if 'bands_makeplots' in config['Bands']:
params['bands_makeplots'] = config.getboolean('Bands', 'bands_makeplots')
else:
params['bands_makeplots'] = False
# Parsing Templates
params['templates_directory'] = config.get('Templates', 'directory')
params['sed_fmt'] = config.get('Templates', 'sed_fmt')
if config.get('Templates', 'sed_fmt') is None:
print("sed_fmt not found! Setting default!")
params['sed_fmt'] = 'sed'
params['lambdaRef'] = config.getfloat('Templates', 'lambdaRef')
params['templates_names'] = config.get('Templates', 'names').split(' ')
params['p_t']\
= np.array([float(x) for x in
config.get('Templates', 'p_t').split(' ')])
params['p_z_t']\
= np.array([float(x) for x in
config.get('Templates', 'p_z_t').split(' ')])
assert params['p_z_t'].size == params['p_z_t'].size and\
params['p_z_t'].size == len(params['templates_names'])
# Parsing Training
params['training_numChunks'] = config.getint('Training', 'numChunks')
params['training_paramFile'] = config.get('Training', 'paramFile')
params['training_catFile'] = config.get('Training', 'catFile')
if catFilesNeeded and not os.path.isfile(params['training_catFile']):
raise Exception(params['training_catFile']+' : file does not exist')
params['training_referenceBand'] = config.get('Training', 'referenceBand')
if params['training_referenceBand'] not in params['bandNames']:
raise Exception(params['training_referenceBand']+' : is not a valid')
params['training_bandOrder']\
= config.get('Training', 'bandOrder').split(' ')
params['training_extraFracFluxError']\
= config.getfloat('Training', 'extraFracFluxError')
for band in params['training_bandOrder']:
if (band not in params['bandNames'])\
and (band[:-4] not in params['bandNames'])\
and (band != '_')\
and (band != 'redshift'):
raise Exception(band+' does not exist')
if 'redshift' not in params['training_bandOrder']:
raise Exception('redshift should be included in training')
params['training_crossValidate'] =\
config.getboolean('Training', 'crossValidate')
params['training_CV_bandOrder']\
= config.get('Training', 'crossValidationBandOrder').split(' ')
params['training_CVfile'] = config.get('Training', 'CVfile')
for band in params['training_CV_bandOrder']:
if (band not in params['bandNames'])\
and (band[:-4] not in params['bandNames'])\
and (band != '_')\
and (band != 'redshift'):
raise Exception(band+' does not exist')
# Simulation
params['trainingFile'] = config.get('Simulation', 'trainingFile')
params['targetFile'] = config.get('Simulation', 'targetFile')
params['numObjects'] = int(config.getfloat('Simulation', 'numObjects'))
params['noiseLevel'] = config.getfloat('Simulation', 'noiseLevel')
# Parsing Target
params['target_extraFracFluxError']\
= config.getfloat('Target', 'extraFracFluxError')
params['target_catFile'] = config.get('Target', 'catFile')
if catFilesNeeded and not os.path.isfile(params['target_catFile']):
raise Exception(params['target_catFile']+' : file does not exist')
params['target_bandOrder']\
= config.get('Target', 'bandOrder').split(' ')
params['target_referenceBand'] = config.get('Target', 'referenceBand')
if params['target_referenceBand'] not in params['bandNames']:
raise Exception(params['target_referenceBand']+' : is not a valid')
for band in params['target_bandOrder']:
if (band not in params['bandNames'])\
and (band[:-4] not in params['bandNames'])\
and (band != '_')\
and (band != 'redshift'):
raise Exception(band+' does not exist')
params['compressIndicesFile'] = config.get('Target', 'compressIndicesFile')
params['compressMargLikFile'] = config.get('Target', 'compressMargLikFile')
if os.path.isfile(params['compressIndicesFile'])\
and os.path.isfile(params['compressMargLikFile']):
params['compressionFilesFound'] = True
else:
params['compressionFilesFound'] = False
params['Ncompress'] = config.getint('Target', 'Ncompress')
params['useCompression'] = config.getboolean("Target", 'useCompression')
params['redshiftpdfFile'] = config.get('Target', 'redshiftpdfFile')
params['redshiftpdfFileComp'] = config.get('Target', 'redshiftpdfFileComp')
params['redshiftpdfFileTemp'] = config.get('Target', 'redshiftpdfFileTemp')
params['metricsFile'] = config.get('Target', 'metricsFile')
params['metricsFileTemp'] = config.get('Target', 'metricsFileTemp')
# Parsing other parameters
params['zPriorSigma'] = config.getfloat('Other', 'zPriorSigma')
params['ellPriorSigma'] = config.getfloat('Other', 'ellPriorSigma')
params['fluxLuminosityNorm']\
= config.getfloat('Other', 'fluxLuminosityNorm')
params['alpha_C'] = config.getfloat('Other', 'alpha_C')
params['alpha_L'] = config.getfloat('Other', 'alpha_L')
params['V_C'] = config.getfloat('Other', 'V_C')
params['V_L'] = config.getfloat('Other', 'V_L')
params['redshiftMin'] = config.getfloat('Other', 'redshiftMin')
params['redshiftMax'] = config.getfloat('Other', 'redshiftMax')
params['redshiftBinSize']\
= config.getfloat('Other', 'redshiftBinSize')
params['redshiftNumBinsGPpred']\
= config.getint('Other', 'redshiftNumBinsGPpred')
params['redshiftDisBinSize']\
= config.getfloat('Other', 'redshiftDisBinSize')
params['lines_pos']\
= [float(x) for x in
config.get('Other', 'lines_pos').split(' ')]
params['lines_width']\
= [float(x) for x in
config.get('Other', 'lines_width').split(' ')]
params['confidenceLevels']\
= [float(x) for x in
config.get('Other', 'confidenceLevels').split(' ')]
if verbose:
print('Input parameter file:', fileName)
print('Parameters read:')
for k, v in params.items():
if type(v) is list:
print('> ', "%-20s" % k, ' '.join([str(x) for x in v]))
else:
print('> ', "%-20s" % k, v)
return params
[docs]def readColumnPositions(params, prefix="training_", refFlux=True):
"""
Read column/band information needed for parsing catalog file,
in particular the column positions.
"""
bandIndices = np.array([ib for ib, b in enumerate(params['bandNames'])
if b in params[prefix+'bandOrder']])
bandNames = np.array(params['bandNames'])[bandIndices]
bandColumns = np.array([params[prefix+'bandOrder'].index(b)
for b in bandNames])
bandVarColumns = np.array([params[prefix+'bandOrder'].index(b+'_var')
for b in bandNames])
if 'redshift' in params[prefix+'bandOrder']:
redshiftColumn = params[prefix+'bandOrder'].index('redshift')
else:
redshiftColumn = -1
if refFlux:
refBandColumn = params[prefix+'bandOrder']\
.index(params[prefix+'referenceBand'])
return bandIndices, bandNames, bandColumns, bandVarColumns,\
redshiftColumn, refBandColumn
else:
return bandIndices, bandNames, bandColumns, bandVarColumns,\
redshiftColumn
[docs]def readBandCoefficients(params):
"""
Read band/filter information, in particular the Gaussian Mixture coefs.
"""
bandCoefAmplitudes = []
bandCoefPositions = []
bandCoefWidths = []
for band in params['bandNames']:
fname = params['bands_directory'] + '/' + band\
+ '_gaussian_coefficients.txt'
data = np.loadtxt(fname)
bandCoefAmplitudes.append(data[:, 0])
bandCoefPositions.append(data[:, 1])
bandCoefWidths.append(data[:, 2])
bandCoefAmplitudes = np.vstack(bandCoefAmplitudes)
bandCoefPositions = np.vstack(bandCoefPositions)
bandCoefWidths = np.vstack(bandCoefWidths)
norms =\
np.sqrt(2*np.pi) * np.sum(bandCoefAmplitudes * bandCoefWidths, axis=1)
return bandCoefAmplitudes, bandCoefPositions, bandCoefWidths, norms
[docs]def createGrids(params):
"""
Create redshift grids from parameters in file.
"""
redshiftDistGrid = np.arange(0, params['redshiftMax'],
params['redshiftDisBinSize'])
if True:
redshiftGrid = np.arange(params['redshiftMin'],
params['redshiftMax'],
params['redshiftBinSize'])
else:
num = int((params['redshiftMax'] - params['redshiftMin']) /
params['redshiftBinSize'])
redshiftGrid = np.logspace(np.log10(params['redshiftMin']),
np.log10(params['redshiftMax']*1.01),
num)
redshiftGridGP = np.logspace(np.log10(params['redshiftMin']),
np.log10(params['redshiftMax']*1.01),
params['redshiftNumBinsGPpred'])
return redshiftDistGrid, redshiftGrid, redshiftGridGP
[docs]def readSEDs(params):
"""
Read SED parameters.
"""
redshiftDistGrid, redshiftGrid, redshiftGridGP = createGrids(params)
f_mod = np.zeros((len(params['templates_names']),
len(params['bandNames'])), dtype=object)
for it, sed_name in enumerate(params['templates_names']):
data = np.loadtxt(params['templates_directory'] +
'/' + sed_name + '_fluxredshiftmod.txt')
for jf in range(len(params['bandNames'])):
f_mod[it, jf] = interp1d(redshiftGrid, data[:, jf],
kind='linear', bounds_error=False,
fill_value='extrapolate')
return f_mod
[docs]def getDataFromFile(params, firstLine, lastLine,
prefix="", ftype="catalog", getXY=True, CV=False):
"""
Returns an iterator to parse an input catalog file.
Returns the fluxes, redshifts, etc, and also GP inputs if getXY=True.
"""
if ftype == "gpparams":
with open(params[prefix+'paramFile']) as f:
for line in itertools.islice(f, firstLine, lastLine):
data = np.fromstring(line, dtype=float, sep=' ')
B = int(data[0])
z = data[1]
ell = data[2]
bands = data[3:3+B]
flatarray = data[3+B:]
X = np.zeros((B, 3))
for off, iband in enumerate(bands):
X[off, 0] = iband
X[off, 1] = z
X[off, 2] = ell
yield z, ell, bands, X, B, flatarray
if ftype == "catalog":
DL = approx_DL()
bandIndices, bandNames, bandColumns, bandVarColumns, redshiftColumn,\
refBandColumn = readColumnPositions(params, prefix=prefix)
bandCoefAmplitudes, bandCoefPositions, bandCoefWidths, norms\
= readBandCoefficients(params)
refBandNorm = norms[params['bandNames']
.index(params[prefix+'referenceBand'])]
if CV:
bandIndicesCV, bandNamesCV, bandColumnsCV,\
bandVarColumnsCV, redshiftColumnCV =\
readColumnPositions(params, prefix=prefix+'CV_', refFlux=False)
with open(params[prefix+'catFile']) as f:
for line in itertools.islice(f, firstLine, lastLine):
data = np.array(line.split(' '), dtype=float)
refFlux = data[refBandColumn]
normedRefFlux = refFlux * refBandNorm
if redshiftColumn >= 0:
z = data[redshiftColumn]
else:
z = -1
# drop bad values and find how many bands are valid
mask = np.isfinite(data[bandColumns])
mask &= np.isfinite(data[bandVarColumns])
mask &= data[bandColumns] > 0.0
mask &= data[bandVarColumns] > 0.0
bandsUsed = np.where(mask)[0]
numBandsUsed = mask.sum()
if z > -1:
ell = normedRefFlux * 4 * np.pi \
* params['fluxLuminosityNorm'] * DL(z)**2 * (1+z)
if (refFlux <= 0) or (not np.isfinite(refFlux))\
or (z < 0) or (numBandsUsed <= 1):
print("Skipping galaxy: refflux=", refFlux,
"z=", z, "numBandsUsed=", numBandsUsed)
continue # not valid data - skip to next valid object
fluxes = data[bandColumns[mask]]
fluxesVar = data[bandVarColumns[mask]] +\
(params['training_extraFracFluxError'] * fluxes)**2
if CV:
maskCV = np.isfinite(data[bandColumnsCV])
maskCV &= np.isfinite(data[bandVarColumnsCV])
maskCV &= data[bandColumnsCV] > 0.0
maskCV &= data[bandVarColumnsCV] > 0.0
bandsUsedCV = np.where(maskCV)[0]
numBandsUsedCV = maskCV.sum()
fluxesCV = data[bandColumnsCV[maskCV]]
fluxesCVVar = data[bandVarColumnsCV[maskCV]] +\
(params['training_extraFracFluxError'] * fluxesCV)**2
if not getXY:
if CV:
yield z, normedRefFlux,\
bandIndices[mask], fluxes, fluxesVar,\
bandIndicesCV[maskCV], fluxesCV, fluxesCVVar
else:
yield z, normedRefFlux,\
bandIndices[mask], fluxes, fluxesVar,\
None, None, None
if getXY:
Y = np.zeros((numBandsUsed, 1))
Yvar = np.zeros((numBandsUsed, 1))
X = np.ones((numBandsUsed, 3))
for off, iband in enumerate(bandIndices[mask]):
X[off, 0] = iband
X[off, 1] = z
X[off, 2] = ell
Y[off, 0] = fluxes[off]
Yvar[off, 0] = fluxesVar[off]
if CV:
yield z, normedRefFlux,\
bandIndices[mask], fluxes, fluxesVar,\
bandIndicesCV[maskCV], fluxesCV, fluxesCVVar,\
X, Y, Yvar
else:
yield z, normedRefFlux,\
bandIndices[mask], fluxes, fluxesVar,\
None, None, None,\
X, Y, Yvar