Source code for delight.io

# -*- coding: utf-8 -*-

import numpy as np
import os
import collections
import configparser
import itertools
from delight.utils import approx_DL
from scipy.interpolate import interp1d


[docs]def parseParamFile(fileName, verbose=True, catFilesNeeded=True): """ Parser for configuration inputtype parameter files, see examples for details. A bunch of them ar parsed. """ #print(f"\n\n\n using configfile: {fileName}") config = configparser.ConfigParser() if not os.path.isfile(fileName): raise Exception(fileName+' : file not found') config.read(fileName) config.sections() for secName in ['Bands', 'Training', 'Target', 'Other']: if not config.has_section(secName): raise Exception(secName+' not found in parameter file') params = collections.OrderedDict() params['rootDir'] = config.get('Other', 'rootDir') if not os.path.isdir(params['rootDir']): raise Exception(params['rootDir']+' is not a valid directory') # Parsing Bands params['bands_directory'] = config.get('Bands', 'directory') if not os.path.isdir(params['bands_directory']): raise Exception(params['bands_directory']+' is not a valid directory') params['bandNames'] = config.get('Bands', 'Names').split(' ') key= 'numCoefs' if key in config['Bands']: params['numCoefs'] = config.getint('Bands', 'numCoefs') else: params['numCoefs'] = 7 if 'bands_fmt' in config['Bands']: params['bands_fmt'] = config.get('Bands', 'bands_fmt') else: params['bands_fmt'] = 'res' if 'bands_verbose' in config['Bands']: params['bands_verbose'] = config.getboolean('Bands','bands_verbose') else: params['bands_verbose'] = False if 'bands_debug' in config['Bands']: params['bands_debug'] = config.getboolean('Bands', 'bands_debug') else: params['bands_debug'] = False if 'bands_makeplots' in config['Bands']: params['bands_makeplots'] = config.getboolean('Bands', 'bands_makeplots') else: params['bands_makeplots'] = False # Parsing Templates params['templates_directory'] = config.get('Templates', 'directory') params['sed_fmt'] = config.get('Templates', 'sed_fmt') if config.get('Templates', 'sed_fmt') is None: print("sed_fmt not found! Setting default!") params['sed_fmt'] = 'sed' params['lambdaRef'] = config.getfloat('Templates', 'lambdaRef') params['templates_names'] = config.get('Templates', 'names').split(' ') params['p_t']\ = np.array([float(x) for x in config.get('Templates', 'p_t').split(' ')]) params['p_z_t']\ = np.array([float(x) for x in config.get('Templates', 'p_z_t').split(' ')]) assert params['p_z_t'].size == params['p_z_t'].size and\ params['p_z_t'].size == len(params['templates_names']) # Parsing Training params['training_numChunks'] = config.getint('Training', 'numChunks') params['training_paramFile'] = config.get('Training', 'paramFile') params['training_catFile'] = config.get('Training', 'catFile') if catFilesNeeded and not os.path.isfile(params['training_catFile']): raise Exception(params['training_catFile']+' : file does not exist') params['training_referenceBand'] = config.get('Training', 'referenceBand') if params['training_referenceBand'] not in params['bandNames']: raise Exception(params['training_referenceBand']+' : is not a valid') params['training_bandOrder']\ = config.get('Training', 'bandOrder').split(' ') params['training_extraFracFluxError']\ = config.getfloat('Training', 'extraFracFluxError') for band in params['training_bandOrder']: if (band not in params['bandNames'])\ and (band[:-4] not in params['bandNames'])\ and (band != '_')\ and (band != 'redshift'): raise Exception(band+' does not exist') if 'redshift' not in params['training_bandOrder']: raise Exception('redshift should be included in training') params['training_crossValidate'] =\ config.getboolean('Training', 'crossValidate') params['training_CV_bandOrder']\ = config.get('Training', 'crossValidationBandOrder').split(' ') params['training_CVfile'] = config.get('Training', 'CVfile') for band in params['training_CV_bandOrder']: if (band not in params['bandNames'])\ and (band[:-4] not in params['bandNames'])\ and (band != '_')\ and (band != 'redshift'): raise Exception(band+' does not exist') # Simulation params['trainingFile'] = config.get('Simulation', 'trainingFile') params['targetFile'] = config.get('Simulation', 'targetFile') params['numObjects'] = int(config.getfloat('Simulation', 'numObjects')) params['noiseLevel'] = config.getfloat('Simulation', 'noiseLevel') # Parsing Target params['target_extraFracFluxError']\ = config.getfloat('Target', 'extraFracFluxError') params['target_catFile'] = config.get('Target', 'catFile') if catFilesNeeded and not os.path.isfile(params['target_catFile']): raise Exception(params['target_catFile']+' : file does not exist') params['target_bandOrder']\ = config.get('Target', 'bandOrder').split(' ') params['target_referenceBand'] = config.get('Target', 'referenceBand') if params['target_referenceBand'] not in params['bandNames']: raise Exception(params['target_referenceBand']+' : is not a valid') for band in params['target_bandOrder']: if (band not in params['bandNames'])\ and (band[:-4] not in params['bandNames'])\ and (band != '_')\ and (band != 'redshift'): raise Exception(band+' does not exist') params['compressIndicesFile'] = config.get('Target', 'compressIndicesFile') params['compressMargLikFile'] = config.get('Target', 'compressMargLikFile') if os.path.isfile(params['compressIndicesFile'])\ and os.path.isfile(params['compressMargLikFile']): params['compressionFilesFound'] = True else: params['compressionFilesFound'] = False params['Ncompress'] = config.getint('Target', 'Ncompress') params['useCompression'] = config.getboolean("Target", 'useCompression') params['redshiftpdfFile'] = config.get('Target', 'redshiftpdfFile') params['redshiftpdfFileComp'] = config.get('Target', 'redshiftpdfFileComp') params['redshiftpdfFileTemp'] = config.get('Target', 'redshiftpdfFileTemp') params['metricsFile'] = config.get('Target', 'metricsFile') params['metricsFileTemp'] = config.get('Target', 'metricsFileTemp') # Parsing other parameters params['zPriorSigma'] = config.getfloat('Other', 'zPriorSigma') params['ellPriorSigma'] = config.getfloat('Other', 'ellPriorSigma') params['fluxLuminosityNorm']\ = config.getfloat('Other', 'fluxLuminosityNorm') params['alpha_C'] = config.getfloat('Other', 'alpha_C') params['alpha_L'] = config.getfloat('Other', 'alpha_L') params['V_C'] = config.getfloat('Other', 'V_C') params['V_L'] = config.getfloat('Other', 'V_L') params['redshiftMin'] = config.getfloat('Other', 'redshiftMin') params['redshiftMax'] = config.getfloat('Other', 'redshiftMax') params['redshiftBinSize']\ = config.getfloat('Other', 'redshiftBinSize') params['redshiftNumBinsGPpred']\ = config.getint('Other', 'redshiftNumBinsGPpred') params['redshiftDisBinSize']\ = config.getfloat('Other', 'redshiftDisBinSize') params['lines_pos']\ = [float(x) for x in config.get('Other', 'lines_pos').split(' ')] params['lines_width']\ = [float(x) for x in config.get('Other', 'lines_width').split(' ')] params['confidenceLevels']\ = [float(x) for x in config.get('Other', 'confidenceLevels').split(' ')] if verbose: print('Input parameter file:', fileName) print('Parameters read:') for k, v in params.items(): if type(v) is list: print('> ', "%-20s" % k, ' '.join([str(x) for x in v])) else: print('> ', "%-20s" % k, v) return params
[docs]def readColumnPositions(params, prefix="training_", refFlux=True): """ Read column/band information needed for parsing catalog file, in particular the column positions. """ bandIndices = np.array([ib for ib, b in enumerate(params['bandNames']) if b in params[prefix+'bandOrder']]) bandNames = np.array(params['bandNames'])[bandIndices] bandColumns = np.array([params[prefix+'bandOrder'].index(b) for b in bandNames]) bandVarColumns = np.array([params[prefix+'bandOrder'].index(b+'_var') for b in bandNames]) if 'redshift' in params[prefix+'bandOrder']: redshiftColumn = params[prefix+'bandOrder'].index('redshift') else: redshiftColumn = -1 if refFlux: refBandColumn = params[prefix+'bandOrder']\ .index(params[prefix+'referenceBand']) return bandIndices, bandNames, bandColumns, bandVarColumns,\ redshiftColumn, refBandColumn else: return bandIndices, bandNames, bandColumns, bandVarColumns,\ redshiftColumn
[docs]def readBandCoefficients(params): """ Read band/filter information, in particular the Gaussian Mixture coefs. """ bandCoefAmplitudes = [] bandCoefPositions = [] bandCoefWidths = [] for band in params['bandNames']: fname = params['bands_directory'] + '/' + band\ + '_gaussian_coefficients.txt' data = np.loadtxt(fname) bandCoefAmplitudes.append(data[:, 0]) bandCoefPositions.append(data[:, 1]) bandCoefWidths.append(data[:, 2]) bandCoefAmplitudes = np.vstack(bandCoefAmplitudes) bandCoefPositions = np.vstack(bandCoefPositions) bandCoefWidths = np.vstack(bandCoefWidths) norms =\ np.sqrt(2*np.pi) * np.sum(bandCoefAmplitudes * bandCoefWidths, axis=1) return bandCoefAmplitudes, bandCoefPositions, bandCoefWidths, norms
[docs]def createGrids(params): """ Create redshift grids from parameters in file. """ redshiftDistGrid = np.arange(0, params['redshiftMax'], params['redshiftDisBinSize']) if True: redshiftGrid = np.arange(params['redshiftMin'], params['redshiftMax'], params['redshiftBinSize']) else: num = int((params['redshiftMax'] - params['redshiftMin']) / params['redshiftBinSize']) redshiftGrid = np.logspace(np.log10(params['redshiftMin']), np.log10(params['redshiftMax']*1.01), num) redshiftGridGP = np.logspace(np.log10(params['redshiftMin']), np.log10(params['redshiftMax']*1.01), params['redshiftNumBinsGPpred']) return redshiftDistGrid, redshiftGrid, redshiftGridGP
[docs]def readSEDs(params): """ Read SED parameters. """ redshiftDistGrid, redshiftGrid, redshiftGridGP = createGrids(params) f_mod = np.zeros((len(params['templates_names']), len(params['bandNames'])), dtype=object) for it, sed_name in enumerate(params['templates_names']): data = np.loadtxt(params['templates_directory'] + '/' + sed_name + '_fluxredshiftmod.txt') for jf in range(len(params['bandNames'])): f_mod[it, jf] = interp1d(redshiftGrid, data[:, jf], kind='linear', bounds_error=False, fill_value='extrapolate') return f_mod
[docs]def getDataFromFile(params, firstLine, lastLine, prefix="", ftype="catalog", getXY=True, CV=False): """ Returns an iterator to parse an input catalog file. Returns the fluxes, redshifts, etc, and also GP inputs if getXY=True. """ if ftype == "gpparams": with open(params[prefix+'paramFile']) as f: for line in itertools.islice(f, firstLine, lastLine): data = np.fromstring(line, dtype=float, sep=' ') B = int(data[0]) z = data[1] ell = data[2] bands = data[3:3+B] flatarray = data[3+B:] X = np.zeros((B, 3)) for off, iband in enumerate(bands): X[off, 0] = iband X[off, 1] = z X[off, 2] = ell yield z, ell, bands, X, B, flatarray if ftype == "catalog": DL = approx_DL() bandIndices, bandNames, bandColumns, bandVarColumns, redshiftColumn,\ refBandColumn = readColumnPositions(params, prefix=prefix) bandCoefAmplitudes, bandCoefPositions, bandCoefWidths, norms\ = readBandCoefficients(params) refBandNorm = norms[params['bandNames'] .index(params[prefix+'referenceBand'])] if CV: bandIndicesCV, bandNamesCV, bandColumnsCV,\ bandVarColumnsCV, redshiftColumnCV =\ readColumnPositions(params, prefix=prefix+'CV_', refFlux=False) with open(params[prefix+'catFile']) as f: for line in itertools.islice(f, firstLine, lastLine): data = np.array(line.split(' '), dtype=float) refFlux = data[refBandColumn] normedRefFlux = refFlux * refBandNorm if redshiftColumn >= 0: z = data[redshiftColumn] else: z = -1 # drop bad values and find how many bands are valid mask = np.isfinite(data[bandColumns]) mask &= np.isfinite(data[bandVarColumns]) mask &= data[bandColumns] > 0.0 mask &= data[bandVarColumns] > 0.0 bandsUsed = np.where(mask)[0] numBandsUsed = mask.sum() if z > -1: ell = normedRefFlux * 4 * np.pi \ * params['fluxLuminosityNorm'] * DL(z)**2 * (1+z) if (refFlux <= 0) or (not np.isfinite(refFlux))\ or (z < 0) or (numBandsUsed <= 1): print("Skipping galaxy: refflux=", refFlux, "z=", z, "numBandsUsed=", numBandsUsed) continue # not valid data - skip to next valid object fluxes = data[bandColumns[mask]] fluxesVar = data[bandVarColumns[mask]] +\ (params['training_extraFracFluxError'] * fluxes)**2 if CV: maskCV = np.isfinite(data[bandColumnsCV]) maskCV &= np.isfinite(data[bandVarColumnsCV]) maskCV &= data[bandColumnsCV] > 0.0 maskCV &= data[bandVarColumnsCV] > 0.0 bandsUsedCV = np.where(maskCV)[0] numBandsUsedCV = maskCV.sum() fluxesCV = data[bandColumnsCV[maskCV]] fluxesCVVar = data[bandVarColumnsCV[maskCV]] +\ (params['training_extraFracFluxError'] * fluxesCV)**2 if not getXY: if CV: yield z, normedRefFlux,\ bandIndices[mask], fluxes, fluxesVar,\ bandIndicesCV[maskCV], fluxesCV, fluxesCVVar else: yield z, normedRefFlux,\ bandIndices[mask], fluxes, fluxesVar,\ None, None, None if getXY: Y = np.zeros((numBandsUsed, 1)) Yvar = np.zeros((numBandsUsed, 1)) X = np.ones((numBandsUsed, 3)) for off, iband in enumerate(bandIndices[mask]): X[off, 0] = iband X[off, 1] = z X[off, 2] = ell Y[off, 0] = fluxes[off] Yvar[off, 0] = fluxesVar[off] if CV: yield z, normedRefFlux,\ bandIndices[mask], fluxes, fluxesVar,\ bandIndicesCV[maskCV], fluxesCV, fluxesCVVar,\ X, Y, Yvar else: yield z, normedRefFlux,\ bandIndices[mask], fluxes, fluxesVar,\ None, None, None,\ X, Y, Yvar