diff --git a/CHANGELOG.md b/CHANGELOG.md index b843359..b4c9af4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [0.2.2] - Unreleased - +- [#e42a4a73] QuasarNP internally now uses a WaveGrid object rather than numpy arrays +for wavelength grids. All code is backwards compatible. ## [0.2.1] - 2025-12-18 ### Changed diff --git a/quasarnp/io.py b/quasarnp/io.py index a81aef9..c2c36c6 100644 --- a/quasarnp/io.py +++ b/quasarnp/io.py @@ -14,7 +14,7 @@ import numpy as np from .model import QuasarNP -from .utils import rebin, renormalize, nbins, nbins_linear, wave, linear_wave +from .utils import rebin, renormalize, WaveGrid def load_file(filename): @@ -41,9 +41,22 @@ def load_file(filename): try: w_grid = f["model_grid"][:] + + print(f"{w_grid=}") + + log_grid = WaveGrid(linear=False) + linear_grid = WaveGrid(linear=True) + # Checking some defaults to correctly initialize the default grids. + if (len(w_grid) == len(log_grid)) and np.allclose(w_grid, log_grid.wave): + w_grid = WaveGrid(linear=False) + elif (len(w_grid) == len(linear_grid)) and np.allclose(w_grid, linear_grid.wave): + w_grid = WaveGrid(linear=True) + else: + w_grid = WaveGrid(grid=w_grid) + except KeyError: print("Model grid not found in file, defaulting to logarithmic") - w_grid = wave + w_grid = WaveGrid(linear=False) # Some versions of TF/Keras are 1 indexed and so bn layers start # at batch_normalization_1. Some versions are 0 indexed and start at @@ -347,7 +360,7 @@ def read_data(fi, truth=None, z_lim=2.1, return_pmf=False, nspec=None): def load_desi_exposure(dir_name, spec_number, fibers=np.ones(500, dtype="bool"), - out_grid=wave): + out_grid=WaveGrid(linear=False)): """Load and renormalize a raw DESI spectrographic exposure. This method will load B, R and Z cframe files in sequence. First, spectra @@ -368,7 +381,7 @@ def load_desi_exposure(dir_name, spec_number, Array of length 500 indicating whether each fiber should be loaded. True if the fiber should be loaded, False otherwise. Defaults to True for all 500 fibers. - out_grid : numpy.ndarray, optional + out_grid : WaveGrid, optional The wavelength grid to rebin the loaded exposure to. Defaults to the logarithmic QuasarNET grid. @@ -410,7 +423,7 @@ def load_desi_exposure(dir_name, spec_number, # Load the flux and ivar flux = h["FLUX"].read()[fibers, :] ivar = h["IVAR"].read()[fibers, :] - w_grid = h["WAVELENGTH"].read() + w_grid = WaveGrid(grid=h["WAVELENGTH"].read()) # Rebin the flux and ivar new_flux, new_ivar = rebin(flux, ivar, w_grid, out_grid=out_grid) @@ -430,7 +443,7 @@ def load_desi_exposure(dir_name, spec_number, return X_out, np.where(nonzero_weights)[0] -def load_desi_coadd(filename, rows=None, out_grid=wave): +def load_desi_coadd(filename, rows=None, out_grid=WaveGrid(linear=False)): """Load and renormalize a DESI coadded spectrographic exposure. This method will load a coadd file and renormalize as follows. First, @@ -450,7 +463,8 @@ def load_desi_coadd(filename, rows=None, out_grid=wave): if the row should be loaded, False otherwise. Defaults to None, which loads all rows. out_grid : numpy.ndarray, optional - The wavelength grid to rebin the loaded exposure to. + The wavelength grid to rebin the loaded exposure to. Defaults to the + logarithmic QuasarNET grid. Returns ------- @@ -487,7 +501,7 @@ def load_desi_coadd(filename, rows=None, out_grid=wave): # Load the flux and ivar flux = h[fluxname].read()[rows, :] ivar = h[ivarname].read()[rows, :] - w_grid = h[wname].read() + w_grid = WaveGrid(grid=h[wname].read()) # Rebin the flux and ivar new_flux, new_ivar = rebin(flux, ivar, w_grid, out_grid=out_grid) @@ -511,7 +525,7 @@ def load_desi_coadd(filename, rows=None, out_grid=wave): def load_desi_daily(night, exp_id, spec_number, fibers=np.ones(500, dtype="bool"), - w_grid=wave): + w_grid=WaveGrid(linear=False)): """Load and renormalize a daily DESI spectrographic exposure. This method will load B, R and Z cframe files in sequence. First, spectra @@ -535,7 +549,8 @@ def load_desi_daily(night, exp_id, spec_number, True if the fiber should be loaded, False otherwise. Defaults to True for all 500 fibers. w_grid : numpy.ndarray, optional - The wavelength grid to rebin the loaded exposure to. + The wavelength grid to rebin the loaded exposure to. Defaults to the + logarithmic QuasarNET grid. Returns ------- @@ -560,6 +575,7 @@ def load_desi_daily(night, exp_id, spec_number, # For now load daily cframes files # TODO: add support for loading arbitrary cframes. # TODO: Add support for loading by tile id + e rather than date + e + # TODO desispec.findfile instead. root = "/global/cfs/cdirs/desi/spectro/redux/daily/exposures" file_loc = Path(root, night, exp_id) diff --git a/quasarnp/tests/test_io.py b/quasarnp/tests/test_io.py index 1659c9e..449799f 100644 --- a/quasarnp/tests/test_io.py +++ b/quasarnp/tests/test_io.py @@ -4,12 +4,16 @@ import numpy as np import quasarnp.io -from quasarnp.utils import wave, linear_wave +from quasarnp.utils import WaveGrid file_loc = pathlib.Path(__file__).parent.resolve() / "test_files" class TestLoadingModel(unittest.TestCase): + def setUp(self): + self.log_wave = WaveGrid(linear=False) + self.linear_wave = WaveGrid(linear=True) + def test_load_file(self): # Get the location of this test script and load the test_weights file # in this lower level directory. @@ -64,7 +68,7 @@ def test_load_file(self): observed = config_dict["conv_1"]["padding"] self.assertEqual(observed, expected) - self.assertTrue(np.allclose(w_grid, wave)) + self.assertTrue(np.allclose(w_grid.wave, self.log_wave.wave)) def test_load_linear_weights(self): # Get the location of this test script and load the test_weights file @@ -74,11 +78,11 @@ def test_load_linear_weights(self): # This one should auto derive to log even though it's linear since # we didn't post process to add the linear data. *_, w_grid = quasarnp.io.load_file(loc) - self.assertTrue(np.allclose(w_grid, wave)) + self.assertTrue(np.allclose(w_grid.wave, self.log_wave.wave)) loc = file_loc / "test_post_processed.h5" *_, w_grid = quasarnp.io.load_file(loc) - self.assertTrue(np.allclose(w_grid, linear_wave)) + self.assertTrue(np.allclose(w_grid.wave, self.linear_wave.wave)) class TestLoadingData(unittest.TestCase): def test_load_desi_coadd(self): diff --git a/quasarnp/tests/test_utils.py b/quasarnp/tests/test_utils.py index b8efa09..011c09a 100644 --- a/quasarnp/tests/test_utils.py +++ b/quasarnp/tests/test_utils.py @@ -6,17 +6,21 @@ import fitsio -from quasarnp.utils import regrid, process_preds, rebin, wave, linear_wave +from quasarnp.utils import regrid, process_preds, rebin, WaveGrid file_loc = pathlib.Path(__file__).parent.resolve() / "test_files" class TestUtilities(unittest.TestCase): + def setUp(self): + self.log_wave = WaveGrid(linear=False) + self.linear_wave = WaveGrid(linear=True) + # Test taking the old grid and generating which bins on the new grid # the grid goes into. def test_regrid_log(self): # This is the regrids the grid to itself so shouldn't do anything - ob_bins, ob_keep = regrid(wave, wave) + ob_bins, ob_keep = regrid(self.log_wave, self.log_wave) expected_bins = np.arange(443) self.assertTrue(np.allclose(ob_bins, expected_bins)) @@ -25,7 +29,7 @@ def test_regrid_log(self): # Testing regridding the DESI grid into the SDSS/QuasarNet grid. wmin, wmax, wdelta = 3600, 9824, 0.8 old_grid = np.round(np.arange(wmin, wmax + wdelta, wdelta), 1) - ob_bins, ob_keep = regrid(old_grid, wave) + ob_bins, ob_keep = regrid(WaveGrid(grid=old_grid), self.log_wave) # In order to not have to overload this file with nuisance, I have moved # the actual answer here to regrid.txt. It's quite long, so only @@ -40,17 +44,17 @@ def test_regrid_linear(self): # This is the rebinned DESI grid, so regridding it shouldn't do anything. # Linear DESI grid information wmin, wmax, wdelta = 3600, 9824, 0.8 - wdelta_qnet = wdelta * 17 + wdelta_qnet = wdelta * 17 # For Linear Quasarnet grid new_grid = np.round(np.arange(wmin, wmax + wdelta, wdelta_qnet), 1) - ob_bins, ob_keep = regrid(new_grid, linear_wave) + ob_bins, ob_keep = regrid(WaveGrid(grid=new_grid), self.linear_wave) expected_bins = np.arange(458) self.assertTrue(np.allclose(ob_bins, expected_bins)) self.assertTrue(np.allclose(ob_keep, np.ones_like(ob_keep, dtype=bool))) # Testing regridding the DESI grid into the linear QuasarNet grid. old_grid = np.round(np.arange(wmin, wmax + wdelta, wdelta), 1) - ob_bins, ob_keep = regrid(old_grid, linear_wave) + ob_bins, ob_keep = regrid(WaveGrid(grid=old_grid), self.linear_wave) # 17 DESI bins per linear QuasarNET bin, but 17 * 458 is slightly # longer than the true DESI grid, so the last bin only @@ -62,7 +66,7 @@ def test_regrid_linear(self): def test_regrid_arbitrary(self): # Stephen Bailey's arbitrary grid old_grid = np.arange(3600, 9800, 10) - ob_bins, ob_keep = regrid(old_grid, wave) + ob_bins, ob_keep = regrid(WaveGrid(grid=old_grid), self.log_wave) # In order to not have to overload this file with nuisance, I have moved # the actual answer here to regrid_arbitrary.txt. It's quite long, so only @@ -74,11 +78,11 @@ def test_regrid_arbitrary(self): def test_regrid_failure(self): # Non constant binning should fail and raise a value error. - new_grid = np.concatenate([np.arange(3600, 4000, 10), np.arange(4000, 9800, 40)]) + new_grid = WaveGrid(grid=np.concatenate([np.arange(3600, 4000, 10), np.arange(4000, 9800, 40)])) # Testing regridding the DESI grid onto this broken grid wmin, wmax, wdelta = 3600, 9824, 0.8 - old_grid = np.round(np.arange(wmin, wmax + wdelta, wdelta), 1) + old_grid = WaveGrid(grid=np.round(np.arange(wmin, wmax + wdelta, wdelta), 1)) with self.assertRaises(ValueError): _ = regrid(old_grid, new_grid) @@ -103,10 +107,10 @@ def test_rebin(self): # Load the flux and ivar flux = h[fluxname].read()[:] ivar = h[ivarname].read()[:] - w_grid = h[wname].read() + w_grid = WaveGrid(grid=h[wname].read()) # Rebin the flux and ivar - n_flux, n_ivar = rebin(flux, ivar, w_grid, out_grid=wave) + n_flux, n_ivar = rebin(flux, ivar, w_grid, out_grid=self.log_wave) # Just checks that the rebinned is equal to the known # "correct" rebinning @@ -133,10 +137,10 @@ def test_rebin_linear(self): # Load the flux and ivar flux = h[fluxname].read()[:] ivar = h[ivarname].read()[:] - w_grid = h[wname].read() + w_grid = WaveGrid(grid=h[wname].read()) # Rebin the flux and ivar - n_flux, n_ivar = rebin(flux, ivar, w_grid, out_grid=linear_wave) + n_flux, n_ivar = rebin(flux, ivar, w_grid, out_grid=self.linear_wave) # Just checks that the rebinned is equal to the known # "correct" rebinning diff --git a/quasarnp/utils.py b/quasarnp/utils.py index e266000..46007dc 100644 --- a/quasarnp/utils.py +++ b/quasarnp/utils.py @@ -51,29 +51,88 @@ 'LYB' : 1025.72, } - -# Log SDSS grid information -# TODO: Make these editable and expose them publicly. -# Perhaps in the same way Farr did? -l_min = np.log10(3600.) -l_max = np.log10(10000.) -dl = 1e-3 #* 2 -nbins = int((l_max - l_min) / dl) -wave = 10**(l_min + np.arange(nbins) * dl) - -# Linear DESI grid information -wmin, wmax, wdelta = 3600, 9824, 0.8 -desi_wave = np.round(np.arange(wmin, wmax + wdelta, wdelta), 1) - -# 17 seems arbitrary but its the constant needed to get approximately -# the same number of linear bins as in the logarithmic case -# (458 vs 443) -wdelta_qnet = wdelta * 17 -linear_wave = np.round(np.arange(wmin, wmax + wdelta, wdelta_qnet), 1) -nbins_linear = len(linear_wave) - - -def process_preds(preds, lines, lines_bal, verbose=True, wave=wave): +class WaveGrid(): + def __init__(self, linear=False, wmin=None, wmax=None, wdelta=None, grid=None): + """ + Initialize a WaveGrid object, used for defining spectra wavelength grids. + + Parameters + ---------- + linear : bool, optional + Whether this wavelength grid is equally spaced in linear wavelength + or equally spaced in logarithmic wavelength. + + wmin : float, optional + The lower bound of the wavelength grid. Defaults to None, which uses + 3600 for both the default linear and logarithmic wavelength grids. + + wmax : float, optional + The upper bound of the wavelength grid. Defaults to None, which uses + 10000 for the default logarithmic grid and 9824 for the default + linear grid. + + wdelta : float, optional + The spacing between wavelength bins in the wavelength grid. + If linear, this spacing is the spacing in linear wavelength. + If not linear, then this spacing must be in logarithmic wavelength. + Defaults to None, which uses 1e-3 for the default logarithmic + wavelength grid, and 0.8 for the default linear wavelength grid. + + grid : numpy.ndarray, optional + Override all other parameters, and use this wavelength grid + as the wavelength grid. Allows for unusual or inconsistent spacing + grids, but may break some other functionality. Defaults to None. + + """ + self.is_linear = linear + + # Both grids have the same lower bound. + if wmin is None: + wmin = 3600 + + if self.is_linear: + if wmax is None: + wmax = 9824 + if wdelta is None: + # 17 seems arbitrary but its the constant needed to get approximately + # the same number of linear bins as in the logarithmic case + # (458 vs 443) + wdelta = 0.8 * 17 + + else: + if wmax is None: + wmax = 10000 + if wdelta is None: + wdelta = 1e-3 + wmin = np.log10(wmin) + wmax = np.log10(wmax) + nbins = int((wmax - wmin) / wdelta) + + # Sanity checking. + if wmax < wmin: + raise ValueError(f"wmin ({wmin}) must be less than wmax ({wmax})!") + + if self.is_linear: + self.wave = np.round(np.arange(wmin, wmax + 1e-3, wdelta), 1) + elif grid is not None: + self.wave = grid + self.wmin = grid[0] + self.wmax = grid[-1] + # Assume uniform, but even if not the first grid spacing is a reasonable choice. + if self.is_linear: + self.wdelta = grid[1] - grid[0] + else: + self.wdelta = np.log10(grid[1]) - np.log10(grid[0]) + else: + self.wave = 10**(wmin + np.arange(nbins) * wdelta) + + self.wmin, self.wmax, self.wdelta = wmin, wmax, wdelta + + def __len__(self): + return len(self.wave) + + +def process_preds(preds, lines, lines_bal, verbose=True, wavegrid=WaveGrid(linear=False)): """Convert network output to line confidence and redshift predictions. Parameters @@ -86,6 +145,9 @@ def process_preds(preds, lines, lines_bal, verbose=True, wave=wave): List of BAL line names. verbose : bool, optional Whether or not to print verbose debug output. Defaults to True. + wavegrid : WaveGrid, optional + The wavelength grid used to generate these predictions. Used to + determine redshift. Defaults to the default logarithmic QuasarNET grid. Returns ------- @@ -121,7 +183,8 @@ def process_preds(preds, lines, lines_bal, verbose=True, wave=wave): # Doing non BAL lines first c_line = np.zeros((nlines, nspec)) z_line = np.zeros_like(c_line) # This ensures they're always the same shape. - i_to_wave = lambda x: np.interp(x, np.arange(len(wave)), wave) + nbins = len(wavegrid) + i_to_wave = lambda x: np.interp(x, np.arange(nbins), wavegrid.wave) for il, line in enumerate(lines): l = absorber_IGM[line] @@ -137,7 +200,7 @@ def process_preds(preds, lines, lines_bal, verbose=True, wave=wave): # a box and how far into that box the line is, convert that # box distance to a wavelength. Convert the wavelength to a # redshift. - z_line[il] = (i_to_wave((j + offset) * len(wave) / nboxes) / l) - 1 + z_line[il] = (i_to_wave((j + offset) * nbins / nboxes) / l) - 1 # Not "best redshift", rather "redshift of most confident line" zbest = z_line[c_line.argmax(axis=0), np.arange(nspec)] @@ -155,18 +218,18 @@ def process_preds(preds, lines, lines_bal, verbose=True, wave=wave): offset = preds[il+nlines][np.arange(nspec, dtype=int), nboxes + j] c_line_bal[il] = preds[il + nlines][:, :13].max(axis=1) - z_line_bal[il] = (i_to_wave((j + offset) * len(wave) / nboxes) / l) - 1 + z_line_bal[il] = (i_to_wave((j + offset) * nbins / nboxes) / l) - 1 return c_line, z_line, zbest, c_line_bal, z_line_bal -def regrid(old_grid, new_grid=wave): +def regrid(old_grid, new_grid=WaveGrid(linear=False)): """Generate the mapping from the old wavelength grid to the QuasarNet grid. Parameters ---------- - old_grid : numpy.ndarray + old_grid : WaveGrid The old wavelength grid. - new_grid : numpy.ndarray, optional + new_grid : WaveGrid, optional The wavelength grid to rebin the loaded exposure to. Defaults to the logarithmic QuasarNET grid. @@ -180,22 +243,21 @@ def regrid(old_grid, new_grid=wave): wavelength bin is contained within the new grid boundaries and False if it is not. """ - linear_spacing = np.allclose(np.diff(new_grid)[0], np.diff(new_grid)) - log_spacing = np.allclose(np.diff(np.log10(new_grid))[0], np.diff(np.log10(new_grid))) + linear_spacing = np.allclose(np.diff(new_grid.wave)[0], np.diff(new_grid.wave)) + log_spacing = np.allclose(np.diff(np.log10(new_grid.wave))[0], np.diff(np.log10(new_grid.wave))) if linear_spacing: # Rounding off at the 10th decimal place helps avoid float rounding errors when # rebinning the desi grid to qnet grids since the latter should be an integer # number of the former bins. - wdelta = np.diff(new_grid)[0] - wmin = new_grid[0] - bins = np.floor(np.round(((old_grid - wmin) / (wdelta)), decimals=10)).astype(int) + wdelta = np.diff(new_grid.wave)[0] + wmin = new_grid.wave[0] + bins = np.floor(np.round(((old_grid.wave - wmin) / (wdelta)), decimals=10)).astype(int) elif log_spacing: - l_min = np.log10(new_grid)[0] - dl = np.diff(np.log10(new_grid))[0] - - bins = np.floor((np.log10(old_grid) - l_min) / dl).astype(int) + l_min = np.log10(new_grid.wave)[0] + dl = np.diff(np.log10(new_grid.wave))[0] + bins = np.floor((np.log10(old_grid.wave) - l_min) / dl).astype(int) else: raise ValueError("New grid spacing must be constant in either logarithmic or linear wavelength.") @@ -204,7 +266,7 @@ def regrid(old_grid, new_grid=wave): return bins, w -def rebin(flux, ivar, w_grid, out_grid=wave): +def rebin(flux, ivar, w_grid, out_grid=WaveGrid(linear=False)): """Rebin flux to the QuasarNet wavelength grid. The process for rebinning flux is as follows. First, the flux is multiplied @@ -219,9 +281,9 @@ def rebin(flux, ivar, w_grid, out_grid=wave): Input flux array of shape `(nspec, len(w_grid))`. ivar : numpy.ndarray Input ivar array of shape `(nspec, len(w_grid))`. - w_grid: numpy.ndarray + w_grid: WaveGrid Input wavelength grid. - out_grid : numpy.ndarray, optional + out_grid : WaveGrid, optional The wavelength grid to rebin the loaded exposure to. Defaults to the logarithmic QuasarNET grid. @@ -237,7 +299,7 @@ def rebin(flux, ivar, w_grid, out_grid=wave): regrid : Function that converts the old wavelength grid to the new grid. """ - new_grid, w = regrid(w_grid, out_grid) + bins, w = regrid(w_grid, out_grid) fl_iv = flux * ivar @@ -252,13 +314,13 @@ def rebin(flux, ivar, w_grid, out_grid=wave): # past the QuasarNET grid and give negative bin values. I have tests that # confirm this still works on DESI data, don't worry. fl_iv = fl_iv[:, w] - new_grid = new_grid[w] + bins = bins[w] ivar_temp = ivar[:, w] for i in range(len(flux)): - c = np.bincount(new_grid, weights=fl_iv[i, :]) + c = np.bincount(bins, weights=fl_iv[i, :]) flux_out[i, :len(c)] += c - c = np.bincount(new_grid, weights=ivar_temp[i, :]) + c = np.bincount(bins, weights=ivar_temp[i, :]) ivar_out[i, :len(c)] += c return flux_out, ivar_out