From 233e1c35aa98b701fdfa873a2e4e8f310490b704 Mon Sep 17 00:00:00 2001 From: "Alec Thomson (S&A, Kensington WA)" Date: Wed, 30 Apr 2025 15:10:09 +0800 Subject: [PATCH 1/5] Add weights --- .gitignore | 3 ++ rm_lite/tools_1d/rmsynth.py | 7 ++- rm_lite/utils/synthesis.py | 101 +++++++++++++++++++++++++++++++++--- 3 files changed, 102 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 9496de8..d82a518 100644 --- a/.gitignore +++ b/.gitignore @@ -163,3 +163,6 @@ rm_lite/ms_test.ipynb rm_lite/multiscale.ipynb rm_lite/Untitled-1.ipynb 2d.ipynb +depol.ipynb +nufft_test.ipynb +robust.ipynb diff --git a/rm_lite/tools_1d/rmsynth.py b/rm_lite/tools_1d/rmsynth.py index 32769b2..5c2f948 100644 --- a/rm_lite/tools_1d/rmsynth.py +++ b/rm_lite/tools_1d/rmsynth.py @@ -12,6 +12,7 @@ from rm_lite.utils.logging import logger from rm_lite.utils.synthesis import ( + WEIGHT_TYPES, FDFOptions, StokesData, compute_rmsynth_params, @@ -76,11 +77,12 @@ def run_rmsynth( phi_max_radm2: float | None = None, d_phi_radm2: float | None = None, n_samples: float | None = 10.0, - weight_type: Literal["variance", "uniform"] = "variance", + weight_type: WEIGHT_TYPES = "uniform", do_fit_rmsf: bool = False, do_fit_rmsf_real: bool = False, fit_function: Literal["log", "linear"] = "log", fit_order: int = 2, + robust: float | None = None, ) -> RMSynth1DResults: """Run RM-synthesis on 1D data @@ -100,6 +102,8 @@ def run_rmsynth( do_fit_rmsf_real (bool, optional): The the real part of the RMSF. Defaults to False. fit_function ("log" | "linear", optional): _description_. Defaults to "log". fit_order (int, optional): Polynomial fit order. Defaults to 2. Negative values will iterate until the fit is good. + robust (float | None, optional): Briggs robustness parameter. Defaults to None. + cell_size_m2 (float | None, optional): Cell size in m^2. Defaults to None. Returns: RMSynth1DResults: @@ -124,6 +128,7 @@ def run_rmsynth( weight_type=weight_type, do_fit_rmsf=do_fit_rmsf, do_fit_rmsf_real=do_fit_rmsf_real, + robust=robust, ) return _run_rmsynth(stokes_data, fdf_options, fit_function, fit_order) diff --git a/rm_lite/utils/synthesis.py b/rm_lite/utils/synthesis.py index 55271b7..ac2e88a 100644 --- a/rm_lite/utils/synthesis.py +++ b/rm_lite/utils/synthesis.py @@ -104,6 +104,9 @@ def with_options(self, **kwargs): return TheoreticalNoise(**as_dict) +WEIGHT_TYPES = Literal["variance", "uniform", "natural", "briggs"] + + class FDFOptions(NamedTuple): """Options for RM-synthesis""" @@ -113,12 +116,14 @@ class FDFOptions(NamedTuple): """ Faraday depth resolution """ n_samples: float | None = 10.0 """ Number of samples """ - weight_type: Literal["variance", "uniform"] = "variance" + weight_type: WEIGHT_TYPES = "uniform" """ Weight type """ do_fit_rmsf: bool = False """ Fit RMSF """ do_fit_rmsf_real: bool = False """ Fit real part of the RMSF """ + robust: float | None = None + """ Briggs robust parameter """ def calc_mom2_fdf( @@ -429,6 +434,65 @@ def compute_rmsf_params( ) +def uniform_weight( + freq_hz: NDArray[np.float64], +) -> NDArray[np.float64]: + return np.ones_like(freq_hz) / len(freq_hz) + + +def noise_weight( + noise: NDArray[np.float64], +) -> NDArray[np.float64]: + weights = 1 / noise**2 + weights /= np.nansum(weights) + return weights + + +def natural_weight( + freq_hz: NDArray[np.float64], + phi_max_radm2: float, + noise: NDArray[np.float64] | None = None, +) -> NDArray[np.float64]: + lambda_sq_arr_m2 = freq_to_lambda2(freq_hz) + + # Compute the 'cell n_bins' in lambda^2 space + rmsf_properties = get_fwhm_rmsf(lambda_sq_arr_m2) + + cell_size_m2 = np.sqrt(3) / phi_max_radm2 + + n_bins = int(rmsf_properties.lambda_sq_range_m2 / cell_size_m2) + + binned_stat = stats.binned_statistic( + x=lambda_sq_arr_m2, + values=lambda_sq_arr_m2, + statistic="count", + bins=n_bins, + ) + + weight = binned_stat.statistic[binned_stat.binnumber - 1] + weight /= np.nansum(weight) + if noise is None: + return weight + var_weight = noise_weight(noise) + return weight * var_weight + + +def briggs_weight( + robust: float, + freq_hz: NDArray[np.float64], + phi_max_radm2: float, + noise: NDArray[np.float64] | None = None, +) -> NDArray[np.float64]: + # Briggs robust factor - CASA callsed it f**2 + eff_squared = (5 * 10 ** -float(robust)) ** 2 + nat_weights = natural_weight( + freq_hz=freq_hz, + noise=noise, + phi_max_radm2=phi_max_radm2, + ) + return nat_weights / (1 + nat_weights * eff_squared) + + def compute_rmsynth_params( freq_arr_hz: NDArray[np.float64], complex_pol_arr: NDArray[np.complex128], @@ -480,13 +544,34 @@ def compute_rmsynth_params( f"phi = {phi_arr_radm2[0]:0.2f} to {phi_arr_radm2[-1]:0.2f} by {d_phi_radm2:0.2f} ({len(phi_arr_radm2)} chans)." ) - # Calculate the weighting as 1/sigma^2 or all 1s (uniform) - if fdf_options.weight_type == "variance": - if (real_qu_error == 0).all(): - real_qu_error = np.ones(len(real_qu_error)) - weight_arr = 1.0 / real_qu_error**2 - else: - weight_arr = np.ones_like(freq_arr_hz) + if (real_qu_error == 0).all(): + real_qu_error = np.ones(len(real_qu_error)) + + logger.info(f"Computing weights for type {fdf_options.weight_type}...") + match fdf_options.weight_type: + case "variance": + weight_arr = noise_weight(noise=real_qu_error) + case "uniform": + weight_arr = uniform_weight(freq_hz=freq_arr_hz) + case "natural": + weight_arr = natural_weight( + freq_hz=freq_arr_hz, + noise=real_qu_error, + phi_max_radm2=phi_max_radm2, + ) + case "briggs": + if fdf_options.robust is None: + msg = "Briggs weighting requires a robust parameter." + raise ValueError(msg) + weight_arr = briggs_weight( + robust=fdf_options.robust, + freq_hz=freq_arr_hz, + noise=real_qu_error, + phi_max_radm2=phi_max_radm2, + ) + case _: + msg = f"Unknown weighting type: {fdf_options.weight_type}" + raise ValueError(msg) logger.debug(f"Weighting type: {fdf_options.weight_type}") From 12f2349a3328007f4040673ae7cdc7a607e1fd99 Mon Sep 17 00:00:00 2001 From: "Alec Thomson (S&A, Kensington WA)" Date: Thu, 1 May 2025 11:53:09 +0800 Subject: [PATCH 2/5] formatting --- .github/workflows/ci.yml | 2 +- .pre-commit-config.yaml | 7 ------- docs/examples/rmclean_2d.ipynb | 2 +- docs/examples/rmsynth_2d.ipynb | 2 +- pyproject.toml | 6 +++--- rm_lite/utils/fitting.py | 2 +- rm_lite/utils/multiscale.py | 3 ++- rm_lite/utils/synthesis.py | 2 +- tests/test_nufft.py | 1 + 9 files changed, 11 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1cca750..6daece1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13"] runs-on: [ubuntu-latest] steps: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d0c4699..8252909 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,13 +32,6 @@ repos: - id: rst-directive-colons - id: rst-inline-touching-normal - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v4.0.0-alpha.8" - hooks: - - id: prettier - types_or: [yaml, markdown, html, css, scss, javascript, json] - args: [--prose-wrap=always] - - repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.11.7" hooks: diff --git a/docs/examples/rmclean_2d.ipynb b/docs/examples/rmclean_2d.ipynb index cff8666..cf08bf1 100644 --- a/docs/examples/rmclean_2d.ipynb +++ b/docs/examples/rmclean_2d.ipynb @@ -96,7 +96,7 @@ "\n", "\n", "for time_step, (rm_radm2, frac_pol, psi0_deg) in enumerate(\n", - " zip(rm_time, frac_pol_time, psi0_time)\n", + " zip(rm_time, frac_pol_time, psi0_time, strict=False)\n", "):\n", " complex_data_noiseless = faraday_simple_spectrum(\n", " freq_hz,\n", diff --git a/docs/examples/rmsynth_2d.ipynb b/docs/examples/rmsynth_2d.ipynb index c964a27..c8be5ac 100644 --- a/docs/examples/rmsynth_2d.ipynb +++ b/docs/examples/rmsynth_2d.ipynb @@ -97,7 +97,7 @@ "\n", "\n", "for time_step, (rm_radm2, frac_pol, psi0_deg) in enumerate(\n", - " zip(rm_time, frac_pol_time, psi0_time)\n", + " zip(rm_time, frac_pol_time, psi0_time, strict=False)\n", "):\n", " complex_data_noiseless = faraday_simple_spectrum(\n", " freq_hz,\n", diff --git a/pyproject.toml b/pyproject.toml index d24d794..4696847 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ authors = [ ] license.file = "LICENSE" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" classifiers = [ "Development Status :: 1 - Planning", "Intended Audience :: Science/Research", @@ -21,10 +21,10 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", "Typing :: Typed", ] @@ -118,7 +118,7 @@ isort.required-imports = ["from __future__ import annotations"] [tool.mypy] files = ["rm_tools", "tests"] -python_version = "3.9" +python_version = "3.10" warn_unused_configs = true strict = true enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] diff --git a/rm_lite/utils/fitting.py b/rm_lite/utils/fitting.py index e788418..772ed1a 100644 --- a/rm_lite/utils/fitting.py +++ b/rm_lite/utils/fitting.py @@ -289,7 +289,7 @@ def static_fit( ) errors = np.sqrt(np.diag(pcov)) - fit_vals = [sf.round(p, e) for p, e in zip(popt, errors)] + fit_vals = [sf.round(p, e) for p, e in zip(popt, errors, strict=False)] logger.info(f"Fit results: {fit_vals}") return FitResult( diff --git a/rm_lite/utils/multiscale.py b/rm_lite/utils/multiscale.py index b3211ae..1ce7905 100644 --- a/rm_lite/utils/multiscale.py +++ b/rm_lite/utils/multiscale.py @@ -3,8 +3,9 @@ from __future__ import annotations import logging +from collections.abc import Callable from functools import partial -from typing import Callable, Literal +from typing import Literal import numpy as np from numpy.typing import NDArray diff --git a/rm_lite/utils/synthesis.py b/rm_lite/utils/synthesis.py index 26b7208..785a457 100644 --- a/rm_lite/utils/synthesis.py +++ b/rm_lite/utils/synthesis.py @@ -592,7 +592,7 @@ def compute_rmsynth_params( phi_max_radm2=phi_max_radm2, ) case _: - msg = f"Unknown weighting type: {fdf_options.weight_type}" + msg = f"Unknown weighting type: {fdf_options.weight_type}" # type: ignore[unreachable] raise ValueError(msg) logger.debug(f"Weighting type: {fdf_options.weight_type}") diff --git a/tests/test_nufft.py b/tests/test_nufft.py index 6099fae..6198c6a 100644 --- a/tests/test_nufft.py +++ b/tests/test_nufft.py @@ -427,6 +427,7 @@ def test_rmsf(): ["phi2Arr", "RMSFcube", "fwhmRMSFArr"], [phi2Arr, RMSFcube, fwhmRMSFArr], [phi2Arr_old, RMSFcube_old, fwhmRMSFArr_old], + strict=False, ): msg = f"Testing {name}" logger.info(msg) From 35440f402206e7316e9fa8178ace8f5c7bb8298e Mon Sep 17 00:00:00 2001 From: Alec Thomson Date: Thu, 1 May 2025 11:53:33 +0800 Subject: [PATCH 3/5] Update rm_lite/utils/synthesis.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- rm_lite/utils/synthesis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rm_lite/utils/synthesis.py b/rm_lite/utils/synthesis.py index 785a457..8bd0e00 100644 --- a/rm_lite/utils/synthesis.py +++ b/rm_lite/utils/synthesis.py @@ -505,7 +505,7 @@ def briggs_weight( phi_max_radm2: float, noise: NDArray[np.float64] | None = None, ) -> NDArray[np.float64]: - # Briggs robust factor - CASA callsed it f**2 + # Briggs robust factor - CASA called it f**2 eff_squared = (5 * 10 ** -float(robust)) ** 2 nat_weights = natural_weight( freq_hz=freq_hz, From 944f4cad70aabf3b3c6b8785bf845ae750115d40 Mon Sep 17 00:00:00 2001 From: "Alec Thomson (S&A, Kensington WA)" Date: Thu, 1 May 2025 12:00:58 +0800 Subject: [PATCH 4/5] Docstrings --- rm_lite/tools_1d/rmsynth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rm_lite/tools_1d/rmsynth.py b/rm_lite/tools_1d/rmsynth.py index 54371d4..c4a7e1f 100644 --- a/rm_lite/tools_1d/rmsynth.py +++ b/rm_lite/tools_1d/rmsynth.py @@ -97,7 +97,7 @@ def run_rmsynth( phi_max_radm2 (float | None, optional): Maximum Faraday depth. Defaults to None. d_phi_radm2 (float | None, optional): Spacing in Faraday depth. Defaults to None. n_samples (float | None, optional): Number of samples across the RMSF. Defaults to 10.0. - weight_type ("variance", "uniform", optional): Type of weighting. Defaults to "variance". + weight_type ("variance", "uniform", "natural", "briggs"): Weighting type. Defaults to "uniform". do_fit_rmsf (bool, optional): Fit the RMSF main lobe. Defaults to False. do_fit_rmsf_real (bool, optional): The the real part of the RMSF. Defaults to False. fit_function ("log" | "linear", optional): _description_. Defaults to "log". From 122a846c15aa7e6c4689aa54a05740e879b45ee9 Mon Sep 17 00:00:00 2001 From: "Alec Thomson (S&A, Kensington WA)" Date: Fri, 9 May 2025 08:39:16 +0800 Subject: [PATCH 5/5] Fix weights --- .gitignore | 1 + pyproject.toml | 1 + rm_lite/utils/synthesis.py | 50 ++++++++++++++++++++++++++++++-------- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 98ecf36..de8e281 100644 --- a/.gitignore +++ b/.gitignore @@ -167,3 +167,4 @@ depol.ipynb nufft_test.ipynb robust.ipynb *.ipynb +dragons_120.23_9.59.txt diff --git a/pyproject.toml b/pyproject.toml index 4696847..d8bccdc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "corner", "polars", "sigfig", + "pandas", ] [project.optional-dependencies] diff --git a/rm_lite/utils/synthesis.py b/rm_lite/utils/synthesis.py index 8bd0e00..ca227d3 100644 --- a/rm_lite/utils/synthesis.py +++ b/rm_lite/utils/synthesis.py @@ -8,6 +8,7 @@ import finufft import numpy as np +import pandas as pd import polars as pl from astropy.constants import c as speed_of_light from astropy.stats import mad_std @@ -470,11 +471,19 @@ def noise_weight( return weights -def natural_weight( +class WaveSqBins(NamedTuple): + categories: pd.Categorical + """Categories for the bins""" + bins: NDArray[np.intp] + """The bins in wavelength^2 space""" + bin_counts: NDArray[np.intp] + """The counts in each bin per channel""" + + +def _get_wave_sq_bins( freq_hz: NDArray[np.float64], phi_max_radm2: float, - noise: NDArray[np.float64] | None = None, -) -> NDArray[np.float64]: +) -> WaveSqBins: lambda_sq_arr_m2 = freq_to_lambda2(freq_hz) # Compute the 'cell n_bins' in lambda^2 space @@ -484,19 +493,38 @@ def natural_weight( n_bins = int(rmsf_properties.lambda_sq_range_m2 / cell_size_m2) - binned_stat = stats.binned_statistic( - x=lambda_sq_arr_m2, - values=lambda_sq_arr_m2, - statistic="count", + categories, bins = pd.cut( + lambda_sq_arr_m2, bins=n_bins, + include_lowest=True, + retbins=True, ) + bin_idx = categories.codes + bin_counts = np.bincount(bin_idx) - weight = binned_stat.statistic[binned_stat.binnumber - 1] + # repeat the bin_counts to match the original array + bin_counts = bin_counts[bin_idx] + return WaveSqBins(categories, bins, bin_counts) + + +def natural_weight( + freq_hz: NDArray[np.float64], + phi_max_radm2: float, + noise: NDArray[np.float64] | None = None, +) -> NDArray[np.float64]: + wave_sq_bins = _get_wave_sq_bins( + freq_hz=freq_hz, + phi_max_radm2=phi_max_radm2, + ) + + weight = wave_sq_bins.bin_counts.astype(np.float64) weight /= np.nansum(weight) if noise is None: return weight var_weight = noise_weight(noise) - return weight * var_weight + weight *= var_weight + weight /= np.nansum(weight) + return weight def briggs_weight( @@ -512,7 +540,9 @@ def briggs_weight( noise=noise, phi_max_radm2=phi_max_radm2, ) - return nat_weights / (1 + nat_weights * eff_squared) + weights = nat_weights / (1 + nat_weights * eff_squared) + weights /= np.nansum(weights) + return weights def compute_rmsynth_params(