Autostronomy · ConnorStoneAstro · Apr 2, 2026 · Mar 31, 2026 · Apr 2, 2026 · Apr 2, 2026
diff --git a/astrophot/fit/lm.py b/astrophot/fit/lm.py
@@ -1,7 +1,6 @@
 # Levenberg-Marquardt algorithm
 from typing import Sequence, Optional
 
-import torch
 import numpy as np
 
 from .base import BaseOptimizer
@@ -238,7 +237,6 @@ def poisson_2nll_ndf(self):
         M = self.forward(self.current_state)
         return 2 * backend.sum(M - self.Y * backend.log(M + 1e-10)) / self.ndf
 
-    @torch.no_grad()
     def fit(self, update_uncertainty=True) -> BaseOptimizer:
         """This performs the fitting operation. It iterates the LM step
         function until convergence is reached. Includes a message
@@ -366,7 +364,6 @@ def check_convergence(self) -> bool:
         return False
 
     @property
-    @torch.no_grad()
     def covariance_matrix(self) -> ArrayLike:
         """The covariance matrix for the model at the current
         parameters. This can be used to construct a full Gaussian PDF for the
@@ -391,7 +388,6 @@ def covariance_matrix(self) -> ArrayLike:
             self._covariance_matrix = backend.linalg.pinv(hess)
         return self._covariance_matrix
 
-    @torch.no_grad()
     def update_uncertainty(self) -> None:
         """Call this function after optimization to set the uncertainties for
         the parameters. This will use the diagonal of the covariance

diff --git a/astrophot/models/airy.py b/astrophot/models/airy.py
@@ -1,6 +1,3 @@
-import torch
-import numpy as np
-
 from ..utils.decorators import ignore_numpy_warnings, combine_docstrings
 from .psf_model_object import PSFModel
 from .mixins import RadialMixin
@@ -15,9 +12,13 @@ class AiryPSF(RadialMixin, PSFModel):
     """The Airy disk is an analytic description of the diffraction pattern
     for a circular aperture.
 
-    The diffraction pattern is described exactly by the configuration
-    of the lens system under the assumption that all elements are
-    perfect. This expression goes as:
+    WARNING: This model does not work in JAX (it doesn't have the required Bessel function implemented)
+
+    WARNING: PyTorch appears to have an issue with gradients wrt the R1 parameter. Optimization doesn't seem to work for this model (maybe try scipy optimize?).
+
+    The diffraction pattern is described exactly by the configuration of the
+    lens system under the assumption that all elements are perfect. This
+    expression goes as:
 
     .. math::
 
@@ -27,50 +28,46 @@ class AiryPSF(RadialMixin, PSFModel):
 
        x = ka\\sin(\\theta) = \\frac{2\\pi a r}{\\lambda R}
 
-    where :math:`I(\\theta)` is the intensity as a function of the
-    angular position within the diffraction system along its main
-    axis, :math:`I_0` is the central intensity of the airy disk,
-    :math:`J_1` is the Bessel function of the first kind of order one,
-    :math:`k = \\frac{2\\pi}{\\lambda}` is the wavenumber of the
-    light, :math:`a` is the aperture radius, :math:`r` is the radial
-    position from the center of the pattern, :math:`R` is the distance
+    where :math:`I(\\theta)` is the intensity as a function of the angular
+    position within the diffraction system along its main axis, :math:`I_0` is
+    the central intensity of the airy disk, :math:`J_1` is the Bessel function
+    of the first kind of order one, :math:`k = \\frac{2\\pi}{\\lambda}` is the
+    wavenumber of the light, :math:`a` is the aperture radius, :math:`r` is the
+    radial position from the center of the pattern, :math:`R` is the distance
     from the circular aperture to the observation plane.
 
-    In the ``Airy_PSF`` class we combine the parameters
-    :math:`a,R,\\lambda` into a single ratio to be optimized (or fixed
-    by the optical configuration).
+    In the ``AiryPSF`` class we combine the parameters :math:`a,R,\\lambda` and
+    scale based on the first zero of the :math:`J_1` function. This way you can
+    work with the more intuitive radius parameter.
 
-    :param I0: The central intensity of the airy disk in flux/arcsec^2.
-    :param aRL: The ratio of the aperture radius to the
-        product of the wavelength and the distance from the aperture to the
-        observation plane, :math:`\\frac{a}{R \\lambda}`.
+    :param I0: The central intensity of the airy disk in flux/pix^2.
+    :param R1: The radius of the first zero of the airy disk in pix.
 
     """
 
     _model_type = "airy"
     _parameter_specs = {
         "I0": {
-            "units": "flux/arcsec^2",
+            "units": "flux/pix^2",
             "value": 1.0,
             "shape": (),
             "dynamic": False,
-            "description": "The central intensity of the airy disk in flux/arcsec^2.",
+            "description": "The central intensity of the airy disk in flux/pix^2.",
         },
-        "aRL": {
-            "units": "a/(R lambda)",
+        "R1": {
+            "units": "pix",
             "shape": (),
             "dynamic": True,
-            "description": "The ratio of the aperture radius to the product of the wavelength and the distance from the aperture to the observation plane.",
+            "description": "The radius of the first zero of the airy disk in pix.",
         },
     }
     usable = True
 
-    @torch.no_grad()
     @ignore_numpy_warnings
     def initialize(self):
         super().initialize()
 
-        if self.I0.initialized and self.aRL.initialized:
+        if self.I0.initialized and self.R1.initialized:
             return
         icenter = self.target.targpixel_to_mypixel(*self.center.value)
 
@@ -80,10 +77,10 @@ def initialize(self):
                 int(icenter[1]) - 2 : int(icenter[1]) + 2,
             ]
             self.I0.value = backend.mean(mid_chunk) / self.target.upsample**2
-        if not self.aRL.initialized:
-            self.aRL.value = (5.0 / 8.0) * 2
+        if not self.R1.initialized:
+            self.R1.value = 2.0
 
     @forward
-    def radial_model(self, R: ArrayLike, I0: ArrayLike, aRL: ArrayLike) -> ArrayLike:
-        x = 2 * np.pi * aRL * R
+    def radial_model(self, R: ArrayLike, I0: ArrayLike, R1: ArrayLike) -> ArrayLike:
+        x = 3.8317 * R / R1
         return I0 * (2 * backend.bessel_j1(x) / x) ** 2
diff --git a/astrophot/models/basis_psf.py b/astrophot/models/basis_psf.py
@@ -16,13 +16,28 @@
 
 @combine_docstrings
 class PixelBasisPSF(PSFModel):
-    """point source model which uses multiple images as a basis for the
-    PSF as its representation for point sources. Using bilinear interpolation it
-    will shift the PSF within a pixel to accurately represent the center
-    location of a point source. There is no functional form for this object type
-    as any image can be supplied. Bilinear interpolation is very fast and
-    accurate for smooth models, so it is possible to do the expensive
-    interpolation before optimization and save time.
+    """A point source defined by a linear combination of basis images.
+
+    Point source model which uses multiple images as a basis for the PSF as its
+    representation for point sources. Using bilinear interpolation it will shift
+    the PSF within a pixel to accurately represent the center location of a
+    point source. There is no functional form for this object type as any image
+    can be supplied. Bilinear interpolation is very fast and accurate for smooth
+    models, so it is possible to do the expensive interpolation before
+    optimization and save time.
+
+    The initialization of the weights is currently done by setting random
+    values. This almost certainly produces a bad initial model. You may either
+    set weights manually, or use a fitting step to get good starting weights.
+
+    Note: The resulting PSF from the combined basis set will be normalized
+        before being used as a PSF model, so the sum of the `weights` does not
+        need to be restricted to any particular value.
+
+    Note: It is possible for the basis elements to combine to give a PSF model
+        that is negative in some areas. This is likely not desired, if this is a
+        concern then use a non-negative basis and set the valid range of the
+        weights to be `(0, None)`.
 
     :param weights: The weights of the basis set of images in units of flux.
     """
@@ -67,9 +82,15 @@ def initialize(self):
             self.basis = func.zernike_basis(order, N) / self.target.pixel_area
 
         if not self.weights.initialized:
-            w = np.zeros(self.basis.shape[0])
-            w[0] = 1.0
-            self.weights.value = w
+            w = backend.as_array(
+                1 / np.arange(1, self.basis.shape[0] + 1),
+                dtype=config.DTYPE,
+                device=config.DEVICE,
+            )
+            scale = backend.mean(self.target[self.window].data) / backend.mean(
+                backend.sum(w[:, None, None] * self.basis, dim=0)
+            )
+            self.weights.value = w * scale
 
     @forward
     def brightness(self, x: ArrayLike, y: ArrayLike, weights: ArrayLike) -> ArrayLike:

diff --git a/astrophot/models/batch_model_object.py b/astrophot/models/batch_model_object.py
@@ -19,9 +19,17 @@ class BatchModel(GradMixin, SampleMixin, Model):
     model). If you want to model the same object in multiple images, see the
     BatchSceneModel instead.
 
+    Once placed in a BatchModel, parameters for the base Model may now be given
+    values with an extra dimension. This extra dimension is the batch dimension
+    that will be vectorized over. For example, with a Gaussian Model, the
+    `sigma` parameter is normally a single value scalar. You may now set it with
+    a vector of values, and the length of that vector determines how many
+    Gaussians the BatchModel will generate. Of course, every dynamic parameter
+    that is batched must have the same size for its batch dimension.
+
     **Note:** any model parameters that you wish to batch over must be set to
-        dynamic=True. See [caskade hierarchical
-        models](https://caskade.readthedocs.io/en/latest/notebooks/HierarchicalModels.html)
+        dynamic=True. See `caskade hierarchical models
+        <https://caskade.readthedocs.io/en/latest/notebooks/HierarchicalModels.html>`_
         for more details.
     """
 

diff --git a/astrophot/models/bilinear_sky.py b/astrophot/models/bilinear_sky.py
@@ -1,6 +1,5 @@
 from typing import Tuple
 import numpy as np
-import torch
 
 from .sky_model_object import SkyModel
 from ..utils.decorators import ignore_numpy_warnings, combine_docstrings
@@ -10,13 +9,19 @@
 from . import func
 from ..utils.initialize import polar_decomposition
 
-__all__ = ["BilinearSky"]
+__all__ = ("BilinearSky",)
 
 
 @combine_docstrings
 class BilinearSky(SkyModel):
     """Sky background model using a coarse bilinear grid for the sky flux.
 
+    This allows for modelling more complex sky surfaces, such as dust or
+    galactic cirrus, without needing to specify a functional form. It is
+    possible to specify a position angle and grid scale to control how it is
+    oriented relative to the model target. By default it will just align with
+    the image.
+
     :param I: sky brightness grid
     :param PA: position angle of the sky grid in radians.
     :param scale: scale of the sky grid in arcseconds per grid unit.
@@ -34,13 +39,13 @@ class BilinearSky(SkyModel):
         "PA": {
             "units": "radians",
             "shape": (),
-            "dynamic": True,
+            "dynamic": False,
             "description": "position angle of the sky grid in radians",
         },
         "scale": {
             "units": "arcsec/grid-unit",
             "shape": (),
-            "dynamic": True,
+            "dynamic": False,
             "description": "scale of the sky grid in arcseconds per grid unit",
         },
     }
@@ -51,28 +56,27 @@ def __init__(self, *args, nodes: Tuple[int, int] = (3, 3), **kwargs):
         super().__init__(*args, **kwargs)
         self.nodes = nodes
 
-    @torch.no_grad()
     @ignore_numpy_warnings
     def initialize(self):
         super().initialize()
 
         if self.I.initialized:
             self.nodes = tuple(self.I.value.shape)
 
+        target_area = self.target[self.window]
         if not self.PA.initialized:
             R, _ = polar_decomposition(self.target.CD.npvalue)
             self.PA.value = np.arccos(np.abs(R[0, 0]))
         if not self.scale.initialized:
             self.scale.value = (
-                self.target.pixelscale.item() * self.target._data.shape[0] / self.nodes[0]
+                self.target.pixelscale.item() * target_area._data.shape[0] / self.nodes[0]
             )
 
         if self.I.initialized:
             return
 
-        target_dat = self.target[self.window]
-        dat = backend.to_numpy(target_dat._data).copy()
-        mask = backend.to_numpy(target_dat._mask).copy()
+        dat = backend.to_numpy(target_area._data).copy()
+        mask = backend.to_numpy(target_area._mask).copy()
         dat[mask] = np.nanmedian(dat)
         iS = dat.shape[0] // self.nodes[0]
         jS = dat.shape[1] // self.nodes[1]

diff --git a/astrophot/models/flatsky.py b/astrophot/models/flatsky.py
@@ -14,7 +14,7 @@ class FlatSky(SkyModel):
     """Model for the sky background in which all values across the image
     are the same.
 
-    :param I0: brightness for the sky, represented as the log of the brightness over pixel scale squared, this is proportional to a surface brightness
+    :param I0: brightness for the sky in flux/arcsec^2
 
     """
 
@@ -24,7 +24,7 @@ class FlatSky(SkyModel):
             "units": "flux/arcsec^2",
             "shape": (),
             "dynamic": True,
-            "description": "brightness for the sky, proportional to a surface brightness",
+            "description": "brightness for the sky in flux/arcsec^2",
         }
     }
     usable = True

diff --git a/astrophot/models/func/gaussian.py b/astrophot/models/func/gaussian.py
@@ -1,9 +1,6 @@
-import torch
 from ...backend_obj import backend, ArrayLike
 import numpy as np
 
-sq_2pi = np.sqrt(2 * np.pi)
-
 
 def gaussian(R: ArrayLike, sigma: ArrayLike, flux: ArrayLike) -> ArrayLike:
     """Gaussian 1d profile function, specifically designed for pytorch
@@ -12,6 +9,6 @@ def gaussian(R: ArrayLike, sigma: ArrayLike, flux: ArrayLike) -> ArrayLike:
     **Args:**
     -  `R`: Radii tensor at which to evaluate the gaussian function
     -  `sigma`: Standard deviation of the gaussian in the same units as R
-    -  `flux`: Central surface density
+    -  `flux`: Total flux of the Gaussian
     """
-    return (flux / (sq_2pi * sigma)) * backend.exp(-0.5 * (R / sigma) ** 2)
+    return (flux / (2 * np.pi * sigma**2)) * backend.exp(-0.5 * (R / sigma) ** 2)
diff --git a/astrophot/models/gaussian_ellipsoid.py b/astrophot/models/gaussian_ellipsoid.py
@@ -1,4 +1,3 @@
-import torch
 import numpy as np
 
 from .model_object import ComponentModel
@@ -104,7 +103,6 @@ class GaussianEllipsoid(ComponentModel):
     }
     usable = True
 
-    @torch.no_grad()
     @ignore_numpy_warnings
     def initialize(self):
         super().initialize()

diff --git a/astrophot/models/group_psf_model.py b/astrophot/models/group_psf_model.py
@@ -8,14 +8,23 @@
 
 class PSFGroupModel(GroupModel):
     """
-    A group of PSF models. Behaves similarly to a `GroupModel`, but specifically designed for PSF models.
+    A group of PSF models. Behaves similarly to a `GroupModel`, but specifically
+    designed for PSF models. Note that there is no concept of a PSFImageList, so
+    they always represent a single PSF model.
+
+    When sampling, a PSFGroupModel tells each sub-PSFModel (including nested
+    sub-PSFGroupModels) to sample without normalization. This way they can fit
+    with relative strengths. The final top-level PSFGroupModel will normalize
+    the resulting PSF, so that the image that gets passed to the regular model
+    objects for the purpose of convolution is always normalized. This means that
+    the sub-PSFModels in a PSFGroupModel should have their brightness parameters
+    (i.e., `I0` for the MoffatPSF) set to dynamic so they can participate in the
+    fit. Though this is not strictly a requirement (say you already know the
+    relative brightnesses).
     """
 
     _model_type = "psf"
     usable = True
-    normalize_psf = True
-
-    _options = ("normalize_psf",)
 
     @property
     def target(self):
@@ -36,15 +45,23 @@ def target(self, target):
         self._target = target
 
     @forward
-    def sample(self, *args, **kwargs):
+    def sample(self, normalize_psf=True) -> PSFImage:
         """Sample the PSF group model on the target image."""
         image = self.target.model_image(self.window)
 
         for model in self.models:
-            model_image = model()
+            model_image = model(normalize_psf=False)
             self._ensure_vmap_compatible(image, model_image)
             image += model_image
 
-        if self.normalize_psf:
+        if normalize_psf:
             image.normalize()
         return image
+
+    @forward
+    def __call__(
+        self,
+        normalize_psf=True,
+    ) -> PSFImage:
+
+        return self.sample(normalize_psf=normalize_psf)