diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index eb25d303..13e8cc46 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -23,3 +23,28 @@ jobs: uses: julia-actions/julia-buildpkg@v1 - name: Run tests uses: julia-actions/julia-runtest@v1 + test-mkl: + runs-on: ubuntu-latest + if: github.event.pull_request.draft == false + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@latest + with: + version: '1' + - name: Set FFTW provider to MKL + run: | + printf '[FFTW]\nprovider = "mkl"\n' > LocalPreferences.toml + - name: Install dependencies and build + env: + PYTHON: + uses: julia-actions/julia-buildpkg@v1 + - name: Verify FFTW provider is MKL + run: | + julia --project -e ' + using FFTW + prov = FFTW.fftw_provider + println("FFTW provider: ", prov) + @assert prov == "mkl" string("Expected MKL but got ", prov) + ' + - name: Run tests + uses: julia-actions/julia-runtest@v1 diff --git a/.gitignore b/.gitignore index 9b1a5785..5e6bfa9b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ _git2_* docs/build *.info .vscode/spellright.dict +LocalPreferences.toml diff --git a/Project.toml b/Project.toml index 9a23c237..1fdd9e8c 100644 --- a/Project.toml +++ b/Project.toml @@ -38,6 +38,7 @@ Peaks = "18e31ff7-3703-566c-8e60-38913d67486b" PhysicalConstants = "5ad8b20f-a522-5ce9-bfc9-ddf1d5bda6ab" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Polynomials = "f27b6e38-b328-58d1-80ce-0feddd5e7a45" +Preferences = "21216c6a-2e73-6563-6e65-726566657250" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" @@ -88,6 +89,7 @@ Peaks = "0.3.2, 0.4, 0.5, 0.6" PhysicalConstants = "0.2" Pkg = "1.9" Polynomials = "2, 3, 4" +Preferences = "1.5.1" Printf = "1.9" ProgressLogging = "0.1" PyCall = "1.92" diff --git a/README.md b/README.md index c2433bc2..321a13c4 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ There are two ways of using Luna: For a short introduction on how to use the simple interface, see the [Quickstart](#quickstart) or [GNLSE](#gnlse) sections below. More information, including on the internals of Luna, can be found in the [Documentation](http://lupo-lab.com/Luna.jl). ## Installation -Luna requires Julia v1.9 or later, which can be obtained from [here](https://julialang.org/downloads/). In a Julia terminal, to install Luna simply enter the package manager with `]` and run `add Luna`: +Luna requires Julia v1.9 or later (we currently recommend v1.10), which can be obtained from [here](https://julialang.org/downloads/). In a Julia terminal, to install Luna simply enter the package manager with `]` and run `add Luna`: ```julia ] @@ -44,6 +44,27 @@ add Luna ``` This will install and precompile Luna and all its dependencies. +### Using Intel MKL for FFTs (Linux and Windows) +By default, Luna uses [FFTW](http://www.fftw.org/) for fast Fourier transforms. On Linux and Windows, you can optionally switch to the Intel MKL backend, which can provide improved FFT performance on Intel hardware. + +To switch to MKL, run the following in a Julia session: +```julia +using FFTW +FFTW.set_provider!("mkl") +``` +This sets a preference that is stored in `LocalPreferences.toml` in your project directory. **You must restart Julia for the change to take effect.** After restarting, FFTW will use MKL for all FFT operations. + +To switch back to the default FFTW backend: +```julia +using FFTW +FFTW.set_provider!("fftw") +``` +Again, a restart is required. + +> [!NOTE] +> MKL is only available on Linux and Windows. On macOS, FFTW is the only supported backend. +> When using MKL, FFTW wisdom (plan caching) is not supported and will be skipped automatically. + ## Quickstart To run a simple simulation of ultrafast pulse propagation in a gas-filled hollow capillary fibre, you can use `prop_capillary`. As an example, take a 3-metre length of HCF with 125 μm core radius, filled with 1 bar of helium gas, and driving pulses centred at 800 nm wavelength with 120 μJ of energy and 10 fs duration. We consider a frequency grid which spans from 120 nm to 4 μm and a time window of 1 ps. ```julia diff --git a/docs/make.jl b/docs/make.jl index 8dc13153..c062216b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -8,6 +8,7 @@ makedocs( authors = "Christian Brahms and John C. Travers", pages = Any[ "Home" => "index.md", + "Installation" => "installation.md", "The numerical model" => [ "General description" => "model/model.md", "Modal decompositions" => "model/modal_decompositions.md", diff --git a/docs/src/installation.md b/docs/src/installation.md new file mode 100644 index 00000000..33d5eb3a --- /dev/null +++ b/docs/src/installation.md @@ -0,0 +1,62 @@ +# Installation + +## Requirements +Luna requires Julia v1.9 or later (we currently recommend v1.10). You can download Julia from the [official website](https://julialang.org/downloads/). + +## Installing Luna +To install Luna, open a Julia terminal and enter the package manager by pressing `]`, then run: +```julia +pkg> add Luna +``` +This will download and install Luna along with all of its dependencies. The first time you load Luna with `using Luna`, it will be precompiled, which may take a few minutes. + +### Development version +If you want to use the latest development version of Luna (or contribute to it), you can install it directly from the GitHub repository: +```julia +pkg> dev Luna +``` +or, equivalently: +```julia +pkg> add Luna#master +``` + +## Using Intel MKL for FFTs +By default, Luna uses [FFTW](http://www.fftw.org/) for fast Fourier transforms. On **Linux and Windows**, you can optionally switch to the Intel Math Kernel Library (MKL) backend for FFTs, which can provide improved performance on Intel hardware. + +### Switching to MKL +To switch the FFT backend to MKL, run the following in a Julia session: +```julia +using FFTW +FFTW.set_provider!("mkl") +``` +This writes the preference `provider = "mkl"` to a `LocalPreferences.toml` file in your active project directory. + +!!! warning "Restart required" + You **must restart Julia** for the change to take effect. The FFT backend is selected when FFTW.jl is precompiled, so a simple `using FFTW` in the same session will not pick up the new provider. + +After restarting Julia, FFTW.jl will load the MKL backend instead of the default FFTW3 library. You can verify which backend is active: +```julia +using FFTW +println(FFTW.fftw_provider) # should print "mkl" +``` + +### Switching back to FFTW +To switch back to the default FFTW3 backend: +```julia +using FFTW +FFTW.set_provider!("fftw") +``` +As before, a restart of Julia is required for the change to take effect. + +### Platform support +MKL is only available on **Linux** and **Windows**. On macOS, FFTW3 is the only supported FFT backend. + +### FFTW wisdom +When using MKL, FFTW wisdom (cached FFT plans for faster planning on subsequent runs) is not supported. Luna detects the MKL backend automatically and skips wisdom loading and saving. + +## Plotting +Luna's built-in plotting functions (in the `Plotting` module) use [PyPlot.jl](https://github.com/JuliaPy/PyPlot.jl), which requires a Python installation with `matplotlib`. If you encounter issues with plotting, you can install `PyPlot` separately: +```julia +pkg> add PyPlot +``` +By default, Julia will manage a private Python (Conda) environment for this purpose (and we recommend this route). See the [PyCall.jl documentation](https://github.com/JuliaPy/PyCall.jl) for more details on using a system Python installation instead. diff --git a/src/Fields.jl b/src/Fields.jl index d7437b9b..68697742 100644 --- a/src/Fields.jl +++ b/src/Fields.jl @@ -503,8 +503,8 @@ end It(Et, grid::Grid.RealGrid) = abs2.(Maths.hilbert(Et)) It(Et, grid::Grid.EnvGrid) = abs2.(Et) -iFT(Eω, grid::Grid.RealGrid) = FFTW.irfft(Eω, length(grid.t), 1) -iFT(Eω, grid::Grid.EnvGrid) = FFTW.ifft(Eω, 1) +iFT(Eω, grid::Grid.RealGrid) = Maths._irfft_dim(Eω, length(grid.t), 1) +iFT(Eω, grid::Grid.EnvGrid) = Maths._ifft_dim(Eω, 1) "Calculate energy from modal field E(t)" function energyfuncs(grid::Grid.RealGrid) diff --git a/src/Luna.jl b/src/Luna.jl index 24de3188..0b24d6b0 100644 --- a/src/Luna.jl +++ b/src/Luna.jl @@ -42,6 +42,7 @@ FFTW is determined automatically (see [`Utils.FFTWthreads()`](@ref)) function set_fftw_threads(nthr=0) settings["fftw_threads"] = nthr FFTW.set_num_threads(Utils.FFTWthreads()) + Logging.@info("FFTW threads set to $(Utils.FFTWthreads())") end function __init__() diff --git a/src/Maths.jl b/src/Maths.jl index 39ac9b13..5e8905ab 100644 --- a/src/Maths.jl +++ b/src/Maths.jl @@ -552,20 +552,59 @@ end wigner(t, A::Vector{<:Real}; kwargs...) = wigner(t, hilbert(A); kwargs...) +# MKL's FFTW compatibility layer cannot create partial-dimension FFT plans for 3D+ arrays. +# For dim=1, we reshape to 2D (zero-copy), apply the FFT, and reshape back. +# For other dims, we fall back to mapslices with 1D transforms. +function _fft_dim(x::AbstractArray, dim::Integer) + ndims(x) <= 2 && return FFTW.fft(x, dim) + if dim == 1 + sz = size(x) + return reshape(FFTW.fft(reshape(x, sz[1], :), 1), :, sz[2:end]...) + end + return mapslices(s -> FFTW.fft(vec(s)), x; dims=dim) +end + +function _ifft_dim(x::AbstractArray, dim::Integer) + ndims(x) <= 2 && return FFTW.ifft(x, dim) + if dim == 1 + sz = size(x) + return reshape(FFTW.ifft(reshape(x, sz[1], :), 1), :, sz[2:end]...) + end + return mapslices(s -> FFTW.ifft(vec(s)), x; dims=dim) +end + +function _rfft_dim(x::AbstractArray, dim::Integer) + ndims(x) <= 2 && return FFTW.rfft(x, dim) + if dim == 1 + sz = size(x) + return reshape(FFTW.rfft(reshape(x, sz[1], :), 1), :, sz[2:end]...) + end + return mapslices(s -> FFTW.rfft(vec(s)), x; dims=dim) +end + +function _irfft_dim(x::AbstractArray, d::Integer, dim::Integer) + ndims(x) <= 2 && return FFTW.irfft(x, d, dim) + if dim == 1 + sz = size(x) + return reshape(FFTW.irfft(reshape(x, sz[1], :), d, 1), d, sz[2:end]...) + end + return mapslices(s -> FFTW.irfft(vec(s), d), x; dims=dim) +end + """ hilbert(x; dim=1) Compute the Hilbert transform, i.e. find the analytic signal from a real signal. """ function hilbert(x::Array{T,N}; dim = 1) where T <: Real where N - xf = FFTW.fft(x, dim) + xf = _fft_dim(x, dim) n1 = size(xf, dim)÷2 n2 = size(xf, dim) idxlo = CartesianIndices(size(xf)[1:dim - 1]) idxhi = CartesianIndices(size(xf)[dim + 1:end]) xf[idxlo, 2:n1, idxhi] .*= 2 xf[idxlo, (n1+1):n2, idxhi] .= 0 - return FFTW.ifft(xf, dim) + return _ifft_dim(xf, dim) end """ @@ -627,7 +666,7 @@ function oversample(t, x::Array{<:Real, N}; factor::Int=4, dim=1) where N if factor == 1 return t, x end - xf = FFTW.rfft(x, dim) + xf = _rfft_dim(x, dim) len = size(xf, dim) newlen_t = factor * length(t) @@ -648,14 +687,14 @@ function oversample(t, x::Array{<:Real, N}; factor::Int=4, dim=1) where N idxlo = CartesianIndices(size(xfo)[1:dim - 1]) idxhi = CartesianIndices(size(xfo)[dim + 1:end]) xfo[idxlo, 1:len, idxhi] .= factor .* xf - return to, FFTW.irfft(xfo, newlen_t, dim) + return to, _irfft_dim(xfo, newlen_t, dim) end function oversample(t, x::Array{<:Complex,N}; factor::Int=4, dim=1) where N if factor == 1 return t, x end - xf = FFTW.fftshift(FFTW.fft(x, dim), dim) + xf = FFTW.fftshift(_fft_dim(x, dim), dim) len = size(xf, dim) newlen = factor * length(t) @@ -677,7 +716,7 @@ function oversample(t, x::Array{<:Complex,N}; factor::Int=4, dim=1) where N idxlo = CartesianIndices(size(xfo)[1:dim - 1]) idxhi = CartesianIndices(size(xfo)[dim + 1:end]) xfo[idxlo, startidx:endidx, idxhi] .= factor .* xf - return to, FFTW.ifft(FFTW.ifftshift(xfo, dim), dim) + return to, _ifft_dim(FFTW.ifftshift(xfo, dim), dim) end """ diff --git a/src/Processing.jl b/src/Processing.jl index 7ec50404..d172a01e 100644 --- a/src/Processing.jl +++ b/src/Processing.jl @@ -369,11 +369,11 @@ end Calculate the field autocorrelation of `Et`. """ function field_autocorrelation(Et, grid::EnvGrid; dims=1) - FFTW.fftshift(FFTW.ifft(abs2.(FFTW.fft(Et, dims)), dims), dims) + FFTW.fftshift(Maths._ifft_dim(abs2.(Maths._fft_dim(Et, dims)), dims), dims) end function field_autocorrelation(Et, grid::RealGrid; dims=1) - fac = FFTW.fftshift(FFTW.irfft(abs2.(FFTW.rfft(Et, dims)), length(grid.t), dims), dims) + fac = FFTW.fftshift(Maths._irfft_dim(abs2.(Maths._rfft_dim(Et, dims)), length(grid.t), dims), dims) Maths.hilbert(fac, dim=dims) end @@ -383,7 +383,7 @@ end Calculate the intensity autocorrelation of `Et` over `grid`. """ function intensity_autocorrelation(Et, grid; dims=1) - real.(FFTW.fftshift(FFTW.irfft(abs2.(FFTW.rfft(Fields.It(Et, grid), dims)), length(grid.t), dims), dims)) + real.(FFTW.fftshift(Maths._irfft_dim(abs2.(Maths._rfft_dim(Fields.It(Et, grid), dims)), length(grid.t), dims), dims)) end """ @@ -815,8 +815,8 @@ prop_maybe(grid, Eω, propagator) = propagator(grid, Eω) Get the envelope electric field including the carrier wave from the frequency-domain field `Eω` sampled on `grid`. """ -envelope(grid::RealGrid, Eω) = Maths.hilbert(FFTW.irfft(Eω, length(grid.t), 1)) -envelope(grid::EnvGrid, Eω) = FFTW.ifft(Eω, 1) .* exp.(im.*grid.ω0.*grid.t) +envelope(grid::RealGrid, Eω) = Maths.hilbert(Maths._irfft_dim(Eω, length(grid.t), 1)) +envelope(grid::EnvGrid, Eω) = Maths._ifft_dim(Eω, 1) .* exp.(im.*grid.ω0.*grid.t) """ makegrid(output) diff --git a/src/Utils.jl b/src/Utils.jl index b79d0214..bb1ddfe9 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -9,6 +9,7 @@ import Luna: settings import Printf: @sprintf import Scratch: get_scratch!, clear_scratchspaces! import Luna +import Preferences subzero = '\u2080' subscript(digit::Char) = string(Char(codepoint(subzero)+parse(Int, digit))) @@ -81,8 +82,16 @@ function FFTWthreads() end end +function fft_provider_is_mkl() + Preferences.load_preference("FFTW", "provider", nothing) === "mkl" +end + function loadFFTwisdom() FFTW.set_num_threads(FFTWthreads()) + if fft_provider_is_mkl() + Logging.@info("FFTW provider is MKL; skipping wisdom loading") + return + end fpath = joinpath(cachedir(), "FFTWcache_$(FFTWthreads())threads") lockpath = joinpath(cachedir(), "FFTWlock") isdir(cachedir()) || mkpath(cachedir()) @@ -97,6 +106,10 @@ function loadFFTwisdom() end function saveFFTwisdom() + if fft_provider_is_mkl() + Logging.@info("FFTW provider is MKL; skipping wisdom saving") + return + end fpath = joinpath(cachedir(), "FFTWcache_$(FFTWthreads())threads") lockpath = joinpath(cachedir(), "FFTWlock") mkpidlock(lockpath; stale_age=600) do