Skip to content

Commit fe7d50c

Browse files
committed
- Move to cpp basix implementation for FE assembling.
- Remove deprecated modules. - Move MR signal generation to Phantom class (as a method) - Implement signal, full_signal, signal_sum, and signal nodal methods of SignalAssembler class.
1 parent 7d4ea8b commit fe7d50c

18 files changed

Lines changed: 1865 additions & 524 deletions

CMakeLists.txt

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,29 @@ endif()
9696
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
9797
find_package(pybind11 CONFIG REQUIRED)
9898

99+
# --- Basix: use the basix that is installed in this Python env (fenics-basix wheel) ---
100+
execute_process(
101+
COMMAND "${Python_EXECUTABLE}" -c
102+
"import basix, os; print(os.path.join(os.path.dirname(basix.__file__), 'lib'))"
103+
OUTPUT_VARIABLE BASIX_PY_LIBDIR
104+
OUTPUT_STRIP_TRAILING_WHITESPACE
105+
RESULT_VARIABLE _basix_rc
106+
)
107+
if(NOT _basix_rc EQUAL 0)
108+
message(FATAL_ERROR
109+
"Basix not importable in this Python env. Install: python -m pip install fenics-basix")
110+
endif()
111+
112+
find_library(BASIX_LIB
113+
NAMES basix
114+
PATHS "${BASIX_PY_LIBDIR}"
115+
NO_DEFAULT_PATH
116+
REQUIRED
117+
)
118+
119+
message(STATUS "Basix wheel libdir: ${BASIX_PY_LIBDIR}")
120+
message(STATUS "Basix wheel lib: ${BASIX_LIB}")
121+
99122
# ------------------------------------------------------------
100123
# Eigen3 automatic installation (cached and robust)
101124
# ------------------------------------------------------------
@@ -130,7 +153,7 @@ endif()
130153
# Source setup (core modules)
131154
# ------------------------------------------------------------
132155
set(SRC_DIR cpp/feelmri)
133-
set(MODULES Assemble BlochSimulator MRI POD)
156+
set(MODULES MRIAssemble Assemble BlochSimulator PODHelper)
134157

135158
foreach(mod ${MODULES})
136159
message(STATUS "Building module: ${mod}")
@@ -145,7 +168,9 @@ foreach(mod ${MODULES})
145168
${Python_INCLUDE_DIRS}
146169
)
147170

148-
# Apply high-performance compile flags
171+
target_link_libraries(${mod} PRIVATE "${BASIX_LIB}")
172+
173+
# Apply high-performance compile flags
149174
target_compile_options(${mod} PRIVATE
150175
-Ofast
151176
-ffast-math
@@ -154,6 +179,10 @@ foreach(mod ${MODULES})
154179
-ffp-contract=fast
155180
-fvisibility=hidden
156181
-fPIC
182+
-funroll-loops
183+
-fopenmp-simd
184+
-mavx2
185+
-mfma
157186
-DNDEBUG
158187
-DEIGEN_NO_DEBUG
159188
-DEIGEN_FAST_MATH

cpp/feelmri/Assemble.cpp

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
11
#include "Assemble.h"
2+
#include "FEUtils.h"
3+
#include <pybind11/pybind11.h>
4+
#include <pybind11/eigen.h>
5+
#include <pybind11/stl.h>
6+
#include <string>
7+
#include <vector>
8+
#include <tuple>
9+
#include <cmath>
10+
#include <span>
211

312
namespace py = pybind11;
413

@@ -114,3 +123,138 @@ Eigen::SparseMatrix<T> MassAssemble(
114123
M.setFromTriplets(coefficients.begin(), coefficients.end());
115124
return M;
116125
}
126+
127+
128+
template <typename T, std::size_t d>
129+
using mdspan_t = basix::md::mdspan<T, basix::md::dextents<std::size_t, d>>;
130+
131+
// -----------------------------------------------------------------------------
132+
// Local mass assembly using *pre-tabulated* basis values/derivatives
133+
// -----------------------------------------------------------------------------
134+
// -----------------------------------------------------------------------------
135+
// Optimized Local mass assembly
136+
// -----------------------------------------------------------------------------
137+
template <typename T>
138+
static inline void basixLocalMassAssemble(
139+
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& Me, // Passed by reference (no alloc)
140+
const Eigen::Matrix<T, Eigen::Dynamic, 3>& elem_nodes, // Forced 3 columns (no alloc)
141+
const std::vector<T>& wts,
142+
const mdspan_t<const T, 4>& tab,
143+
int nb_dofs,
144+
int nq)
145+
{
146+
using Vec = Eigen::Matrix<T, Eigen::Dynamic, 1>;
147+
using Mat3 = Eigen::Matrix<T, 3, 3>;
148+
149+
Me.setZero(); // Reset thread-local buffer
150+
151+
for (int q = 0; q < nq; ++q)
152+
{
153+
// Zero-copy mapping directly from Basix tabulation.
154+
// Assumes C-style contiguous memory layout where 'dofs' vary fastest after 'value_size' (1).
155+
Eigen::Map<const Vec> S (&tab(0, q, 0, 0), nb_dofs);
156+
Eigen::Map<const Vec> dSdr(&tab(1, q, 0, 0), nb_dofs);
157+
Eigen::Map<const Vec> dSds(&tab(2, q, 0, 0), nb_dofs);
158+
Eigen::Map<const Vec> dSdt(&tab(3, q, 0, 0), nb_dofs);
159+
160+
// Build Jacobian
161+
Mat3 J;
162+
J.row(0) = dSdr.transpose() * elem_nodes;
163+
J.row(1) = dSds.transpose() * elem_nodes;
164+
J.row(2) = dSdt.transpose() * elem_nodes;
165+
166+
const T detJ = std::abs(J.determinant());
167+
const T c = detJ * wts[q];
168+
169+
// Rank-1 update. .noalias() prevents Eigen from creating a temporary matrix
170+
Me.noalias() += c * (S * S.transpose());
171+
}
172+
}
173+
174+
// -----------------------------------------------------------------------------
175+
// Optimized Global Mass assemble
176+
// -----------------------------------------------------------------------------
177+
template <typename T>
178+
Eigen::SparseMatrix<T> basixMassAssemble(
179+
const Eigen::MatrixXi& elems,
180+
const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& nodes,
181+
const std::string& meshio_type,
182+
const std::string& /*quadrature_variant*/,
183+
const std::string& /*quadrature_rule*/,
184+
const int quadrature_degree)
185+
{
186+
const int nb_elems = elems.rows();
187+
const int nb_nodes = nodes.rows();
188+
const int nb_nodes_e = elems.cols();
189+
190+
// Basix setup
191+
const auto fe_info = get_fe_info(meshio_type);
192+
const auto variant = basix::element::lagrange_variant::equispaced;
193+
194+
const basix::FiniteElement<T> finite_element =
195+
basix::create_element<T>(
196+
fe_info.family, fe_info.cell, fe_info.degree, variant,
197+
basix::element::dpc_variant::unset, false);
198+
199+
const int nb_dofs = finite_element.dim();
200+
201+
auto qw = basix::quadrature::make_quadrature<T>(
202+
basix::quadrature::type::Default, fe_info.cell,
203+
basix::polyset::type::standard, quadrature_degree);
204+
205+
const std::vector<T>& qpts_flat = qw[0];
206+
const std::vector<T>& wts = qw[1];
207+
208+
const std::size_t gdim = 3;
209+
const int nq = static_cast<int>(wts.size());
210+
211+
auto [tab_data, tab_shape] =
212+
finite_element.tabulate(
213+
1, std::span<const T>(qpts_flat.data(), qpts_flat.size()),
214+
{static_cast<std::size_t>(nq), gdim});
215+
216+
mdspan_t<const T, 4> tab(tab_data.data(), tab_shape);
217+
218+
Eigen::SparseMatrix<T> M(nb_nodes, nb_nodes);
219+
using TripletType = Eigen::Triplet<T>;
220+
221+
// Pre-allocate the EXACT size required to avoid resizing and allow threaded assignment
222+
const std::size_t total_triplets = static_cast<std::size_t>(nb_elems) * nb_nodes_e * nb_nodes_e;
223+
std::vector<TripletType> coefficients(total_triplets);
224+
225+
// Multithread the assembly loop
226+
#pragma omp parallel
227+
{
228+
// Thread-local buffers avoid heap allocations inside the hot loop
229+
Eigen::Matrix<T, Eigen::Dynamic, 3> elem_nodes(nb_nodes_e, 3);
230+
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> Me(nb_dofs, nb_dofs);
231+
232+
#pragma omp for
233+
for (int e = 0; e < nb_elems; ++e)
234+
{
235+
const auto elem = elems.row(e);
236+
237+
for (int i = 0; i < nb_nodes_e; ++i)
238+
elem_nodes.row(i) = nodes.row(elem(i));
239+
240+
// Populate thread-local Me
241+
basixLocalMassAssemble<T>(Me, elem_nodes, wts, tab, nb_dofs, nq);
242+
243+
// Compute precise offset for lock-free parallel insertion
244+
const std::size_t offset = static_cast<std::size_t>(e) * nb_nodes_e * nb_nodes_e;
245+
std::size_t idx = 0;
246+
247+
for (int i = 0; i < nb_nodes_e; ++i)
248+
{
249+
for (int j = 0; j < nb_nodes_e; ++j)
250+
{
251+
coefficients[offset + idx++] = TripletType(elem(i), elem(j), Me(i, j));
252+
}
253+
}
254+
}
255+
} // implicit OpenMP barrier sync
256+
257+
// Build sparse matrix
258+
M.setFromTriplets(coefficients.begin(), coefficients.end());
259+
return M;
260+
}

cpp/feelmri/Assemble.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
11
#pragma once
2+
23
#include <pybind11/pybind11.h>
34
#include <pybind11/eigen/tensor.h>
45
#include <pybind11/eigen.h>
56
#include <pybind11/stl.h>
67
#include <Eigen/Dense>
78
#include <Eigen/Sparse>
89

10+
#include <basix/finite-element.h>
11+
#include <basix/quadrature.h>
12+
#include <basix/mdspan.hpp>
13+
#include <basix/cell.h>
14+
#include <basix/element-families.h>
15+
#include <basix/polyset.h>
16+
917
namespace py = pybind11;
1018

1119
// MassAssemble
@@ -17,6 +25,17 @@ Eigen::SparseMatrix<T> MassAssemble(
1725
const py::object &quadrature_rule
1826
);
1927

28+
// MassAssemble
29+
template <typename T>
30+
Eigen::SparseMatrix<T> basixMassAssemble(
31+
const Eigen::MatrixXi &elems,
32+
const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> &nodes,
33+
const std::string &meshio_type,
34+
const std::string &quadrature_variant,
35+
const std::string &quadrature_rule,
36+
const int quadrature_degree
37+
);
38+
2039

2140
// PYBIND11 module
2241
PYBIND11_MODULE(Assemble, m)
@@ -36,4 +55,22 @@ PYBIND11_MODULE(Assemble, m)
3655
const py::object &,
3756
const py::object &
3857
>(&MassAssemble<double>));
58+
59+
m.def("basixMassAssemble", py::overload_cast<
60+
const Eigen::MatrixXi &,
61+
const Eigen::MatrixXf &,
62+
const std::string &,
63+
const std::string &,
64+
const std::string &,
65+
const int
66+
>(&basixMassAssemble<float>));
67+
68+
m.def("basixMassAssemble", py::overload_cast<
69+
const Eigen::MatrixXi &,
70+
const Eigen::MatrixXd &,
71+
const std::string &,
72+
const std::string &,
73+
const std::string &,
74+
const int
75+
>(&basixMassAssemble<double>));
3976
}

cpp/feelmri/BlochSimulator.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
11
#include "BlochSimulator.h"
2+
#include <pybind11/pybind11.h>
3+
#include <pybind11/numpy.h>
4+
#include <pybind11/complex.h>
5+
#include <complex>
6+
#include <cmath>
7+
#include <utility>
28

39
using namespace Eigen;
410
namespace py = pybind11;

cpp/feelmri/BlochSimulator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#pragma once
2+
13
#include <pybind11/eigen/tensor.h>
24
#include <pybind11/pybind11.h>
35
#include <pybind11/eigen.h>

0 commit comments

Comments
 (0)