diff --git a/.github/workflows/run_tests_linux.yml b/.github/workflows/run_tests_linux.yml index 0f7a02e50..b1dce64ff 100644 --- a/.github/workflows/run_tests_linux.yml +++ b/.github/workflows/run_tests_linux.yml @@ -75,6 +75,18 @@ jobs: - name: Checkout code uses: actions/checkout@v3 + - name: Setup environment + uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest + activate-environment: bioptim + environment-file: environment.yml + + - name: Print conda info + run: | + conda info + conda list + - name: Install extra dependencies run: | sudo apt install -y python3-pip diff --git a/.gitignore b/.gitignore index 28ba13771..3b0a16dd5 100644 --- a/.gitignore +++ b/.gitignore @@ -109,7 +109,7 @@ c_generated_code *.gv.pdf # Bioptim files -bioptim/sandbox/ +sandbox/ *.pkl *.mtx *.casadi @@ -120,10 +120,6 @@ _l4c_generated/ # Mac dev *.DS_store -*.png - -sandbox/ - # Ignore all npy files in tests folder except those in tests/shard6 /tests/*.npy !/tests/shard6/v_bounds_max.npy diff --git a/.gitmodules b/.gitmodules index 3df878b8e..b867fe47b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "external/acados"] path = external/acados - url = https://github.com/acados/acados.git + url = https://github.com/acados/acados.git \ No newline at end of file diff --git a/bioptim/__init__.py b/bioptim/__init__.py index 56188df4a..8332f0e99 100644 --- a/bioptim/__init__.py +++ b/bioptim/__init__.py @@ -166,7 +166,7 @@ from .dynamics.ode_solvers import OdeSolver, OdeSolverBase from .gui.online_callback_server import PlottingServer from .gui.plot import CustomPlot -from .interfaces import Solver +from .interfaces import Solver, CasadiFunctionInterface from .limits.constraints import ConstraintFcn, ConstraintList, Constraint, ParameterConstraintList from .limits.fatigue_path_conditions import FatigueBounds, FatigueInitialGuess from .limits.multinode_constraint import MultinodeConstraintFcn, MultinodeConstraintList, MultinodeConstraint @@ -196,7 +196,7 @@ OnlineOptim, ContactType, ) -from .misc.mapping import BiMappingList, BiMapping, Mapping, SelectionMapping, Dependency +from .misc.mapping import BiMappingList, BiMapping, Mapping, NodeMapping, NodeMappingList, SelectionMapping, Dependency from .models.biorbd.biorbd_model import BiorbdModel from .models.biorbd.external_forces import ExternalForceSetTimeSeries, ExternalForceSetVariables from .models.biorbd.holonomic_biorbd_model import HolonomicBiorbdModel diff --git a/bioptim/examples/__main__.py b/bioptim/examples/__main__.py index 995472e3b..19b1b2a82 100644 --- a/bioptim/examples/__main__.py +++ b/bioptim/examples/__main__.py @@ -40,6 +40,7 @@ ("Custom Bounds", "custom_bounds.py"), ("Custom constraint", "custom_constraint.py"), ("Custom initial guess", "custom_initial_guess.py"), + ("Custom non casadi dynamics", "custom_non_casadi_dynamics.py"), ("Custom objectives", "custom_objectives.py"), ("Custom parameters", "custom_parameters.py"), ("Custom phase transitions", "custom_phase_transitions.py"), @@ -134,6 +135,14 @@ ] ), ), + ( + "deep_neural_network", + OrderedDict( + [ + ("pytorch ocp", "pytorch_ocp.py"), + ] + ), + ), ] ) diff --git a/bioptim/examples/toy_examples/deep_neural_network/pytorch_ocp.py b/bioptim/examples/toy_examples/deep_neural_network/pytorch_ocp.py new file mode 100644 index 000000000..e4a24cb75 --- /dev/null +++ b/bioptim/examples/toy_examples/deep_neural_network/pytorch_ocp.py @@ -0,0 +1,281 @@ +""" +This example is similar to the getting_started/pendulum.py example, but the dynamics are 
computed using a deep neural +network driven by PyTorch. For this example to work, we create a neural network model that trains against the biorbd +library to predict the acceleration of the pendulum. Obviously, one can replace the prediction model with any other +pytorch model. The class TorchModel is thereafter used to wrap the model using L4casadi to be used in the bioptim framework, +if one needs a more flexible way to define the dynamics, objective functions or constraints, they can start from that +class and modify it to their needs. + +Extra information: +All information regarding the installation, limitations, credits and known problems can be found in the header of the +TorchModel class. +""" + +import os +from typing import Self + +import biorbd +from bioptim import OptimalControlProgram, DynamicsFcn, BoundsList, Dynamics, Objective, ObjectiveFcn, Solver +from bioptim.models.torch.torch_model import TorchModel +import numpy as np +import torch + + +class EarlyStopper: + def __init__(self, patience=1, min_delta=0): + self.patience = patience + self.min_delta = min_delta + self.counter = 0 + self.min_validation_loss = float("inf") + + def early_stop(self, validation_loss): + if validation_loss < self.min_validation_loss: + self.min_validation_loss = validation_loss + self.counter = 0 + elif validation_loss > (self.min_validation_loss + self.min_delta): + self.counter += 1 + if self.counter >= self.patience: + return True + return False + + +class NeuralNetworkModel(torch.nn.Module): + def __init__(self, layer_node_count: tuple[int], dropout_probability: float): + super(NeuralNetworkModel, self).__init__() + activations = torch.nn.GELU() + + # Initialize the layers of the neural network + self._size_in = layer_node_count[0] + self._size_out = layer_node_count[-1] + first_and_hidden_layers_node_count = layer_node_count[:-1] + layers = torch.nn.ModuleList() + for i in range(len(first_and_hidden_layers_node_count) - 1): + layers.append( + torch.nn.Linear(first_and_hidden_layers_node_count[i], first_and_hidden_layers_node_count[i + 1]) + ) + layers.append(activations) + layers.append(torch.nn.Dropout(dropout_probability)) + layers.append(torch.nn.Linear(first_and_hidden_layers_node_count[-1], layer_node_count[-1])) + + self._forward_model = torch.nn.Sequential(*layers) + self._forward_model.to(self.get_torch_device()) + + self._optimizer = torch.optim.Adam(self.parameters(), lr=1e-3) + self._loss_function = torch.nn.HuberLoss() + + # Put the model in evaluation mode + self.eval() + + @property + def size_in(self) -> int: + return self._size_in + + @property + def size_out(self) -> int: + return self._size_out + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output = torch.Tensor(x.shape[0], self._forward_model[-1].out_features) + for i, data in enumerate(x): + output[i, :] = self._forward_model(data) + return output.to(self.get_torch_device()) + + @staticmethod + def get_torch_device() -> torch.device: + return torch.device("cuda" if torch.cuda.is_available() else "cpu") + + def train_me( + self, training_data: list[torch.Tensor], validation_data: list[torch.Tensor], max_epochs: int = 100 + ) -> None: + # More details about scheduler in documentation + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( + self._optimizer, mode="min", factor=0.1, patience=20, min_lr=1e-8 + ) + + early_stopper = EarlyStopper(patience=20, min_delta=1e-5) + for i in range(max_epochs): + self._perform_epoch_training(targets=training_data) + validation_loss = 
self._perform_epoch_training(targets=validation_data, only_compute=True) + scheduler.step(validation_loss) # Adjust/reduce learning rate + + # Check if the training should stop + print(f"Validation loss: {validation_loss} (epoch: {i})") + if early_stopper.early_stop(validation_loss): + print("Early stopping") + break + + def save_me(self, path: str) -> None: + layer_node_count = tuple( + [model.in_features for model in self._forward_model if isinstance(model, torch.nn.Linear)] + + [self._forward_model[-1].out_features] + ) + + dropout_probability = tuple([model.p for model in self._forward_model if isinstance(model, torch.nn.Dropout)]) + if len(dropout_probability) == 0: + dropout_probability = 0 + elif len(dropout_probability) > 1: + # make sure that the dropout probability is the same for all layers + if not all(prob == dropout_probability[0] for prob in dropout_probability): + raise ValueError("Different dropout probabilities for different layers") + dropout_probability = dropout_probability[0] + else: + dropout_probability = dropout_probability[0] + + dico = { + "layer_node_count": layer_node_count, + "dropout_probability": dropout_probability, + "state_dict": self.state_dict(), + } + if not os.path.isdir(os.path.dirname(path)): + os.makedirs(os.path.dirname(path)) + torch.save(dico, path) + + @classmethod + def load_me(cls, path: str) -> Self: + data = torch.load(path, weights_only=True) + inputs = { + "layer_node_count": data["layer_node_count"], + "dropout_probability": data["dropout_probability"], + } + model = NeuralNetworkModel(**inputs) + model.load_state_dict(data["state_dict"]) + return model + + def _perform_epoch_training( + self, + targets: list[torch.Tensor], + only_compute: bool = False, + ) -> tuple[float, float]: + + # Perform the predictions + if only_compute: + with torch.no_grad(): + all_predictions = self(targets[0]) + all_targets = targets[1] + + else: + # Put the model in training mode + self.train() + + # If it is training, we are updating the model with each prediction, we therefore need to do it in a loop + all_predictions = torch.tensor([]).to(self.get_torch_device()) + all_targets = torch.tensor([]).to(self.get_torch_device()) + for input, target in zip(*targets): + self._optimizer.zero_grad() + + # Get the predictions and targets + output = self(input[None, :]) + + # Do some machine learning shenanigans + current_loss = self._loss_function.forward(output, target[None, :]) + current_loss.backward() # Backpropagation + self._optimizer.step() # Updating weights + + # Populate the return values + all_predictions = torch.cat((all_predictions, output)) + all_targets = torch.cat((all_targets, target[None, :])) + + # Put back the model in evaluation mode + self.eval() + + # Calculation of mean distance and error % + epoch_accuracy = (all_predictions - all_targets).abs().mean().item() + return epoch_accuracy + + +def prepare_ocp( + model: torch.nn.Module, + final_time: float, + n_shooting: int, +) -> OptimalControlProgram: + + # Dynamics + dynamics = Dynamics(DynamicsFcn.TORQUE_DRIVEN) + torch_model = TorchModel(torch_model=model) + + # Path bounds + x_bounds = BoundsList() + x_bounds["q"] = [-3.14 * 1.5] * torch_model.nb_q, [3.14 * 1.5] * torch_model.nb_q + x_bounds["q"][:, [0, -1]] = 0 # Start and end at 0... 
+ x_bounds["q"][1, -1] = 3.14 # ...but end with pendulum 180 degrees rotated + x_bounds["qdot"] = [-3.14 * 10.0] * torch_model.nb_qdot, [3.14 * 10.0] * torch_model.nb_qdot + x_bounds["qdot"][:, [0, -1]] = 0 # Start and end without any velocity + + # Define control path bounds + u_bounds = BoundsList() + u_bounds["tau"] = [-100] * torch_model.nb_tau, [100] * torch_model.nb_tau + u_bounds["tau"][1, :] = 0 # ...but remove the capability to actively rotate + + # Add objective functions + objective_functions = Objective(ObjectiveFcn.Lagrange.MINIMIZE_CONTROL, key="tau") + + return OptimalControlProgram( + torch_model, + dynamics, + n_shooting, + final_time, + x_bounds=x_bounds, + u_bounds=u_bounds, + objective_functions=objective_functions, + use_sx=False, + ) + + +def generate_dataset(biorbd_model: biorbd.Model, data_point_count: int) -> list[torch.Tensor]: + q_ranges = np.array( + [[[q_range.min(), q_range.max()] for q_range in segment.QRanges()] for segment in biorbd_model.segments()] + ).squeeze() + qdot_ranges = np.array( + [ + [[qdot_range.min(), qdot_range.max()] for qdot_range in segment.QdotRanges()] + for segment in biorbd_model.segments() + ] + ).squeeze() + tau_ranges = np.array([-100, 100] * biorbd_model.nbGeneralizedTorque()).reshape(-1, 2) + tau_ranges[1, :] = 0 + + q = torch.rand(data_point_count, biorbd_model.nbQ()) * (q_ranges[:, 1] - q_ranges[:, 0]) + q_ranges[:, 0] + qdot = ( + torch.rand(data_point_count, biorbd_model.nbQdot()) * (qdot_ranges[:, 1] - qdot_ranges[:, 0]) + + qdot_ranges[:, 0] + ) + tau = ( + torch.rand(data_point_count, biorbd_model.nbGeneralizedTorque()) * (tau_ranges[:, 1] - tau_ranges[:, 0]) + + tau_ranges[:, 0] + ) + + q = q.to(torch.float) + qdot = qdot.to(torch.float) + tau = tau.to(torch.float) + + qddot = torch.zeros(data_point_count, biorbd_model.nbQddot()) + for i in range(data_point_count): + qddot[i, :] = torch.tensor( + biorbd_model.ForwardDynamics(np.array(q[i, :]), np.array(qdot[i, :]), np.array(tau[i, :])).to_array() + ) + + return [torch.cat((q, qdot, tau), dim=1), qddot] + + +def main(): + # --- Prepare a predictive model --- # + force_new_training = False + biorbd_model = biorbd.Model("../getting_started/models/pendulum.bioMod") + training_data = generate_dataset(biorbd_model, data_point_count=30000) + validation_data = generate_dataset(biorbd_model, data_point_count=3000) + + if force_new_training or not os.path.isfile("models/trained_pendulum_model.pt"): + model = NeuralNetworkModel(layer_node_count=(6, 8, 2), dropout_probability=0.2) + model.train_me(training_data, validation_data, max_epochs=300) + model.save_me("models/trained_pendulum_model.pt") + else: + model = NeuralNetworkModel.load_me("models/trained_pendulum_model.pt") + + ocp = prepare_ocp(model=model, final_time=1, n_shooting=40) + + # --- Solve the ocp --- # + ocp.solve(Solver.IPOPT(show_online_optim=True)) + + +if __name__ == "__main__": + main() diff --git a/bioptim/interfaces/__init__.py b/bioptim/interfaces/__init__.py index ef615140f..221e45bae 100644 --- a/bioptim/interfaces/__init__.py +++ b/bioptim/interfaces/__init__.py @@ -2,6 +2,7 @@ from .fratrop_options import FATROP from .acados_options import ACADOS from .sqp_options import SQP_METHOD +from .casadi_function_interface import CasadiFunctionInterface class Solver: diff --git a/bioptim/interfaces/casadi_function_interface.py b/bioptim/interfaces/casadi_function_interface.py new file mode 100644 index 000000000..65d1954cc --- /dev/null +++ b/bioptim/interfaces/casadi_function_interface.py @@ -0,0 +1,166 @@ +from abc 
import ABC, abstractmethod + +from casadi import Callback, Function, Sparsity, DM, MX, jacobian +import numpy as np + + +class CasadiFunctionInterface(Callback, ABC): + def __init__(self, name: str, opts={}): + self.reverse_function = None + + super(CasadiFunctionInterface, self).__init__() + self.construct(name, opts) # Defines the self.mx_in() + self._cached_mx_in = super().mx_in() + + @abstractmethod + def inputs_len(self) -> list[int]: + """ + The len of the inputs of the function. This will help create the MX/SX vectors such that each element of the list + is the length of the input vector (i.e. the sparsity of the input vector). + + Example: + def inputs_len(self) -> list[int]: + return [3, 4] # Assuming two inputs x and y of length 3 and 4 respectively + """ + pass + + @abstractmethod + def outputs_len(self) -> list[int]: + """ + The len of the outputs of the function. This will help create the MX/SX vectors such that each element of the list + is the length of the output vector (i.e. the sparsity of the output vector). + + Example: + def outputs_len(self) -> list[int]: + return [5] # Assuming the output is a 5x1 vector + """ + pass + + @abstractmethod + def function(self, *args) -> np.ndarray | DM: + """ + The actual function to interface with casadi. The callable that returns should be callable by function(*mx_in). + If your function needs more parameters, they should be encapsulated in a partial. + + Example: + def function(self, x, y): + x = np.array(x)[:, 0] + y = np.array(y)[:, 0] + return np.array( + [ + x[0] * y[1] + x[0] * y[0] * y[0], + x[1] * x[1] + 2 * y[1], + x[0] * x[1] * x[2], + x[2] * x[1] * y[2] + 2 * y[3] * y[2], + y[0] * y[1] * y[2] * y[3], + ] + ) + """ + pass + + @abstractmethod + def jacobians(self, *args) -> list[np.ndarray | DM]: + """ + All the jacobians evaluated at *args. Each of the jacobian should be of the shape (n_out, n_in), where n_out is + the length of the output vector (the same for all) and n_in is the length of the input element (specific to each + input element). 
+ + Example: + def jacobians(self, x, y): + x = np.array(x)[:, 0] + y = np.array(y)[:, 0] + jacobian_x = np.array( + [ + [y[1] + y[0] * y[0], 0, 0], + [0, 2 * x[1], 0], + [x[1] * x[2], x[0] * x[2], x[0] * x[1]], + [0, x[2] * y[2], x[1] * y[2]], + [0, 0, 0], + ] + ) + jacobian_y = np.array( + [ + [x[0] * 2 * y[0], x[0], 0, 0], + [0, 2, 0, 0], + [0, 0, 0, 0], + [0, 0, x[1] * x[2] + 2 * y[3], 2 * y[2]], + [y[1] * y[2] * y[3], y[0] * y[2] * y[3], y[0] * y[1] * y[3], y[0] * y[1] * y[2]], + ] + ) + return [jacobian_x, jacobian_y] # There are as many jacobians as there are inputs + """ + pass + + def mx_in(self) -> MX: + """ + Get the MX in, but it is ensured that the MX are the same at each call + """ + return self._cached_mx_in + + def get_n_in(self): + return len(self.inputs_len()) + + def get_n_out(self): + return len(self.outputs_len()) + + def get_sparsity_in(self, i): + return Sparsity.dense(self.inputs_len()[i], 1) + + def get_sparsity_out(self, i): + return Sparsity.dense(self.outputs_len()[i], 1) + + def eval(self, *args): + return [self.function(*args[0])] + + def has_reverse(self, nadj): + return nadj == 1 + + def get_reverse(self, nadj, name, inames, onames, opts): + class Reverse(Callback): + def __init__(self, parent, jacobian_functions, opts={}): + self._sparsity_in = parent.mx_in() + parent.mx_out() + self._sparsity_out = parent.mx_in() + + self.jacobian_functions = jacobian_functions + self.reverse_function = None + Callback.__init__(self) + self.construct("Reverse", opts) + + def get_n_in(self): + return len(self._sparsity_in) + + def get_n_out(self): + return len(self._sparsity_out) + + def get_sparsity_in(self, i): + return Sparsity.dense(self._sparsity_in[i].shape) + + def get_sparsity_out(self, i): + return Sparsity.dense(self._sparsity_out[i].shape) + + def eval(self, arg): + # Find the index to evaluate from the last parameter which is a DM vector of 0s with one value being 1 + index = arg[-1].toarray()[:, 0].tolist().index(1.0) + inputs = arg[:-1] + return [jaco[index, :].T for jaco in self.jacobian_functions(*inputs)] + + def has_reverse(self, nadj): + return nadj == 1 + + def get_reverse(self, nadj, name, inames, onames, opts): + if self.reverse_function is None: + self.reverse_function = Reverse(self, jacobian(self.jacobian_functions)) + + cx_in = self.mx_in() + nominal_out = self.mx_out() + adj_seed = self.mx_out() + return Function(name, cx_in + nominal_out + adj_seed, self.reverse_function.call(cx_in + adj_seed)) + + # Package it in the [nominal_in + nominal_out + adj_seed] form that CasADi expects + if self.reverse_function is None: + self.reverse_function = Reverse(self, self.jacobians) + + cx_in = self.mx_in() + nominal_out = self.mx_out() + adj_seed = self.mx_out() + return Function(name, cx_in + nominal_out + adj_seed, self.reverse_function.call(cx_in + adj_seed)) diff --git a/bioptim/interfaces/interface_utils.py b/bioptim/interfaces/interface_utils.py index 6d91d416b..b8cf51512 100644 --- a/bioptim/interfaces/interface_utils.py +++ b/bioptim/interfaces/interface_utils.py @@ -1,3 +1,4 @@ +import warnings from time import perf_counter from casadi import Importer, Function @@ -214,6 +215,7 @@ def _shake_penalties_tree(ocp, penalties_cx: CX, v: CX, v_bounds: DoubleNpArrayT penalty = penalty.expand() except RuntimeError: # This happens mostly when, for instance, there is a Newton decent in the penalty + warnings.warn("Could not expand during the shake_tree.") pass return penalty(vertcat(*dt, v[len(dt) :])) diff --git a/bioptim/models/torch/torch_model.py 
b/bioptim/models/torch/torch_model.py
new file mode 100644
index 000000000..270dbeeea
--- /dev/null
+++ b/bioptim/models/torch/torch_model.py
@@ -0,0 +1,133 @@
+"""
+This wrapper can be used to wrap a PyTorch model and use it in bioptim. It is, however, a much more limited class than
+the BiorbdModel class. The main reason is that, as opposed to Biorbd, the dynamics produced by a PyTorch model can be
+of any nature. This wrapper should therefore be viewed more as an example of how to wrap a PyTorch model in bioptim
+than as an actual generic wrapper.
+
+This wrapper is based on the l4casadi library (https://github.com/Tim-Salzmann/l4casadi), which is a bridge between
+CasADi and PyTorch.
+
+INSTALLATION:
+Note that these instructions may be outdated. Please refer to the l4casadi documentation for the most up-to-date
+instructions.
+
+First, make sure PyTorch is installed by running the following command:
+    pip install torch>=2.0
+Please note that some dependencies are required. At the time of writing, the following packages were required:
+    pip install setuptools>=68.1 scikit-build>=0.17 cmake>=3.27 ninja>=1.11
+Then, install l4casadi as the interface between CasADi and PyTorch, by running the following command:
+    pip install l4casadi --no-build-isolation
+
+
+LIMITATIONS:
+Since PyTorch is wrapped using l4casadi, the CasADi functions are generated using External. This means SX variables
+and expanding functions are not supported. This is computationally intensive when solving, making this approach rather
+slow compared to a purely symbolic approach. Still, it uses much less memory than the symbolic approach, so it has its
+own advantages.
+
+
+KNOWN ISSUES:
+    On Windows (and possibly other platforms), you may randomly get the following error when running this example:
+    ```python
+    OMP: Error #15: Initializing libomp.dll, but found libiomp5md.dll already initialized.
+    ```
+    This error comes from the fact that installing the libraries copies the libiomp5md.dll file to two different
+    locations. When it tries to load them, it notices that "another" library is already loaded. If you are 100% sure
+    that both libraries are the exact same version, you can safely ignore this error. To do so, you can add the
+    following lines at the beginning of your script:
+    ```python
+    import os
+    os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
+    ```
+    That being said, this can be very problematic if the two libraries are not the exact same version. The safer
+    approach is to delete one of the files. To do so, navigate to the folders where the libiomp5md.dll files are
+    located and delete all but one (or back them up by adding ".bak" to their names); keeping the one in site-packages
+    seems to work fine.
+"""
+
+from casadi import MX, vertcat, Function
+import l4casadi as l4c
+import torch
+
+
+class TorchModel:
+    """
+    This class wraps a PyTorch model and exposes the functions bioptim needs to use it as a dynamics model.
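+
+    A minimal usage sketch (assuming `net` is a trained torch.nn.Module whose input size is 3 * n_dof and whose
+    output size is n_dof, as in the pendulum example; the empty lists stand for the unused external_forces and
+    parameters inputs):
+
+        model = TorchModel(torch_model=net)
+        qddot = model.forward_dynamics()(q, qdot, tau, [], [])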
+ """ + + def __init__(self, torch_model: torch.nn.Module, device="cuda" if torch.cuda.is_available() else "cpu"): + self._dynamic_model = l4c.L4CasADi( + torch_model, device=device, generate_jac_jac=True, generate_adj1=False, generate_jac_adj1=False + ) + + self._nb_dof = torch_model.size_in // 3 + self._symbolic_variables() + + def _symbolic_variables(self): + """Declaration of MX variables of the right shape for the creation of CasADi Functions""" + self.q = MX.sym("q_mx", self.nb_dof, 1) + self.qdot = MX.sym("qdot_mx", self.nb_dof, 1) + self.tau = MX.sym("tau_mx", self.nb_dof, 1) + self.external_forces = MX.sym("external_forces_mx", 0, 1) + self.parameters = MX.sym("parameters_mx", 0, 1) + + @property + def name(self) -> str: + # parse the path and split to get the .bioMod name + return "forward_dynamics_torch_model" + + @property + def name_dof(self) -> list[str]: + return [f"q_{i}" for i in range(self.nb_dof)] + + @property + def nb_dof(self) -> int: + return self._nb_dof + + @property + def nb_q(self) -> int: + return self.nb_dof + + @property + def nb_qdot(self) -> int: + return self.nb_dof + + @property + def nb_tau(self) -> int: + return self.nb_dof + + def forward_dynamics(self, with_contact: bool = False) -> Function: + if with_contact: + raise NotImplementedError("Contact dynamics are not implemented for torch models") + + return Function( + "forward_dynamics", + [self.q, self.qdot, self.tau, self.external_forces, self.parameters], + [self._dynamic_model(vertcat(self.q, self.qdot, self.tau).T).T], + ["q", "qdot", "tau", "external_forces", "parameters"], + ["qddot"], + ) + + @property + def nb_contacts(self) -> int: + return 0 + + @property + def nb_soft_contacts(self) -> int: + return 0 + + def reshape_qdot(self, k_stab=1) -> Function: + return Function( + "reshape_qdot", + [self.q, self.qdot, self.parameters], + [self.qdot], + ["q", "qdot", "parameters"], + ["Reshaped qdot"], + ).expand() + + def soft_contact_forces(self) -> Function: + return Function( + "soft_contact_forces", + [self.q, self.qdot, self.parameters], + [MX(0)], + ["q", "qdot", "parameters"], + ["Soft contact forces"], + ).expand() diff --git a/tests/shard1/test_biorbd_model_holonomic.py b/tests/shard1/test_biorbd_model_holonomic.py index 490fcbe65..332b2ae60 100644 --- a/tests/shard1/test_biorbd_model_holonomic.py +++ b/tests/shard1/test_biorbd_model_holonomic.py @@ -1,10 +1,10 @@ import platform +from bioptim import HolonomicBiorbdModel, HolonomicConstraintsFcn, HolonomicConstraintsList, Solver, SolutionMerge +from casadi import DM, MX import numpy as np import numpy.testing as npt import pytest -from casadi import DM, MX -from bioptim import HolonomicBiorbdModel, HolonomicConstraintsFcn, HolonomicConstraintsList, Solver, SolutionMerge from ..utils import TestUtils diff --git a/tests/shard3/test_global_getting_started.py b/tests/shard3/test_global_getting_started.py index 899233b66..212d265a8 100644 --- a/tests/shard3/test_global_getting_started.py +++ b/tests/shard3/test_global_getting_started.py @@ -4,6 +4,7 @@ import tracemalloc import gc +import os import pickle import platform import re @@ -2515,3 +2516,16 @@ def test_memory_and_execution_time(): npt.assert_array_less(test_memory[key][0], ref[key][0] * factor) npt.assert_array_less(test_memory[key][1], ref[key][1] * factor) npt.assert_array_less(test_memory[key][2], ref[key][2] * factor) + + +def test_deep_neural_network(): + from bioptim.examples.deep_neural_network import pytorch_ocp as ocp_module + + model = 
ocp_module.NeuralNetworkModel(layer_node_count=(6, 8, 2), dropout_probability=0.2) + ocp = ocp_module.prepare_ocp(model=model, final_time=1, n_shooting=3) + solver = Solver.IPOPT() + solver.set_maximum_iterations(1) + + # We can launch the solving, but won't be able to check the results as the model is not trained so it is random + os.environ["KMP_DUPLICATE_LIB_OK"] = "True" + ocp.solve(solver=solver) diff --git a/tests/shard3/test_initial_condition.py b/tests/shard3/test_initial_condition.py index 5a12d0cb3..989634a47 100644 --- a/tests/shard3/test_initial_condition.py +++ b/tests/shard3/test_initial_condition.py @@ -1,9 +1,5 @@ import re -import numpy as np -import numpy.testing as npt -import pytest - from bioptim import ( InterpolationType, Solution, @@ -19,6 +15,10 @@ SolutionMerge, ) from bioptim.limits.path_conditions import InitialGuess +import numpy as np +import numpy.testing as npt +import pytest + from ..utils import TestUtils diff --git a/tests/shard5/test_casadi_function_interface.py b/tests/shard5/test_casadi_function_interface.py new file mode 100644 index 000000000..e3e4be44e --- /dev/null +++ b/tests/shard5/test_casadi_function_interface.py @@ -0,0 +1,110 @@ +from casadi import MX, vertcat, Function, jacobian +import numpy as np +import numpy.testing as npt +from bioptim import CasadiFunctionInterface + + +class CasadiFunctionInterfaceTest(CasadiFunctionInterface): + """ + This example implements a somewhat simple 5x1 function, with x and y inputs (x => 3x1; y => 4x1) of the form + f(x, y) = np.array( + [ + x[0] * y[1] + y[0] * y[0], + x[1] * x[1] + 2 * y[1], + x[0] * x[1] * x[2], + x[2] * x[1] + 2 * y[3] * y[2], + y[0] * y[1] * y[2] * y[3], + ] + ) + + It implements the equation (5x1) and the jacobians for the inputs x (5x3) and y (5x4). + """ + + def __init__(self, opts={}): + super(CasadiFunctionInterfaceTest, self).__init__("CasadiFunctionInterfaceTest", opts) + + def inputs_len(self) -> list[int]: + return [3, 4] + + def outputs_len(self) -> list[int]: + return [5] + + def function(self, *args): + x, y = args + x = np.array(x)[:, 0] + y = np.array(y)[:, 0] + return np.array( + [ + x[0] * y[1] + x[0] * y[0] * y[0], + x[1] * x[1] + 2 * y[1], + x[0] * x[1] * x[2], + x[2] * x[1] * y[2] + 2 * y[3] * y[2], + y[0] * y[1] * y[2] * y[3], + ] + ) + + def jacobians(self, *args): + x, y = args + x = np.array(x)[:, 0] + y = np.array(y)[:, 0] + jacobian_x = np.array( + [ + [y[1] + y[0] * y[0], 0, 0], + [0, 2 * x[1], 0], + [x[1] * x[2], x[0] * x[2], x[0] * x[1]], + [0, x[2] * y[2], x[1] * y[2]], + [0, 0, 0], + ] + ) + jacobian_y = np.array( + [ + [x[0] * 2 * y[0], x[0], 0, 0], + [0, 2, 0, 0], + [0, 0, 0, 0], + [0, 0, x[1] * x[2] + 2 * y[3], 2 * y[2]], + [y[1] * y[2] * y[3], y[0] * y[2] * y[3], y[0] * y[1] * y[3], y[0] * y[1] * y[2]], + ] + ) + return [jacobian_x, jacobian_y] + + +def test_penalty_minimize_time(): + """ + These tests seem to test the interface, but actually all the internal methods are also called, which is what should + be tested. 
+ """ + + # Computing the example + interface_test = CasadiFunctionInterfaceTest() + + # Testing the interface + npt.assert_equal(interface_test.inputs_len(), [3, 4]) + npt.assert_equal(interface_test.outputs_len(), [5]) + assert id(interface_test.mx_in()) == id(interface_test.mx_in()) # Calling twice returns the same object + + # Test the class can be called with DM + x_num = np.array([1.1, 2.3, 3.5]) + y_num = np.array([4.2, 5.4, 6.6, 7.7]) + npt.assert_almost_equal(interface_test(x_num, y_num), np.array([[25.344, 16.09, 8.855, 154.77, 1152.5976]]).T) + + # Test the jacobian is correct + x = MX.sym("x", interface_test.inputs_len()[0], 1) + y = MX.sym("y", interface_test.inputs_len()[1], 1) + jaco_x = Function("jaco_x", [x, y], [jacobian(interface_test(x, y), x)]) + jaco_y = Function("jaco_y", [x, y], [jacobian(interface_test(x, y), y)]) + + # Computing the same equations (and derivative) by casadi + real = vertcat( + x[0] * y[1] + x[0] * y[0] * y[0], + x[1] * x[1] + 2 * y[1], + x[0] * x[1] * x[2], + x[2] * x[1] * y[2] + 2 * y[3] * y[2], + y[0] * y[1] * y[2] * y[3], + ) + real_function = Function("real", [x, y], [real]) + jaco_x_real = Function("jaco_x_real", [x, y], [jacobian(real, x)]) + jaco_y_real = Function("jaco_y_real", [x, y], [jacobian(real, y)]) + + npt.assert_almost_equal(np.array(interface_test(x_num, y_num)), real_function(x_num, y_num)) + npt.assert_almost_equal(np.array(jaco_x(x_num, y_num)), jaco_x_real(x_num, y_num)) + npt.assert_almost_equal(np.array(jaco_y(x_num, y_num)), jaco_y_real(x_num, y_num))