diff --git a/Project.toml b/Project.toml
index deb73c9..f73a6d5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,21 +7,30 @@ version = "1.0.0-DEV"
 ExaModels = "1037b233-b668-4ce9-9b63-f9f681f55dd2"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 
-[compat]
-ExaModels = "0.8.3"
+[weakdeps]
+ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 
 [extensions]
+BNKChainRulesCore = "ChainRulesCore"
 BNKJuMP = "JuMP"
 
-[weakdeps]
-JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
+[compat]
+ExaModels = "0.8.3"
 
 [extras]
-AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 OpenCL = "08131aa3-fb12-5dee-8b74-c09406e224a2"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
+AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
+FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Test", "LinearAlgebra", "OpenCL", "pocl_jll", "AcceleratedKernels"]
+test = [
+    "Test", "LinearAlgebra",
+    "OpenCL", "pocl_jll", "AcceleratedKernels",
+    "DifferentiationInterface", "FiniteDifferences", "Zygote"
+]
diff --git a/ext/BNKChainRulesCore.jl b/ext/BNKChainRulesCore.jl
new file mode 100644
index 0000000..56b678b
--- /dev/null
+++ b/ext/BNKChainRulesCore.jl
@@ -0,0 +1,59 @@
+module BNKChainRulesCore
+
+using BatchNLPKernels
+using ChainRulesCore
+
+# Reverse-mode rule for `obj_batch!(bm, X, Θ)`; only `X` receives a cotangent.
+function ChainRulesCore.rrule(::typeof(BatchNLPKernels.obj_batch!), bm::BatchModel, X, Θ)
+    objective = BatchNLPKernels.obj_batch!(bm, X, Θ)
+    # The pullback calls `grad_batch!` again on every invocation and broadcasts
+    # the result against the adjoint of the incoming cotangent.
+    function obj_batch_pullback(Ȳ)
+        seed = unthunk(Ȳ)
+        dX = BatchNLPKernels.grad_batch!(bm, X, Θ) .* seed'
+        # NOTE(review): Θ gets `NoTangent()`, so no gradient reaches Θ — confirm intended.
+        return NoTangent(), NoTangent(), dX, NoTangent()
+    end
+    # Return the primal value together with its pullback.
+    return objective, obj_batch_pullback
+end
+# Reverse-mode rule for the two-argument form `obj_batch!(bm, X)`.
+function ChainRulesCore.rrule(::typeof(BatchNLPKernels.obj_batch!), bm::BatchModel, X)
+    objective = BatchNLPKernels.obj_batch!(bm, X)
+    # The pullback calls `grad_batch!` again on every invocation and broadcasts
+    # the result against the adjoint of the incoming cotangent.
+    function obj_batch_pullback(Ȳ)
+        seed = unthunk(Ȳ)
+        dX = BatchNLPKernels.grad_batch!(bm, X) .* seed'
+        # Tangent slots, in order: the function itself, `bm`, then `X`.
+        return NoTangent(), NoTangent(), dX
+    end
+    # Return the primal value together with its pullback.
+    return objective, obj_batch_pullback
+end
+
+
+# Reverse-mode rule for `cons_nln_batch!(bm, X, Θ)`; only `X` receives a cotangent.
+function ChainRulesCore.rrule(::typeof(BatchNLPKernels.cons_nln_batch!), bm::BatchModel, X, Θ)
+    constraints = BatchNLPKernels.cons_nln_batch!(bm, X, Θ)
+    # Pullback: the cotangent for `X` is the value returned by `jtprod_nln_batch!`.
+    function cons_nln_batch_pullback(Ȳ)
+        dX = BatchNLPKernels.jtprod_nln_batch!(bm, X, Θ, unthunk(Ȳ))
+        return NoTangent(), NoTangent(), dX, NoTangent()
+    end
+    # NOTE(review): Θ is reported as `NoTangent()`, so no gradient reaches Θ — confirm.
+    return constraints, cons_nln_batch_pullback
+end
+# Reverse-mode rule for the two-argument form `cons_nln_batch!(bm, X)`.
+function ChainRulesCore.rrule(::typeof(BatchNLPKernels.cons_nln_batch!), bm::BatchModel, X)
+    constraints = BatchNLPKernels.cons_nln_batch!(bm, X)
+    # Pullback: the cotangent for `X` is the value returned by `jtprod_nln_batch!`.
+    function cons_nln_batch_pullback(Ȳ)
+        dX = BatchNLPKernels.jtprod_nln_batch!(bm, X, unthunk(Ȳ))
+        return NoTangent(), NoTangent(), dX
+    end
+    # Return the primal value together with its pullback.
+    return constraints, cons_nln_batch_pullback
+end
+
+end # module BNKChainRulesCore 
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index fbfddbb..4f3f8e6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -17,5 +17,6 @@ Base.findall(bitarray::CLArray) = Base.findall(identity, bitarray)
 
 
 include("luksan.jl")
+include("test_diff.jl")
 include("api.jl")
 include("config.jl")
\ No newline at end of file
diff --git a/test/test_diff.jl b/test/test_diff.jl
new file mode 100644
index 0000000..0539b96
--- /dev/null
+++ b/test/test_diff.jl
@@ -0,0 +1,132 @@
+using DifferentiationInterface
+const DI = DifferentiationInterface
+
+import Zygote
+import FiniteDifferences
+
+
+# Smoke-test Zygote differentiation through the batch kernels on `CLArray` inputs.
+# Only output/gradient types and sizes are asserted; no values are compared.
+function test_diff_gpu(model::ExaModel, batch_size::Int)
+    cfg = BOI.BatchModelConfig(:full)
+    bm = BOI.BatchModel(model, batch_size, config=cfg)
+    nvar, ncon = model.meta.nvar, model.meta.ncon
+    nθ = length(model.θ)
+
+    # Random host inputs (X drawn before Θ) and their `CLArray` counterparts.
+    host_X = randn(nvar, batch_size)
+    host_Θ = randn(nθ, batch_size)
+    X_gpu = CLArray(host_X)
+    Θ_gpu = CLArray(host_Θ)
+
+    @testset "obj_batch! CLArray" begin
+        out = BOI.obj_batch!(bm, X_gpu, Θ_gpu)
+        @test out isa CLArray
+        @test size(out) == (batch_size,)
+
+        # Scalar loss over the stacked [X; Θ] matrix: rows 1:nvar are read back
+        # as X and the remaining rows as Θ.
+        f_gpu = p -> sum(BOI.obj_batch!(bm, p[1:nvar, :], p[nvar+1:end, :]))
+
+        stacked = vcat(X_gpu, Θ_gpu)
+        g = DI.gradient(f_gpu, AutoZygote(), stacked)
+        @test g isa AbstractMatrix && g isa CLArray
+        @test size(g) == size(stacked)
+    end
+
+    # The constraint checks are skipped when the model reports zero constraints.
+    if ncon == 0
+        return
+    end
+
+    @testset "cons_nln_batch! CLArray" begin
+        out = BOI.cons_nln_batch!(bm, X_gpu, Θ_gpu)
+        @test out isa CLArray
+        @test size(out) == (ncon, batch_size)
+
+        # Same stacked-matrix loss as for the objective, summing all constraint
+        # values instead.
+        f_gpu = p -> sum(BOI.cons_nln_batch!(bm, p[1:nvar, :], p[nvar+1:end, :]))
+
+        stacked = vcat(X_gpu, Θ_gpu)
+        g = DI.gradient(f_gpu, AutoZygote(), stacked)
+        @test g isa AbstractMatrix && g isa CLArray
+        @test size(g) == size(stacked)
+    end
+end
+
+# Check Zygote gradients of the batch kernels on plain CPU arrays and compare
+# their X rows (rows 1:nvar) against a finite-difference gradient.
+function test_diff_cpu(model::ExaModel, batch_size::Int)
+    cfg = BOI.BatchModelConfig(:full)
+    bm = BOI.BatchModel(model, batch_size, config=cfg)
+    nvar, ncon = model.meta.nvar, model.meta.ncon
+    nθ = length(model.θ)
+
+    # Random inputs; X is drawn before Θ.
+    X_cpu = randn(nvar, batch_size)
+    Θ_cpu = randn(nθ, batch_size)
+
+    @testset "obj_batch! CPU" begin
+        out = BOI.obj_batch!(bm, X_cpu, Θ_cpu)
+        @test size(out) == (batch_size,)
+
+        # Scalar loss over the stacked [X; Θ] matrix (rows 1:nvar are X, the rest Θ).
+        f_cpu = p -> sum(BOI.obj_batch!(bm, p[1:nvar, :], p[nvar+1:end, :]))
+
+        stacked = vcat(X_cpu, Θ_cpu)
+        g = DI.gradient(f_cpu, AutoZygote(), stacked)
+        @test g isa AbstractMatrix
+        @test size(g) == size(stacked)
+
+        @testset "FiniteDifferences obj_batch!" begin
+            # Only the X rows are compared; the Θ rows are not checked here.
+            fdm = FiniteDifferences.central_fdm(3,1)
+            g_fd = DI.gradient(f_cpu, AutoFiniteDifferences(fdm=fdm), stacked)
+            @test g_fd[1:nvar,:] ≈ g[1:nvar,:] atol=1e-4 rtol=1e-4
+        end
+    end
+
+    # The constraint checks are skipped when the model reports zero constraints.
+    iszero(ncon) && return
+
+    @testset "cons_nln_batch! CPU" begin
+        out = BOI.cons_nln_batch!(bm, X_cpu, Θ_cpu)
+        @test size(out) == (ncon, batch_size)
+
+        # Same stacked-matrix loss, summing all constraint values instead.
+        f_cpu = p -> sum(BOI.cons_nln_batch!(bm, p[1:nvar, :], p[nvar+1:end, :]))
+
+        stacked = vcat(X_cpu, Θ_cpu)
+        g = DI.gradient(f_cpu, AutoZygote(), stacked)
+        @test g isa AbstractMatrix
+        @test size(g) == size(stacked)
+
+        @testset "FiniteDifferences cons_nln_batch!" begin
+            fdm = FiniteDifferences.central_fdm(3,1)
+            g_fd = DI.gradient(f_cpu, AutoFiniteDifferences(fdm=fdm), stacked)
+            @test g_fd[1:nvar,:] ≈ g[1:nvar,:] atol=1e-4 rtol=1e-4
+        end
+    end
+end
+
+
+# Run the CPU and OpenCL differentiation checks for every model and batch size.
+@testset "AD rules" begin
+    cpu_models, labels = create_luksan_models(CPU())
+    gpu_models = first(create_luksan_models(OpenCLBackend()))
+    for (label, cpu_model, gpu_model) in zip(labels, cpu_models, gpu_models)
+        @testset "$label Model" begin
+            for batch_size in (1, 4)
+                @testset "Batch Size $batch_size" begin
+                    @testset "CPU Diff" begin
+                        test_diff_cpu(cpu_model, batch_size)
+                    end
+                    @testset "GPU Diff" begin
+                        test_diff_gpu(gpu_model, batch_size)
+                    end
+                end
+            end
+        end
+    end
+end