diff --git a/Project.toml b/Project.toml
index f73a6d5..e319649 100644
--- a/Project.toml
+++ b/Project.toml
@@ -19,18 +19,18 @@ BNKJuMP = "JuMP"
 ExaModels = "0.8.3"
 
 [extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-OpenCL = "08131aa3-fb12-5dee-8b74-c09406e224a2"
-pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
 AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+OpenCL = "08131aa3-fb12-5dee-8b74-c09406e224a2"
+PGLib = "07a8691f-3d11-4330-951b-3c50f98338be"
+PowerModels = "c36e90e8-916a-50a6-bd94-075b64ef4655"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
 
 [targets]
-test = [
-    "Test", "LinearAlgebra",
-    "OpenCL", "pocl_jll", "AcceleratedKernels",
-    "DifferentiationInterface", "FiniteDifferences", "Zygote"
-]
+test = ["Test", "CUDA", "GPUArraysCore", "LinearAlgebra", "OpenCL", "pocl_jll", "AcceleratedKernels", "DifferentiationInterface", "FiniteDifferences", "Zygote", "PGLib", "PowerModels"]
diff --git a/ext/BNKChainRulesCore.jl b/ext/BNKChainRulesCore.jl
index 56b678b..a9791c9 100644
--- a/ext/BNKChainRulesCore.jl
+++ b/ext/BNKChainRulesCore.jl
@@ -56,4 +56,50 @@ function ChainRulesCore.rrule(::typeof(BatchNLPKernels.cons_nln_batch!), bm::Bat
     return y, cons_nln_batch_pullback
 end
 
+
+function ChainRulesCore.rrule(::typeof(BatchNLPKernels.constraint_violations!), bm::BatchModel, V)
+    Vc = BatchNLPKernels.constraint_violations!(bm, V)
+    
+    function constraint_violations_pullback(Ȳ)
+        Ȳ = ChainRulesCore.unthunk(Ȳ)
+        
+        # violation(v, s) = max(s.l - v, v - s.u, 0)
+        # ∂violation/∂v = -1 if v < s.l, +1 if v > s.u, 0 otherwise
+        
+        V̄ = if isempty(bm.viols_cons)
+            zeros(eltype(V), size(V))
+        else
+            lower_viols = V .< bm.viols_cons.l
+            upper_viols = V .> bm.viols_cons.u
+            lower_viols .* (-Ȳ) .+ upper_viols .* Ȳ
+        end
+        
+        return ChainRulesCore.NoTangent(), ChainRulesCore.NoTangent(), V̄
+    end
+    
+    return Vc, constraint_violations_pullback
+end
+function ChainRulesCore.rrule(::typeof(BatchNLPKernels.bound_violations!), bm::BatchModel, X)
+    Vb = BatchNLPKernels.bound_violations!(bm, X)
+    
+    function bound_violations_pullback(Ȳ)
+        Ȳ = ChainRulesCore.unthunk(Ȳ)
+        
+        # violation(x, s) = max(s.l - x, x - s.u, 0)
+        # ∂violation/∂x = -1 if x < s.l, +1 if x > s.u, 0 otherwise
+        
+        X̄ = if isempty(bm.viols_vars)
+            zeros(eltype(X), size(X))
+        else
+            lower_viols = X .< bm.viols_vars.l
+            upper_viols = X .> bm.viols_vars.u
+            lower_viols .* (-Ȳ) .+ upper_viols .* Ȳ
+        end
+        
+        return ChainRulesCore.NoTangent(), ChainRulesCore.NoTangent(), X̄
+    end
+    
+    return Vb, bound_violations_pullback
+end
+
 end # module BNKChainRulesCore 
\ No newline at end of file
diff --git a/src/BatchNLPKernels.jl b/src/BatchNLPKernels.jl
index a18b73f..d09c930 100644
--- a/src/BatchNLPKernels.jl
+++ b/src/BatchNLPKernels.jl
@@ -6,6 +6,7 @@ using KernelAbstractions
 const ExaKA = Base.get_extension(ExaModels, :ExaModelsKernelAbstractions)
 const KAExtension = ExaKA.KAExtension
 
+include("interval.jl")
 include("batch_model.jl")
 
 const BOI = BatchNLPKernels
@@ -22,5 +23,6 @@ include("api/jac.jl")
 include("api/obj.jl")
 include("api/jprod.jl")
 include("api/hprod.jl")
+include("api/viols.jl")
 
 end # module BatchNLPKernels
\ No newline at end of file
diff --git a/src/api/viols.jl b/src/api/viols.jl
new file mode 100644
index 0000000..360d3e5
--- /dev/null
+++ b/src/api/viols.jl
@@ -0,0 +1,57 @@
+"""
+    all_violations!(bm::BatchModel, X::AbstractMatrix)
+
+Compute all constraint and variable violations for a batch of solutions.
+"""
+function all_violations!(bm::BatchModel, X::AbstractMatrix)
+    V = cons_nln_batch!(bm, X)
+
+    Vc = constraint_violations!(bm, V)
+    Vb = bound_violations!(bm, X)
+
+    return Vc, Vb
+end
+
+"""
+    all_violations!(bm::BatchModel, X::AbstractMatrix, Θ::AbstractMatrix)
+
+Compute all constraint and variable violations for a batch of solutions and parameters.
+"""
+function all_violations!(bm::BatchModel, X::AbstractMatrix, Θ::AbstractMatrix)
+    V = cons_nln_batch!(bm, X, Θ)
+
+    Vc = constraint_violations!(bm, V)
+    Vb = bound_violations!(bm, X)
+
+    return Vc, Vb
+end
+
+"""
+    constraint_violations!(bm::BatchModel, V::AbstractMatrix)
+
+Compute constraint violations for a batch of constraint primal values.
+"""
+function constraint_violations!(bm::BatchModel, V::AbstractMatrix)
+    viols_cons_out = _maybe_view(bm, :viols_cons_out, V)
+    
+    _violation!.(eachcol(viols_cons_out), eachcol(V), bm.viols_cons)
+
+    return viols_cons_out
+end
+
+"""
+    bound_violations!(bm::BatchModel, X::AbstractMatrix)
+
+Compute variable violations for a batch of variable primal values.
+"""
+function bound_violations!(bm::BatchModel, X::AbstractMatrix)
+    viols_vars_out = _maybe_view(bm, :viols_vars_out, X)
+    
+    _violation!.(eachcol(viols_vars_out), eachcol(X), bm.viols_vars)
+
+    return viols_vars_out
+end
+
+@inline _violation!(d, v, s::S) where {S} = begin
+    d .= _violation(v, s)
+end
diff --git a/src/batch_model.jl b/src/batch_model.jl
index dccb6b4..e9cb51d 100644
--- a/src/batch_model.jl
+++ b/src/batch_model.jl
@@ -12,6 +12,7 @@ Configuration struct for controlling which buffers are allocated in a BatchModel
 - `jprod::Bool`: Allocate jacobian-vector product buffer (default: false)
 - `jtprod::Bool`: Allocate jacobian transpose-vector product buffer (default: false)
 - `hprod::Bool`: Allocate hessian-vector product buffer (default: false)
+- `viols::Bool`: Allocate constraint and variable violation buffers (default: false)
 """
 struct BatchModelConfig
     obj::Bool
@@ -22,15 +23,16 @@ struct BatchModelConfig
     jprod::Bool
     jtprod::Bool
     hprod::Bool
+    viols::Bool
 end
 
 """
-    BatchModelConfig(; obj=true, cons=true, grad=false, jac=false, hess=false, jprod=false, jtprod=false, hprod=false)
+    BatchModelConfig(; obj=true, cons=true, grad=false, jac=false, hess=false, jprod=false, jtprod=false, hprod=false, viols=false)
 
 Create a BatchModelConfig with specified buffer allocations.
 """
-function BatchModelConfig(; obj=true, cons=true, grad=false, jac=false, hess=false, jprod=false, jtprod=false, hprod=false)
-    return BatchModelConfig(obj, cons, grad, jac, hess, jprod, jtprod, hprod)
+function BatchModelConfig(; obj=true, cons=true, grad=false, jac=false, hess=false, jprod=false, jtprod=false, hprod=false, viols=false)
+    return BatchModelConfig(obj, cons, grad, jac, hess, jprod, jtprod, hprod, viols)
 end
 
 """
@@ -38,21 +40,35 @@ end
 
 Minimal configuration with only objective and constraint buffers.
 """
-BatchModelConfig(::Val{:minimal}) = BatchModelConfig(obj=true, cons=true, grad=false, jac=false, hess=false, jprod=false, jtprod=false, hprod=false)
+BatchModelConfig(::Val{:minimal}) = BatchModelConfig(obj=true, cons=true, grad=false, jac=false, hess=false, jprod=false, jtprod=false, hprod=false, viols=false)
 
 """
     BatchModelConfig(:gradients)
 
 Configuration to support obj, cons, and their gradients (grad, jtprod).
 """
-BatchModelConfig(::Val{:gradients}) = BatchModelConfig(obj=true, cons=true, grad=true, jac=false, hess=false, jprod=false, jtprod=true, hprod=false)
+BatchModelConfig(::Val{:gradients}) = BatchModelConfig(obj=true, cons=true, grad=true, jac=true, hess=false, jprod=false, jtprod=true, hprod=false, viols=false)
+
+"""
+    BatchModelConfig(:violations)
+
+Configuration to support obj, cons, and constraint/variable violations.
+"""
+BatchModelConfig(::Val{:violations}) = BatchModelConfig(obj=true, cons=true, grad=false, jac=false, hess=false, jprod=false, jtprod=false, hprod=false, viols=true)
+
+"""
+    BatchModelConfig(:viol_grad)
+
+Configuration to support obj, cons, constraint/variable violations, and their gradients.
+"""
+BatchModelConfig(::Val{:viol_grad}) = BatchModelConfig(obj=true, cons=true, grad=true, jac=true, hess=false, jprod=false, jtprod=true, hprod=false, viols=true)
 
 """
     BatchModelConfig(:full)
 
 Full configuration with all buffers allocated.
 """
-BatchModelConfig(::Val{:full}) = BatchModelConfig(obj=true, cons=true, grad=true, jac=true, hess=true, jprod=true, jtprod=true, hprod=true)
+BatchModelConfig(::Val{:full}) = BatchModelConfig(obj=true, cons=true, grad=true, jac=true, hess=true, jprod=true, jtprod=true, hprod=true, viols=true)
 
 BatchModelConfig(s::Symbol) = BatchModelConfig(Val(s))
 
@@ -65,16 +81,20 @@ Allows efficient evaluation of multiple points simultaneously.
 ## Fields
 - `model::ExaModel`: The underlying ExaModel
 - `batch_size::Int`: Number of points to evaluate simultaneously
-- `obj_work::MT`: Batch objective values (nobj × batch_size), (0 × batch_size) if not allocated
-- `cons_work::MT`: Batch constraint values (nconaug × batch_size), (0 × batch_size) if not allocated
-- `cons_out::MT`: Dense constraint output buffer (ncon × batch_size), (0 × batch_size) if not allocated
-- `grad_work::MT`: Batch gradient values (nnzg × batch_size), (0 × batch_size) if not allocated
-- `grad_out::MT`: Dense gradient output buffer (nvar × batch_size), (0 × batch_size) if not allocated
-- `jprod_work::MT`: Batch jacobian values (nnzj × batch_size), (0 × batch_size) if not allocated
-- `hprod_work::MT`: Batch hessian values (nnzh × batch_size), (0 × batch_size) if not allocated
-- `jprod_out::MT`: Batch jacobian-vector product buffer (ncon × batch_size), (0 × batch_size) if not allocated
-- `jtprod_out::MT`: Batch jacobian transpose-vector product buffer (nvar × batch_size), (0 × batch_size) if not allocated
-- `hprod_out::MT`: Batch hessian-vector product buffer (nvar × batch_size), (0 × batch_size) if not allocated
+- `obj_work::MT`: Batch objective values (nobj × batch_size)
+- `cons_work::MT`: Batch constraint values (nconaug × batch_size)
+- `cons_out::MT`: Dense constraint output buffer (ncon × batch_size)
+- `grad_work::MT`: Batch gradient values (nnzg × batch_size)
+- `grad_out::MT`: Dense gradient output buffer (nvar × batch_size)
+- `jprod_work::MT`: Batch jacobian values (nnzj × batch_size)
+- `hprod_work::MT`: Batch hessian values (nnzh × batch_size)
+- `jprod_out::MT`: Batch jacobian-vector product buffer (ncon × batch_size)
+- `jtprod_out::MT`: Batch jacobian transpose-vector product buffer (nvar × batch_size)
+- `hprod_out::MT`: Batch hessian-vector product buffer (nvar × batch_size)
+- `viols_cons_out::MT`: Constraint violation output buffer (ncon × batch_size)
+- `viols_vars_out::MT`: Variable violation output buffer (nvar × batch_size)
+- `viols_cons::Interval`: Constraint bounds as interval set
+- `viols_vars::Interval`: Variable bounds as interval set
 """
 struct BatchModel{MT,E}
     model::E
@@ -90,6 +110,11 @@ struct BatchModel{MT,E}
     jprod_out::MT
     jtprod_out::MT
     hprod_out::MT
+
+    viols_cons_out::MT
+    viols_vars_out::MT
+    viols_cons::Interval
+    viols_vars::Interval
 end
 
 """
@@ -133,6 +158,12 @@ function BatchModel(model::ExaModels.ExaModel{T,VT,E,O,C}, batch_size::Int; conf
     jprod_out = config.jprod ? similar(o, ncon, batch_size) : similar(o, 0, batch_size)
     jtprod_out = config.jtprod ? similar(o, nvar, batch_size) : similar(o, 0, batch_size)
     hprod_out = config.hprod ? similar(o, nvar, batch_size) : similar(o, 0, batch_size)
+
+    # FIXME: consider don't allocate vars if there are no bound constraints
+    viols_cons_out = config.viols ? similar(o, ncon, batch_size) : similar(o, 0, batch_size)
+    viols_vars_out = config.viols ? similar(o, nvar, batch_size) : similar(o, 0, batch_size)
+    viols_cons = config.viols ? Interval(model.meta.lcon, model.meta.ucon) : Interval()
+    viols_vars = config.viols ? Interval(model.meta.lvar, model.meta.uvar) : Interval()
     
     return BatchModel(
         model,
@@ -147,6 +178,10 @@ function BatchModel(model::ExaModels.ExaModel{T,VT,E,O,C}, batch_size::Int; conf
         jprod_out,
         jtprod_out,
         hprod_out,
+        viols_cons_out,
+        viols_vars_out,
+        viols_cons,
+        viols_vars,
     )
 end
 
diff --git a/src/interval.jl b/src/interval.jl
new file mode 100644
index 0000000..fd884f5
--- /dev/null
+++ b/src/interval.jl
@@ -0,0 +1,21 @@
+"""
+    Interval{VT}
+
+Represents the RHS of M constraints g(xᵢ) ∈ [lᵢ, uᵢ]  ∀i ∈ 1:M.
+"""
+struct Interval{VT}
+    l::VT
+    u::VT
+end
+@inline _violation(v, s::Interval{VT}) where {VT} = begin
+    @. max(s.l - v, v - s.u, zero(v))
+end
+
+Base.broadcastable(s::Interval) = Ref(s)
+Base.isempty(s::Interval{VT}) where {VT} = isempty(s.l) || isempty(s.u)
+
+# empty support (unconstrained)
+Interval(::Nothing) = Interval()
+Interval() = Interval(nothing, nothing)
+Base.isempty(::Interval{Nothing}) = true
+@inline _violation(v, ::Interval{Nothing}) = zero(v)
diff --git a/test/api.jl b/test/api.jl
index 230a019..c1d5f33 100644
--- a/test/api.jl
+++ b/test/api.jl
@@ -1,5 +1,5 @@
 function test_batch_model(model::ExaModel, batch_size::Int; 
-                                   atol::Float64=1e-10, rtol::Float64=1e-10)
+                                   atol::Float64=1e-10, rtol::Float64=1e-10, MOD=OpenCL)
     
     bm = BOI.BatchModel(model, batch_size, config=BOI.BatchModelConfig(:full))
     
@@ -7,8 +7,8 @@ function test_batch_model(model::ExaModel, batch_size::Int;
     ncon = model.meta.ncon
     nθ = length(model.θ)
     
-    X = OpenCL.randn(nvar, batch_size)
-    Θ = OpenCL.randn(nθ, batch_size)
+    X = MOD.randn(nvar, batch_size)
+    Θ = MOD.randn(nθ, batch_size)
     
     @testset "Model Info: $(nvar) vars, $(ncon) cons, $(nθ) params" begin
         @testset "Objective" begin
@@ -16,8 +16,8 @@ function test_batch_model(model::ExaModel, batch_size::Int;
             @test length(obj_vals) == batch_size
             @test all(isfinite, obj_vals)
             for i in 1:batch_size
-                OpenCL.@allowscalar nθ > 0 && (model.θ .= Θ[:, i])
-                OpenCL.@allowscalar @test obj_vals[i] ≈ ExaModels.obj(model, X[:, i]) atol=atol rtol=rtol
+                @allowscalar nθ > 0 && (model.θ .= Θ[:, i])
+                @allowscalar @test obj_vals[i] ≈ ExaModels.obj(model, X[:, i]) atol=atol rtol=rtol
             end
         end
         
@@ -27,10 +27,10 @@ function test_batch_model(model::ExaModel, batch_size::Int;
                 @test size(cons_vals) == (ncon, batch_size)
                 @test all(isfinite, cons_vals)
                 for i in 1:batch_size
-                    OpenCL.@allowscalar nθ > 0 && (model.θ .= Θ[:, i])
-                    cons_single = CLVector{Float64}(undef, ncon)
-                    OpenCL.@allowscalar ExaModels.cons_nln!(model, X[:, i], cons_single)
-                    OpenCL.@allowscalar @test cons_vals[:, i] ≈ cons_single atol=atol rtol=rtol
+                    @allowscalar nθ > 0 && (model.θ .= Θ[:, i])
+                    cons_single = similar(cons_vals, ncon)
+                    @allowscalar ExaModels.cons_nln!(model, X[:, i], cons_single)
+                    @allowscalar @test cons_vals[:, i] ≈ cons_single atol=atol rtol=rtol
                 end
             end
         end
@@ -40,72 +40,72 @@ function test_batch_model(model::ExaModel, batch_size::Int;
             @test size(grad_vals) == (nvar, batch_size)
             @test all(isfinite, grad_vals)
             for i in 1:batch_size
-                OpenCL.@allowscalar nθ > 0 && (model.θ .= Θ[:, i])
-                grad_single = CLVector{Float64}(undef, nvar)
-                OpenCL.@allowscalar ExaModels.grad!(model, X[:, i], grad_single)
-                OpenCL.@allowscalar @test grad_vals[:, i] ≈ grad_single atol=atol rtol=rtol
+                @allowscalar nθ > 0 && (model.θ .= Θ[:, i])
+                grad_single = similar(grad_vals, nvar)
+                @allowscalar ExaModels.grad!(model, X[:, i], grad_single)
+                @allowscalar @test grad_vals[:, i] ≈ grad_single atol=atol rtol=rtol
             end
         end
         
         @testset "Jacobian-Vector Product" begin
             if ncon > 0
-                V = OpenCL.randn(nvar, batch_size)
+                V = MOD.randn(nvar, batch_size)
                 jprod_vals = BOI.jprod_nln_batch!(bm, X, Θ, V)
                 @test size(jprod_vals) == (ncon, batch_size)
                 @test all(isfinite, jprod_vals)
                 for i in 1:batch_size
-                    OpenCL.@allowscalar nθ > 0 && (model.θ .= Θ[:, i])
-                    jprod_single = CLVector{Float64}(undef, ncon)
-                    OpenCL.@allowscalar ExaModels.jprod_nln!(model, X[:, i], V[:, i], jprod_single)
-                    OpenCL.@allowscalar @test jprod_vals[:, i] ≈ jprod_single atol=atol rtol=rtol
+                    @allowscalar nθ > 0 && (model.θ .= Θ[:, i])
+                    jprod_single = similar(jprod_vals, ncon)
+                    @allowscalar ExaModels.jprod_nln!(model, X[:, i], V[:, i], jprod_single)
+                    @allowscalar @test jprod_vals[:, i] ≈ jprod_single atol=atol rtol=rtol
                 end
             end
         end
         
         @testset "Jacobian-Transpose-Vector Product" begin
             if ncon > 0
-                V = OpenCL.randn(ncon, batch_size)
+                V = MOD.randn(ncon, batch_size)
                 jtprod_vals = BOI.jtprod_nln_batch!(bm, X, Θ, V)
                 @test size(jtprod_vals) == (nvar, batch_size)
                 @test all(isfinite, jtprod_vals)
                 for i in 1:batch_size
-                    OpenCL.@allowscalar nθ > 0 && (model.θ .= Θ[:, i])
-                    jtprod_single = CLVector{Float64}(undef, nvar)
-                    OpenCL.@allowscalar ExaModels.jtprod_nln!(model, X[:, i], V[:, i], jtprod_single)
-                    OpenCL.@allowscalar @test jtprod_vals[:, i] ≈ jtprod_single atol=atol rtol=rtol
+                    @allowscalar nθ > 0 && (model.θ .= Θ[:, i])
+                    jtprod_single = similar(jtprod_vals, nvar)
+                    @allowscalar ExaModels.jtprod_nln!(model, X[:, i], V[:, i], jtprod_single)
+                    @allowscalar @test jtprod_vals[:, i] ≈ jtprod_single atol=atol rtol=rtol
                 end
             end
         end
         
         @testset "Hessian-Vector Product" begin
-            V = OpenCL.randn(nvar, batch_size)
+            V = MOD.randn(nvar, batch_size)
             if ncon > 0
-                Y = OpenCL.randn(ncon, batch_size)
+                Y = MOD.randn(ncon, batch_size)
                 hprod_vals = BOI.hprod_batch!(bm, X, Θ, Y, V)
                 @test size(hprod_vals) == (nvar, batch_size)
                 @test all(isfinite, hprod_vals)
                 for i in 1:batch_size
-                    OpenCL.@allowscalar nθ > 0 && (model.θ .= Θ[:, i])
-                    hprod_single = CLVector{Float64}(undef, nvar)
-                    OpenCL.@allowscalar ExaModels.hprod!(model, X[:, i], Y[:, i], V[:, i], hprod_single)
-                    OpenCL.@allowscalar @test hprod_vals[:, i] ≈ hprod_single atol=atol rtol=rtol
+                    @allowscalar nθ > 0 && (model.θ .= Θ[:, i])
+                    hprod_single = similar(hprod_vals, nvar)
+                    @allowscalar ExaModels.hprod!(model, X[:, i], Y[:, i], V[:, i], hprod_single)
+                    @allowscalar @test hprod_vals[:, i] ≈ hprod_single atol=atol rtol=rtol
                 end
             else
-                Y = OpenCL.zeros(ncon, batch_size)
+                Y = MOD.zeros(ncon, batch_size)
                 hprod_vals = BOI.hprod_batch!(bm, X, Θ, Y, V)
                 @test size(hprod_vals) == (nvar, batch_size)
                 @test all(isfinite, hprod_vals)
                 for i in 1:batch_size
-                    OpenCL.@allowscalar nθ > 0 && (model.θ .= Θ[:, i])
-                    hprod_single = CLVector{Float64}(undef, nvar)
-                    OpenCL.@allowscalar ExaModels.hprod!(model, X[:, i], Y[:, i], V[:, i], hprod_single)
-                    OpenCL.@allowscalar @test hprod_vals[:, i] ≈ hprod_single atol=atol rtol=rtol
+                    @allowscalar nθ > 0 && (model.θ .= Θ[:, i])
+                    hprod_single = similar(hprod_vals, nvar)
+                    @allowscalar ExaModels.hprod!(model, X[:, i], Y[:, i], V[:, i], hprod_single)
+                    @allowscalar @test hprod_vals[:, i] ≈ hprod_single atol=atol rtol=rtol
                 end
             end
         end
         
         @testset "Batch Size Validation" begin
-            X_large = OpenCL.randn(nvar, batch_size + 1)
+            X_large = MOD.randn(nvar, batch_size + 1)
             @test_throws AssertionError BOI.obj_batch!(bm, X_large)
             
             if ncon > 0
@@ -115,66 +115,111 @@ function test_batch_model(model::ExaModel, batch_size::Int;
             @test_throws AssertionError BOI.grad_batch!(bm, X_large)
             
             if ncon > 0
-                V_jprod = OpenCL.randn(nvar, batch_size + 1)
+                V_jprod = MOD.randn(nvar, batch_size + 1)
                 @test_throws AssertionError BOI.jprod_nln_batch!(bm, X_large, V_jprod)
                 
-                V_jtprod = OpenCL.randn(ncon, batch_size + 1)
+                V_jtprod = MOD.randn(ncon, batch_size + 1)
                 @test_throws AssertionError BOI.jtprod_nln_batch!(bm, X_large, V_jtprod)
             end
             
-            V_hprod = OpenCL.randn(nvar, batch_size + 1)
+            V_hprod = MOD.randn(nvar, batch_size + 1)
             if ncon > 0
-                Y_large = OpenCL.randn(ncon, batch_size + 1)
+                Y_large = MOD.randn(ncon, batch_size + 1)
                 @test_throws AssertionError BOI.hprod_batch!(bm, X_large, Y_large, V_hprod)
             else
-                Y_large = OpenCL.zeros(ncon, batch_size + 1)
+                Y_large = MOD.zeros(ncon, batch_size + 1)
                 @test_throws AssertionError BOI.hprod_batch!(bm, X_large, Y_large, V_hprod)
             end
         end
         
         @testset "Dimension Validation" begin
-            X_wrong = OpenCL.randn(nvar + 1, batch_size)
+            X_wrong = MOD.randn(nvar + 1, batch_size)
             @test_throws DimensionMismatch BOI.obj_batch!(bm, X_wrong)
 
             if nθ > 0
-                Θ_wrong = OpenCL.randn(nθ + 1, batch_size)
+                Θ_wrong = MOD.randn(nθ + 1, batch_size)
                 @test_throws DimensionMismatch BOI.obj_batch!(bm, X, Θ_wrong)
             end
             
             if ncon > 0
-                V_jprod_wrong = OpenCL.randn(nvar + 1, batch_size)
+                V_jprod_wrong = MOD.randn(nvar + 1, batch_size)
                 @test_throws DimensionMismatch BOI.jprod_nln_batch!(bm, X, V_jprod_wrong)
                 
-                V_jtprod_wrong = OpenCL.randn(ncon + 1, batch_size)
+                V_jtprod_wrong = MOD.randn(ncon + 1, batch_size)
                 @test_throws DimensionMismatch BOI.jtprod_nln_batch!(bm, X, V_jtprod_wrong)
                 
-                Y_wrong = OpenCL.randn(ncon + 1, batch_size)
-                V_hprod = OpenCL.randn(nvar, batch_size)
+                Y_wrong = MOD.randn(ncon + 1, batch_size)
+                V_hprod = MOD.randn(nvar, batch_size)
                 @test_throws DimensionMismatch BOI.hprod_batch!(bm, X, Y_wrong, V_hprod)
             end
 
-            V_hprod_wrong = OpenCL.randn(nvar + 1, batch_size)
+            V_hprod_wrong = MOD.randn(nvar + 1, batch_size)
             if ncon > 0
-                Y = OpenCL.randn(ncon, batch_size)
+                Y = MOD.randn(ncon, batch_size)
                 @test_throws DimensionMismatch BOI.hprod_batch!(bm, X, Y, V_hprod_wrong)
             else
-                Y = OpenCL.zeros(ncon, batch_size)
+                Y = MOD.zeros(ncon, batch_size)
                 @test_throws DimensionMismatch BOI.hprod_batch!(bm, X, Y, V_hprod_wrong)
             end
         end
     end
 end
 
-@testset "API" begin
-    models, names = create_luksan_models()
+@testset "API - Luksan - OpenCL" begin
+    models, names = create_luksan_models(OpenCLBackend())
     
     for (name, model) in zip(names, models)
         @testset "$name Model" begin
             for batch_size in [1, 2, 4]
                 @testset "Batch Size $batch_size" begin
-                    test_batch_model(model, batch_size, atol=1e-5, rtol=1e-5)
+                    test_batch_model(model, batch_size, atol=1e-5, rtol=1e-5, MOD=OpenCL)
                 end
             end
         end
     end
 end
+
+@testset "API - Power - OpenCL" begin
+    models_p, names_p = create_power_models(OpenCLBackend())
+
+    for (name, model) in zip(names_p, models_p)
+        @testset "$(name) Model" begin
+            for batch_size in [1, 2, 4]
+                @testset "Batch Size $(batch_size)" begin
+                    test_batch_model(model, batch_size; atol=1e-5, rtol=1e-5, MOD=OpenCL)
+                end
+            end
+        end
+    end
+end
+
+
+if haskey(ENV, "BNK_TEST_CUDA")
+    @testset "API - Luksan - CUDA" begin
+        models_c, names = create_luksan_models(CUDABackend())
+        
+        for (name, model) in zip(names, models_c)
+            @testset "$name Model" begin
+                for batch_size in [1, 2, 4]
+                    @testset "Batch Size $batch_size" begin
+                        test_batch_model(model, batch_size, atol=1e-5, rtol=1e-5, MOD=CUDA)
+                    end
+                end
+            end
+        end
+    end
+
+    @testset "API - Power - CUDA" begin
+        models_pc, names_p = create_power_models(CUDABackend())
+
+        for (name, model) in zip(names_p, models_pc)
+            @testset "$(name) Model" begin
+                for batch_size in [1, 2, 4]
+                    @testset "Batch Size $(batch_size)" begin
+                        test_batch_model(model, batch_size; atol=1e-5, rtol=1e-5, MOD=CUDA)
+                    end
+                end
+            end
+        end
+    end
+end
\ No newline at end of file
diff --git a/test/power.jl b/test/power.jl
new file mode 100644
index 0000000..2f43016
--- /dev/null
+++ b/test/power.jl
@@ -0,0 +1,198 @@
+function _get_case_file(filename::String)
+    isfile(filename) && return filename
+
+    cached = joinpath(PGLib.PGLib_opf, filename)
+    !isfile(cached) && error("File $filename not found in PGLib/pglib-opf")
+    return cached
+end
+function _build_power_ref(filename)
+    path = _get_case_file(filename)
+    data = PowerModels.parse_file(path)
+    PowerModels.standardize_cost_terms!(data, order = 2)
+    PowerModels.calc_thermal_limits!(data)
+    return PowerModels.build_ref(data)[:it][:pm][:nw][0]
+end
+
+
+_convert_array(data::N, backend) where {names,N<:NamedTuple{names}} =
+    NamedTuple{names}(ExaModels.convert_array(d, backend) for d in data)
+function _parse_ac_data_raw(filename)
+    ref = _build_power_ref(filename) # FIXME: only parse once
+    arcdict    = Dict(a => k for (k, a) in enumerate(ref[:arcs]))
+    busdict    = Dict(k => i for (i, (k, _)) in enumerate(ref[:bus]))
+    gendict    = Dict(k => i for (i, (k, _)) in enumerate(ref[:gen]))
+    branchdict = Dict(k => i for (i, (k, _)) in enumerate(ref[:branch]))
+    return (
+        bus = [
+            begin
+                loads   = [ref[:load][l]   for l in ref[:bus_loads][k]]
+                shunts  = [ref[:shunt][s]  for s in ref[:bus_shunts][k]]
+                pd  = sum(load["pd"]  for load  in loads;  init = 0.0)
+                qd  = sum(load["qd"]  for load  in loads;  init = 0.0)
+                gs  = sum(shunt["gs"] for shunt in shunts; init = 0.0)
+                bs  = sum(shunt["bs"] for shunt in shunts; init = 0.0)
+                (i = busdict[k], pd = pd, gs = gs, qd = qd, bs = bs)
+            end for (k, _) in ref[:bus]
+        ],
+        gen = [
+            (
+                i     = gendict[k],
+                cost1 = v["cost"][1],
+                cost2 = v["cost"][2],
+                cost3 = v["cost"][3],
+                bus   = busdict[v["gen_bus"]],
+            ) for (k, v) in ref[:gen]
+        ],
+        arc = [
+            (i = k, rate_a = ref[:branch][l]["rate_a"], bus = busdict[i]) for
+            (k, (l, i, _)) in enumerate(ref[:arcs])
+        ],
+        branch = [
+            begin
+                branch = branch_raw
+                f_idx = arcdict[i, branch["f_bus"], branch["t_bus"]]
+                t_idx = arcdict[i, branch["t_bus"], branch["f_bus"]]
+
+                g,  b  = PowerModels.calc_branch_y(branch)
+                tr, ti = PowerModels.calc_branch_t(branch)
+                ttm    = tr^2 + ti^2
+
+                g_fr = branch["g_fr"]; b_fr = branch["b_fr"]
+                g_to = branch["g_to"]; b_to = branch["b_to"]
+
+                (
+                    i      = branchdict[i],
+                    j      = 1,
+                    f_idx  = f_idx,
+                    t_idx  = t_idx,
+                    f_bus  = busdict[branch["f_bus"]],
+                    t_bus  = busdict[branch["t_bus"]],
+                    c1     = (-g * tr - b * ti) / ttm,
+                    c2     = (-b * tr + g * ti) / ttm,
+                    c3     = (-g * tr + b * ti) / ttm,
+                    c4     = (-b * tr - g * ti) / ttm,
+                    c5     = (g + g_fr) / ttm,
+                    c6     = (b + b_fr) / ttm,
+                    c7     = (g + g_to),
+                    c8     = (b + b_to),
+                    rate_a_sq = branch["rate_a"]^2,
+                )
+            end for (i, branch_raw) in ref[:branch]
+        ],
+        ref_buses = [busdict[i] for (i, _) in ref[:ref_buses]],
+        vmax      = [v["vmax"] for (_, v) in ref[:bus]],
+        vmin      = [v["vmin"] for (_, v) in ref[:bus]],
+        pmax      = [v["pmax"] for (_, v) in ref[:gen]],
+        pmin      = [v["pmin"] for (_, v) in ref[:gen]],
+        qmax      = [v["qmax"] for (_, v) in ref[:gen]],
+        qmin      = [v["qmin"] for (_, v) in ref[:gen]],
+        rate_a    = [ref[:branch][l]["rate_a"] for (l, _, _) in ref[:arcs]],
+        angmax    = [b["angmax"] for (_, b) in ref[:branch]],
+        angmin    = [b["angmin"] for (_, b) in ref[:branch]],
+    )
+end
+
+_parse_ac_data(filename) = _parse_ac_data_raw(filename)
+function _parse_ac_data(filename, backend)
+    _convert_array(_parse_ac_data_raw(filename), backend)
+end
+
+function create_ac_power_model(
+    filename::String = "pglib_opf_case14_ieee.m";
+    prod::Bool = true,
+    backend    = OpenCLBackend(),
+)
+    data = _parse_ac_data(filename, backend)
+
+    c = ExaCore(backend = backend)
+
+    # Decision variables ------------------------------------------------------
+    va = variable(c, length(data.bus))               # voltage angles (rad)
+    vm = variable(c, length(data.bus);
+                start = fill!(similar(data.bus, Float64), 1.0),
+                lvar  = data.vmin, uvar = data.vmax) # voltage magnitude (p.u.)
+
+    pg = variable(c, length(data.gen); lvar = data.pmin, uvar = data.pmax)
+    qg = variable(c, length(data.gen); lvar = data.qmin, uvar = data.qmax)
+
+    p  = variable(c, length(data.arc); lvar = -data.rate_a, uvar = data.rate_a)
+    q  = variable(c, length(data.arc); lvar = -data.rate_a, uvar = data.rate_a)
+
+    # Objective – quadratic generation cost -----------------------------------
+    objective(c, g.cost1 * pg[g.i]^2 + g.cost2 * pg[g.i] + g.cost3 for g in data.gen)
+
+    # Reference bus angle ------------------------------------------------------
+    c1 = constraint(c, va[i] for i in data.ref_buses)
+
+    # Branch power-flow equations ---------------------------------------------
+    constraint(
+        c,
+        (p[b.f_idx] - b.c5 * vm[b.f_bus]^2 -
+        b.c3 * (vm[b.f_bus] * vm[b.t_bus] * cos(va[b.f_bus] - va[b.t_bus])) -
+        b.c4 * (vm[b.f_bus] * vm[b.t_bus] * sin(va[b.f_bus] - va[b.t_bus])) for
+        b in data.branch),
+    )
+
+    constraint(
+        c,
+        (q[b.f_idx] + b.c6 * vm[b.f_bus]^2 +
+        b.c4 * (vm[b.f_bus] * vm[b.t_bus] * cos(va[b.f_bus] - va[b.t_bus])) -
+        b.c3 * (vm[b.f_bus] * vm[b.t_bus] * sin(va[b.f_bus] - va[b.t_bus])) for
+        b in data.branch),
+    )
+
+    constraint(
+        c,
+        (p[b.t_idx] - b.c7 * vm[b.t_bus]^2 -
+        b.c1 * (vm[b.t_bus] * vm[b.f_bus] * cos(va[b.t_bus] - va[b.f_bus])) -
+        b.c2 * (vm[b.t_bus] * vm[b.f_bus] * sin(va[b.t_bus] - va[b.f_bus])) for
+        b in data.branch),
+    )
+
+    constraint(
+        c,
+        (q[b.t_idx] + b.c8 * vm[b.t_bus]^2 +
+        b.c2 * (vm[b.t_bus] * vm[b.f_bus] * cos(va[b.t_bus] - va[b.f_bus])) -
+        b.c1 * (vm[b.t_bus] * vm[b.f_bus] * sin(va[b.t_bus] - va[b.f_bus])) for
+        b in data.branch),
+    )
+
+    # Angle difference limits --------------------------------------------------
+    constraint(
+        c,
+        va[b.f_bus] - va[b.t_bus] for b in data.branch; lcon = data.angmin, ucon = data.angmax,
+    )
+
+    # Apparent power thermal limits -------------------------------------------
+    constraint(
+        c,
+        p[b.f_idx]^2 + q[b.f_idx]^2 - b.rate_a_sq for b in data.branch;
+        lcon = fill!(similar(data.branch, Float64, length(data.branch)), -Inf),
+    )
+    constraint(
+        c,
+        p[b.t_idx]^2 + q[b.t_idx]^2 - b.rate_a_sq for b in data.branch;
+        lcon = fill!(similar(data.branch, Float64, length(data.branch)), -Inf),
+    )
+
+    # Power balance at each bus -----------------------------------------------
+    load_balance_p = constraint(c, b.pd + b.gs * vm[b.i]^2 for b in data.bus)
+    load_balance_q = constraint(c, b.qd - b.bs * vm[b.i]^2 for b in data.bus)
+
+    # Map arc & generator variables into the bus balance equations
+    constraint!(c, load_balance_p, a.bus => p[a.i]   for a in data.arc)
+    constraint!(c, load_balance_q, a.bus => q[a.i]   for a in data.arc)
+    constraint!(c, load_balance_p, g.bus => -pg[g.i] for g in data.gen)
+    constraint!(c, load_balance_q, g.bus => -qg[g.i] for g in data.gen)
+
+    return ExaModel(c; prod = prod)
+end
+
+# TODO: parametric version
+
+function create_power_models(backend = OpenCLBackend())
+    models = ExaModel[]
+    push!(models, create_ac_power_model("pglib_opf_case14_ieee.m"; backend = backend))
+    names  = ["AC-OPF – IEEE-14"]
+    return models, names
+end 
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 4f3f8e6..ae48720 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,7 +2,16 @@ using BatchNLPKernels
 using Test
 using ExaModels
 using KernelAbstractions
+using DifferentiationInterface
+const DI = DifferentiationInterface
 
+import Zygote
+import FiniteDifferences
+
+using PowerModels
+PowerModels.silence()
+using PGLib
+using LinearAlgebra
 
 using OpenCL, pocl_jll, AcceleratedKernels
 ExaModels.convert_array(x, ::OpenCLBackend) = CLArray(x)
@@ -15,8 +24,17 @@ function Base.findall(f::F, bitarray::CLArray) where {F<:Function}
 end
 Base.findall(bitarray::CLArray) = Base.findall(identity, bitarray)
 
+import GPUArraysCore: @allowscalar
+
+if haskey(ENV, "BNK_TEST_CUDA")
+    using CUDA
+    @info "CUDA detected"
+end
+
 
 include("luksan.jl")
+include("power.jl")
+include("test_viols.jl")
 include("test_diff.jl")
 include("api.jl")
 include("config.jl")
\ No newline at end of file
diff --git a/test/test_diff.jl b/test/test_diff.jl
index 0539b96..6298fb3 100644
--- a/test/test_diff.jl
+++ b/test/test_diff.jl
@@ -1,26 +1,15 @@
-using DifferentiationInterface
-const DI = DifferentiationInterface
-
-import Zygote
-import FiniteDifferences
-
-
-function test_diff_gpu(model::ExaModel, batch_size::Int)
+function test_diff_gpu(model::ExaModel, batch_size::Int; MOD=OpenCL)
     bm = BOI.BatchModel(model, batch_size, config=BOI.BatchModelConfig(:full))
     
     nvar = model.meta.nvar
     ncon = model.meta.ncon
     nθ = length(model.θ)
     
-    X_cpu = randn(nvar, batch_size)
-    Θ_cpu = randn(nθ, batch_size)
-    
-    X_gpu = CLArray(X_cpu)
-    Θ_gpu = CLArray(Θ_cpu)
-    
-    @testset "obj_batch! CLArray" begin
+    X_gpu = MOD.randn(nvar, batch_size)
+    Θ_gpu = MOD.randn(nθ, batch_size)
+
+    @testset "obj_batch!" begin
         y = BOI.obj_batch!(bm, X_gpu, Θ_gpu)
-        @test y isa CLArray
         @test size(y) == (batch_size,)
 
         function f_gpu(params)
@@ -31,15 +20,14 @@ function test_diff_gpu(model::ExaModel, batch_size::Int)
         
         params = vcat(X_gpu, Θ_gpu)
         grad = DI.gradient(f_gpu, AutoZygote(), params)
-        @test grad isa AbstractMatrix && grad isa CLArray
+        @test grad isa AbstractMatrix
         @test size(grad) == size(params)
     end
     
     ncon == 0 && return
 
-    @testset "cons_nln_batch! CLArray" begin
+    @testset "cons_nln_batch!" begin
         y = BOI.cons_nln_batch!(bm, X_gpu, Θ_gpu)
-        @test y isa CLArray
         @test size(y) == (ncon, batch_size)
 
         function f_gpu(params)
@@ -50,7 +38,7 @@ function test_diff_gpu(model::ExaModel, batch_size::Int)
         
         params = vcat(X_gpu, Θ_gpu)
         grad = DI.gradient(f_gpu, AutoZygote(), params)
-        @test grad isa AbstractMatrix && grad isa CLArray
+        @test grad isa AbstractMatrix
         @test size(grad) == size(params)
     end
 end
@@ -111,7 +99,7 @@ function test_diff_cpu(model::ExaModel, batch_size::Int)
 end
 
 
-@testset "AD rules" begin
+@testset "AD rules - Luksan" begin
     cpu_models, names = create_luksan_models(CPU())
     gpu_models, _ = create_luksan_models(OpenCLBackend())
     
@@ -130,3 +118,58 @@ end
         end
     end
 end
+
+@testset "AD rules - Power" begin
+    cpu_models_p, names_p = create_power_models(CPU())
+    gpu_models_p, _       = create_power_models(OpenCLBackend())
+
+    for (name, (cpu_model, gpu_model)) in zip(names_p, zip(cpu_models_p, gpu_models_p))
+        @testset "$(name) Model" begin
+            for batch_size in [1, 4]
+                @testset "Batch Size $(batch_size)" begin
+                    @testset "CPU Diff" begin
+                        test_diff_cpu(cpu_model, batch_size)
+                    end
+                    @testset "OpenCL Diff" begin
+                        test_diff_gpu(gpu_model, batch_size)
+                    end
+                end
+            end
+        end
+    end
+end
+
+
+if haskey(ENV, "BNK_TEST_CUDA")
+    @testset "AD rules - Luksan - CUDA" begin
+        gpu_models, names = create_luksan_models(CUDABackend())
+        
+        for (name, gpu_model) in zip(names, gpu_models)
+            @testset "$name Model" begin
+                for batch_size in [1, 4]
+                    @testset "Batch Size $batch_size" begin
+                        @testset "GPU Diff" begin
+                            test_diff_gpu(gpu_model, batch_size, MOD=CUDA)
+                        end
+                    end
+                end
+            end
+        end
+    end
+    
+    @testset "AD rules - Power - CUDA" begin
+        gpu_models_p, names_p = create_power_models(CUDABackend())
+    
+        for (name, gpu_model) in zip(names_p, gpu_models_p)
+            @testset "$(name) Model" begin
+                for batch_size in [1, 4]
+                    @testset "Batch Size $(batch_size)" begin
+                        @testset "OpenCL Diff" begin
+                            test_diff_gpu(gpu_model, batch_size, MOD=CUDA)
+                        end
+                    end
+                end
+            end
+        end
+    end
+end
\ No newline at end of file
diff --git a/test/test_viols.jl b/test/test_viols.jl
new file mode 100644
index 0000000..e678db8
--- /dev/null
+++ b/test/test_viols.jl
@@ -0,0 +1,339 @@
+function test_violations_correctness(model::ExaModel, batch_size::Int; 
+                                   atol::Float64=1e-10, rtol::Float64=1e-10, MOD=OpenCL)
+    bm = BOI.BatchModel(model, batch_size, config=BOI.BatchModelConfig(:violations))
+    
+    nvar = model.meta.nvar
+    ncon = model.meta.ncon
+    nθ = length(model.θ)
+
+    X = MOD.randn(nvar, batch_size)
+    Θ = MOD.randn(nθ, batch_size)
+
+    @allowscalar if !isempty(model.meta.lvar) && !isempty(model.meta.uvar)
+        if isfinite(model.meta.lvar[1])
+            X[1, :] .= model.meta.lvar[1] - 0.1
+        end
+        if isfinite(model.meta.uvar[end])
+            X[end, :] .= model.meta.uvar[end] + 0.1
+        end
+    end
+    
+    @testset "Violations Correctness: $(nvar) vars, $(ncon) cons, $(nθ) params" begin
+        
+        @testset "All Violations" begin
+            if nθ > 0
+                Vc, Vb = BOI.all_violations!(bm, X, Θ)
+                @test size(Vc) == (ncon, batch_size)
+                @test size(Vb) == (nvar, batch_size)
+                @test all(>=(0), Vc)
+                @test all(>=(0), Vb)
+                @test all(isfinite, Vc)
+                @test all(isfinite, Vb)
+                @allowscalar begin
+                    if !isempty(model.meta.lvar) && !isempty(model.meta.uvar)
+                        if isfinite(model.meta.lvar[1])
+                            @test Vb[1, :] .≈ 0.1 atol=atol rtol=rtol
+                        end
+                        if isfinite(model.meta.uvar[end])
+                            @test Vb[end, :] .≈ 0.1 atol=atol rtol=rtol
+                        end
+                    end
+                end
+            end
+
+            Vc, Vb = BOI.all_violations!(bm, X)
+            @test size(Vc) == (ncon, batch_size)
+            @test size(Vb) == (nvar, batch_size)
+            @test all(>=(0), Vc)
+            @test all(>=(0), Vb)
+            @test all(isfinite, Vc)
+            @test all(isfinite, Vb)
+        end
+        
+        @testset "Constraint Violations" begin
+            if ncon > 0
+                V_cons = BOI.cons_nln_batch!(bm, X, Θ)
+                Vc = BOI.constraint_violations!(bm, V_cons)
+                
+                @test size(Vc) == (ncon, batch_size)
+                @test all(>=(0), Vc)
+                @test all(isfinite, Vc)
+                
+                for i in 1:batch_size
+                    @allowscalar nθ > 0 && (model.θ .= Θ[:, i])
+                    cons_vals = MOD.zeros(ncon)
+                    @allowscalar ExaModels.cons_nln!(model, X[:, i], cons_vals)
+                    
+                    @allowscalar for j in 1:ncon
+                        if isempty(model.meta.lcon) && isempty(model.meta.ucon)
+                            expected_viol = 0.0
+                        else
+                            lcon = model.meta.lcon[j]
+                            ucon = model.meta.ucon[j]
+                            
+                            lower_viol = isfinite(lcon) ? max(lcon - cons_vals[j], 0.0) : 0.0
+                            upper_viol = isfinite(ucon) ? max(cons_vals[j] - ucon, 0.0) : 0.0
+                            expected_viol = lower_viol + upper_viol
+                        end
+                        @test Vc[j, i] ≈ expected_viol atol=atol rtol=rtol
+                    end
+                end
+            end
+        end
+        
+        @testset "Bound Violations" begin
+            Vb = BOI.bound_violations!(bm, X)
+            @test size(Vb) == (nvar, batch_size)
+            @test all(>=(0), Vb)
+            @test all(isfinite, Vb)
+            
+            for i in 1:batch_size
+                @allowscalar for j in 1:nvar
+                    lvar = model.meta.lvar[j]
+                    uvar = model.meta.uvar[j]
+                    
+                    lower_viol = isfinite(lvar) ? max(lvar - X[j, i], 0.0) : 0.0
+                    upper_viol = isfinite(uvar) ? max(X[j, i] - uvar, 0.0) : 0.0
+                    expected_viol = lower_viol + upper_viol
+                    
+                    @test Vb[j, i] ≈ expected_viol atol=atol rtol=rtol
+                end
+            end
+        end
+        
+        @testset "Feasible Points" begin
+            X_feasible = MOD.zeros(nvar, batch_size)
+            if !isempty(model.meta.lvar) && !isempty(model.meta.uvar)
+                @allowscalar begin
+                    X_feasible .= (model.meta.lvar .+ model.meta.uvar) ./ 2
+                    unconstr = findall(isinf.(model.meta.lvar) .&& isinf.(model.meta.uvar))
+                    X_feasible[unconstr, :] .= zero(model.meta.lvar)[unconstr]
+                    lunconstr = findall(isinf.(model.meta.lvar) .&& isfinite.(model.meta.uvar))
+                    X_feasible[lunconstr, :] .= model.meta.uvar[lunconstr] .- 0.1
+                    uunconstr = findall(isfinite.(model.meta.lvar) .&& isinf.(model.meta.uvar))
+                    X_feasible[uunconstr, :] .= model.meta.lvar[uunconstr] .+ 0.1
+                    @assert all(isfinite, X_feasible)
+                end
+                Vb_feasible = BOI.bound_violations!(bm, X_feasible)
+                @test all(==(0), Vb_feasible)
+            end
+        end
+        
+        @testset "Dimension Validation" begin
+            X_wrong = MOD.randn(nvar + 1, batch_size)
+            @test_throws DimensionMismatch BOI.all_violations!(bm, X_wrong)
+            @test_throws DimensionMismatch BOI.bound_violations!(bm, X_wrong)
+            
+            if nθ > 0
+                Θ_wrong = MOD.randn(nθ + 1, batch_size)
+                @test_throws DimensionMismatch BOI.all_violations!(bm, X, Θ_wrong)
+            end
+            
+            if ncon > 0
+                V_wrong = MOD.randn(ncon + 1, batch_size)
+                @test_throws DimensionMismatch BOI.constraint_violations!(bm, V_wrong)
+            end
+        end
+        
+        @testset "Batch Size Validation" begin
+            X_large = MOD.randn(nvar, batch_size + 1)
+            @test_throws AssertionError BOI.all_violations!(bm, X_large)
+            @test_throws AssertionError BOI.bound_violations!(bm, X_large)
+            
+            if ncon > 0
+                V_large = MOD.randn(ncon, batch_size + 1)
+                @test_throws AssertionError BOI.constraint_violations!(bm, V_large)
+            end
+        end
+    end
+end
+
+function test_violations_differentiability_gpu(model::ExaModel, batch_size::Int; MOD=OpenCL)
+    bm = BOI.BatchModel(model, batch_size, config=BOI.BatchModelConfig(:viol_grad))
+    
+    nvar = model.meta.nvar
+    ncon = model.meta.ncon
+    nθ = length(model.θ)
+    
+    X_gpu = MOD.randn(nvar, batch_size)
+    Θ_gpu = MOD.randn(nθ, batch_size)
+    
+    @testset "Violations Differentiability GPU" begin
+        @testset "All Violations Sum" begin
+            function f_all_viols(params)
+                X = params[1:nvar, :]
+                Θ = params[nvar+1:end, :]
+                Vc, Vb = BOI.all_violations!(bm, X, Θ)
+                return sum(Vc) + sum(Vb)
+            end
+            
+            params = vcat(X_gpu, Θ_gpu)
+            grad = DI.gradient(f_all_viols, AutoZygote(), params)
+            @test size(grad) == size(params)
+            @test all(isfinite, grad)
+        end
+        
+        @testset "Constraint Violations Sum" begin
+            if ncon > 0
+                function f_cons_viols(params)
+                    X = params[1:nvar, :]
+                    Θ = params[nvar+1:end, :]
+                    V_cons = BOI.cons_nln_batch!(bm, X, Θ)
+                    Vc = BOI.constraint_violations!(bm, V_cons)
+                    return sum(Vc)
+                end
+                
+                params = vcat(X_gpu, Θ_gpu)
+                grad = DI.gradient(f_cons_viols, AutoZygote(), params)
+                @test size(grad) == size(params)
+                @test all(isfinite, grad)
+            end
+        end
+        
+        @testset "Bound Violations Sum" begin
+            function f_bound_viols(X)
+                Vb = BOI.bound_violations!(bm, X)
+                return sum(Vb)
+            end
+            
+            grad = DI.gradient(f_bound_viols, AutoZygote(), X_gpu)
+            @test size(grad) == size(X_gpu)
+            @test all(isfinite, grad)
+        end
+    end
+end
+
+function test_violations_differentiability_cpu(model::ExaModel, batch_size::Int)
+    bm = BOI.BatchModel(model, batch_size, config=BOI.BatchModelConfig(:viol_grad))
+    
+    nvar = model.meta.nvar
+    nθ = length(model.θ)
+    
+    X_cpu = randn(nvar, batch_size)
+    Θ_cpu = randn(nθ, batch_size)
+    
+    @testset "Violations Differentiability CPU" begin
+        @testset "All Violations Sum" begin
+            function f_all_viols(params)
+                X = params[1:nvar, :]
+                Θ = params[nvar+1:end, :]
+                Vc, Vb = BOI.all_violations!(bm, X, Θ)
+                return sum(Vc) + sum(Vb)
+            end
+            
+            params = vcat(X_cpu, Θ_cpu)
+            grad = DI.gradient(f_all_viols, AutoZygote(), params)
+            @test grad isa AbstractMatrix
+            @test size(grad) == size(params)
+            @test all(isfinite, grad)
+            @testset "FiniteDifferences" begin
+                gradfd = DI.gradient(f_all_viols, AutoFiniteDifferences(fdm=FiniteDifferences.central_fdm(3,1)), params)
+                @test gradfd[1:nvar,:] ≈ grad[1:nvar,:] atol=1e-4 rtol=1e-4
+            end
+        end
+        
+        @testset "Bound Violations Sum" begin
+            function f_bound_viols(X)
+                Vb = BOI.bound_violations!(bm, X)
+                return sum(Vb)
+            end
+            
+            grad = DI.gradient(f_bound_viols, AutoZygote(), X_cpu)
+            @test grad isa AbstractMatrix
+            @test size(grad) == size(X_cpu)
+            @test all(isfinite, grad)
+            @testset "FiniteDifferences" begin
+                gradfd = DI.gradient(f_bound_viols, AutoFiniteDifferences(fdm=FiniteDifferences.central_fdm(3,1)), X_cpu)
+                @test gradfd ≈ grad atol=1e-4 rtol=1e-4
+            end
+        end
+    end
+end
+
+function test_violations_config_errors(MOD=OpenCL)
+    model = create_luksan_vlcek_model(5; M = 1)
+    batch_size = 2
+    nvar = model.meta.nvar
+    ncon = model.meta.ncon
+    nθ = length(model.θ)
+    
+    X = MOD.randn(nvar, batch_size)
+    Θ = MOD.randn(nθ, batch_size)
+    
+    @testset "Config Errors" begin
+        bm_no_viols = BOI.BatchModel(model, batch_size, config=BOI.BatchModelConfig(:minimal))
+        @test_throws ArgumentError BOI.all_violations!(bm_no_viols, X, Θ)
+        @test_throws ArgumentError BOI.bound_violations!(bm_no_viols, X)
+        
+        if ncon > 0
+            V = MOD.randn(ncon, batch_size)
+            @test_throws ArgumentError BOI.constraint_violations!(bm_no_viols, V)
+        end
+    end
+end
+
+@testset "Violations API - Luksan" begin
+    @testset "Config Errors" begin
+        test_violations_config_errors(OpenCL)
+    end
+    
+    cpu_models, names = create_luksan_models(CPU())
+    gpu_models, _ = create_luksan_models(OpenCLBackend())
+    
+    for (name, (cpu_model, gpu_model)) in zip(names, zip(cpu_models, gpu_models))
+        @testset "$name Model" begin
+            for batch_size in [1, 4]
+                @testset "Batch Size $batch_size" begin
+                    @testset "Correctness" begin
+                        test_violations_correctness(gpu_model, batch_size, atol=1e-5, rtol=1e-5)
+                    end
+                    @testset "CPU Differentiability" begin
+                        test_violations_differentiability_cpu(cpu_model, batch_size)
+                    end
+                    @testset "GPU Differentiability" begin
+                        test_violations_differentiability_gpu(gpu_model, batch_size)
+                    end
+                end
+            end
+        end
+    end
+end
+
+@testset "Violations API - Power" begin
+    cpu_models_p, names_p = create_power_models(CPU())
+    gpu_models_p, _       = create_power_models(OpenCLBackend())
+
+    for (name, (cpu_model, gpu_model)) in zip(names_p, zip(cpu_models_p, gpu_models_p))
+        @testset "$(name) Model" begin
+            for batch_size in [1, 4]
+                @testset "Batch Size $(batch_size)" begin
+                    @testset "Correctness" begin
+                        test_violations_correctness(gpu_model, batch_size; atol=1e-5, rtol=1e-5)
+                    end
+                    @testset "CPU Differentiability" begin
+                        test_violations_differentiability_cpu(cpu_model, batch_size)
+                    end
+                    @testset "GPU Differentiability" begin
+                        test_violations_differentiability_gpu(gpu_model, batch_size)
+                    end
+                end
+            end
+        end
+    end
+end 
+
+if haskey(ENV, "BNK_TEST_CUDA")
+    @testset "Violations API - CUDA" begin
+        gpu_models_p, names_p = create_power_models(CUDABackend())
+        for (name, gpu_model) in zip(names_p, gpu_models_p)
+            @testset "$name Model" begin
+                for batch_size in [1, 4]
+                    @testset "Batch Size $batch_size" begin
+                        test_violations_correctness(gpu_model, batch_size, atol=1e-5, rtol=1e-5, MOD=CUDA)
+                        test_violations_differentiability_gpu(gpu_model, batch_size, MOD=CUDA)
+                    end
+                end
+            end
+        end
+    end
+end
\ No newline at end of file