diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl index 1a888e7..99ef909 100644 --- a/test/ArrayDiff.jl +++ b/test/ArrayDiff.jl @@ -32,11 +32,13 @@ function test_objective_dot_univariate() @test sizes.size_offset == [0, 1, 0, 0, 0] @test sizes.size == [1, 1] @test sizes.storage_offset == [0, 1, 2, 3, 4, 5] - x = [1.2] - @test MOI.eval_objective(evaluator, x) == x[1]^2 + xv = [1.2] + @test MOI.eval_objective(evaluator, xv) == xv[1]^2 + @test 0 == @allocated MOI.eval_objective(evaluator, xv) g = ones(1) - MOI.eval_objective_gradient(evaluator, g, x) - @test g[1] == 2x[1] + MOI.eval_objective_gradient(evaluator, g, xv) + @test g[1] == 2xv[1] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xv) return end @@ -51,11 +53,13 @@ function test_objective_dot_univariate_and_scalar_mult() @test sizes.size_offset == [0, 0, 0, 1, 0, 0, 0] @test sizes.size == [1, 1] @test sizes.storage_offset == [0, 1, 2, 3, 4, 5, 6, 7] - x = [1.2] - @test MOI.eval_objective(evaluator, x) == 2*x[1]^2 + xv = [1.2] + @test MOI.eval_objective(evaluator, xv) == 2*xv[1]^2 + @test 0 == @allocated MOI.eval_objective(evaluator, xv) g = ones(1) - MOI.eval_objective_gradient(evaluator, g, x) - @test g[1] == 4x[1] + MOI.eval_objective_gradient(evaluator, g, xv) + @test g[1] == 4xv[1] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xv) return end @@ -75,11 +79,13 @@ function test_objective_dot_bivariate() @test sizes.size == [2, 2, 2, 2, 2, 2, 2] @test sizes.storage_offset == [0, 1, 3, 5, 6, 7, 9, 10, 11, 13, 15, 17, 18, 19, 21, 22, 23] - x = [5, -1] - @test MOI.eval_objective(evaluator, x) ≈ 25 + xv = [5.0, -1.0] + @test MOI.eval_objective(evaluator, xv) ≈ 25 + @test 0 == @allocated MOI.eval_objective(evaluator, xv) g = ones(2) - MOI.eval_objective_gradient(evaluator, g, x) - @test g == 2(x - [1, 2]) + MOI.eval_objective_gradient(evaluator, g, xv) + @test g == 2(xv - [1, 2]) + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xv) return end @@ -101,10 +107,13 @@ function test_objective_hcat_scalars() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == 14.0 + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == 14.0 + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [2.0, 1.0, 4.0, 3.0] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -129,10 +138,13 @@ function test_objective_hcat_vectors() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == 14.0 + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == 14.0 + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [2.0, 1.0, 4.0, 3.0] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -150,11 +162,13 @@ function test_objective_dot_bivariate_on_rows() @test sizes.size == [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2] @test sizes.storage_offset == [0, 1, 3, 5, 6, 7, 9, 10, 11, 13, 15, 17, 18, 19, 21, 22, 23] - x = [5, -1] - @test MOI.eval_objective(evaluator, x) ≈ 25 + xv = [5.0, -1.0] + @test MOI.eval_objective(evaluator, xv) ≈ 25 + @test 0 == @allocated MOI.eval_objective(evaluator, xv) g = ones(2) - MOI.eval_objective_gradient(evaluator, g, x) - @test g == 2(x - [1, 2]) + MOI.eval_objective_gradient(evaluator, g, xv) + @test g == 2(xv - [1, 2]) + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xv) return end @@ -169,11 +183,13 @@ function test_objective_norm_univariate() @test sizes.size_offset == [0, 0, 0] @test sizes.size == [1] @test sizes.storage_offset == [0, 1, 2, 3] - x = [1.2] - @test MOI.eval_objective(evaluator, x) == abs(x[1]) + xv = [1.2] + @test MOI.eval_objective(evaluator, xv) == abs(xv[1]) + @test 0 == @allocated MOI.eval_objective(evaluator, xv) g = ones(1) - MOI.eval_objective_gradient(evaluator, g, x) - @test g[1] == sign(x[1]) + MOI.eval_objective_gradient(evaluator, g, xv) + @test g[1] == sign(xv[1]) + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xv) return end @@ -189,16 +205,20 @@ function test_objective_norm_bivariate() @test sizes.size_offset == [0, 0, 0, 0] @test sizes.size == [2] @test sizes.storage_offset == [0, 1, 3, 4, 5] - x = [3.0, 4.0] - @test MOI.eval_objective(evaluator, x) == 5.0 + xv = [3.0, 4.0] + @test MOI.eval_objective(evaluator, xv) == 5.0 + @test 0 == @allocated MOI.eval_objective(evaluator, xv) g = ones(2) - MOI.eval_objective_gradient(evaluator, g, x) - @test g == x / 5.0 - y = [0.0, 0.0] - @test MOI.eval_objective(evaluator, y) == 0.0 + MOI.eval_objective_gradient(evaluator, g, xv) + @test g == xv / 5.0 + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xv) + yv = [0.0, 0.0] + @test MOI.eval_objective(evaluator, yv) == 0.0 + @test 0 == @allocated MOI.eval_objective(evaluator, yv) g = ones(2) - MOI.eval_objective_gradient(evaluator, g, y) + MOI.eval_objective_gradient(evaluator, g, yv) @test g == [0.0, 0.0] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, yv) return end @@ -216,10 +236,13 @@ function test_objective_norm_of_row_vector() @test sizes.storage_offset == [0, 1, 3, 4, 5] x1 = 1.0 x2 = 2.0 - @test MOI.eval_objective(evaluator, [x1, x2]) == sqrt(5.0) + x = [x1, x2] + @test MOI.eval_objective(evaluator, x) == sqrt(5.0) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(2) - MOI.eval_objective_gradient(evaluator, g, [x1, x2]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [1.0 / sqrt(5.0), 2.0 / sqrt(5.0)] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -241,10 +264,13 @@ function test_objective_norm_of_vcat_vector() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(10.0) + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == sqrt(10.0) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [1.0 / sqrt(10.0), 0.0, 3.0 / sqrt(10.0), 0.0] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -266,15 +292,18 @@ function test_objective_norm_of_vcat_matrix() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(30.0) + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == sqrt(30.0) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [ 1.0 / sqrt(30.0), 2.0 / sqrt(30.0), 3.0 / sqrt(30.0), 4.0 / sqrt(30.0), ] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -292,10 +321,13 @@ function test_objective_norm_of_row() @test sizes.storage_offset == [0, 1, 3, 4, 5] x1 = 1.0 x2 = 2.0 - @test MOI.eval_objective(evaluator, [x1, x2]) == sqrt(5.0) + x = [x1, x2] + @test MOI.eval_objective(evaluator, x) == sqrt(5.0) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(2) - MOI.eval_objective_gradient(evaluator, g, [x1, x2]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [1.0 / sqrt(5.0), 2.0 / sqrt(5.0)] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -317,15 +349,18 @@ function test_objective_norm_of_matrix() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(30.0) + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == sqrt(30.0) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [ 1.0 / sqrt(30.0), 2.0 / sqrt(30.0), 3.0 / sqrt(30.0), 4.0 / sqrt(30.0), ] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -349,10 +384,13 @@ function test_objective_norm_of_matrix_with_sum() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(14.0) + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == sqrt(14.0) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [0.0, 1.0 / sqrt(14.0), 2.0 / sqrt(14.0), 3.0 / sqrt(14.0)] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -376,15 +414,18 @@ function test_objective_norm_of_product_of_matrices() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(30.0) + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == sqrt(30.0) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [ 1.0 / sqrt(30.0), 2.0 / sqrt(30.0), 3.0 / sqrt(30.0), 4.0 / sqrt(30.0), ] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -486,15 +527,18 @@ function test_objective_norm_of_product_of_matrices_with_sum() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(54.0) + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == sqrt(54.0) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [ 2.0 / sqrt(54.0), 3.0 / sqrt(54.0), 4.0 / sqrt(54.0), 5.0 / sqrt(54.0), ] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -517,15 +561,18 @@ function test_objective_norm_of_mtx_vector_product() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(58.0) + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == sqrt(58.0) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [ 3.0 / sqrt(58.0), 3.0 / sqrt(58.0), 7.0 / sqrt(58.0), 7.0 / sqrt(58.0), ] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -540,11 +587,13 @@ function test_objective_univariate_operator() @test sizes.size_offset == [0, 0] @test sizes.size == [] @test sizes.storage_offset == [0, 1, 2] - x = [pi / 4] - @test MOI.eval_objective(evaluator, x) ≈ sqrt(2) / 2 + xv = [pi / 4] + @test MOI.eval_objective(evaluator, xv) ≈ sqrt(2) / 2 + @test 0 == @allocated MOI.eval_objective(evaluator, xv) g = ones(1) - MOI.eval_objective_gradient(evaluator, g, x) + MOI.eval_objective_gradient(evaluator, g, xv) @test g[1] ≈ cos(pi / 4) + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xv) return end @@ -566,10 +615,13 @@ function test_objective_broadcasted_product() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(3.0^2 + 8.0^2) + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == sqrt(3.0^2 + 8.0^2) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [9.0, 32.0, 3.0, 16.0] / sqrt(3.0^2 + 8.0^2) + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -596,16 +648,19 @@ function test_objective_broadcasted_matrix_product() x2 = 2.0 x3 = 3.0 x4 = 4.0 - @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == + x = [x1, x2, x3, x4] + @test MOI.eval_objective(evaluator, x) == sqrt(1.0^2 + 4.0^2 + 9.0^2 + 16.0^2) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(4) - MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [ 2 * 1.0^3 / sqrt(354), 2 * 2.0^3 / sqrt(354), 2 * 3.0^3 / sqrt(354), 2 * 4.0^3 / sqrt(354), ] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -623,14 +678,16 @@ function test_objective_broadcasted_tanh() @test sizes.storage_offset == [0, 1, 3, 5, 6, 7] x1 = 1.0 x2 = 2.0 - @test MOI.eval_objective(evaluator, [x1, x2]) == - sqrt(tanh(1.0)^2 + tanh(2.0)^2) + x = [x1, x2] + @test MOI.eval_objective(evaluator, x) == sqrt(tanh(1.0)^2 + tanh(2.0)^2) + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(2) - MOI.eval_objective_gradient(evaluator, g, [x1, x2]) + MOI.eval_objective_gradient(evaluator, g, x) @test g ≈ [ tanh(1.0) * (1 - tanh(1.0)^2) / sqrt(tanh(1.0)^2 + tanh(2.0)^2), tanh(2.0) * (1 - tanh(2.0)^2) / sqrt(tanh(1.0)^2 + tanh(2.0)^2), ] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -643,10 +700,13 @@ function test_objective_broadcasted_pow_vector_1() MOI.initialize(evaluator, [:Grad]) x1v = 3.0 x2v = -4.0 - @test MOI.eval_objective(evaluator, [x1v, x2v]) == x1v + x2v + x = [x1v, x2v] + @test MOI.eval_objective(evaluator, x) == x1v + x2v + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = zeros(2) - MOI.eval_objective_gradient(evaluator, g, [x1v, x2v]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == ones(2) + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -659,10 +719,13 @@ function test_objective_broadcasted_pow_vector_2() MOI.initialize(evaluator, [:Grad]) x1v = 3.0 x2v = -4.0 - @test MOI.eval_objective(evaluator, [x1v, x2v]) == x1v^2 + x2v^2 + x = [x1v, x2v] + @test MOI.eval_objective(evaluator, x) == x1v^2 + x2v^2 + @test 0 == @allocated MOI.eval_objective(evaluator, x) g = ones(2) - MOI.eval_objective_gradient(evaluator, g, [x1v, x2v]) + MOI.eval_objective_gradient(evaluator, g, x) @test g == [2 * x1v, 2 * x2v] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x) return end @@ -681,9 +744,11 @@ function test_objective_broadcasted_pow_matrix_with_constant() xs = [1.0, 2.0, 3.0, 4.0] @test MOI.eval_objective(evaluator, xs) == (1-1)^2 + (2-1)^2 + (3-1)^2 + (4-1)^2 + @test 0 == @allocated MOI.eval_objective(evaluator, xs) g = ones(4) MOI.eval_objective_gradient(evaluator, g, xs) @test g == [2 * (1 - 1), 2 * (2 - 1), 2 * (3 - 1), 2 * (4 - 1)] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xs) return end @@ -696,9 +761,11 @@ function test_objective_broadcasted_pow_cubed() MOI.initialize(evaluator, [:Grad]) xs = [2.0, 3.0] @test MOI.eval_objective(evaluator, xs) ≈ 2.0^3 + 3.0^3 + @test 0 == @allocated MOI.eval_objective(evaluator, xs) g = ones(2) MOI.eval_objective_gradient(evaluator, g, xs) @test g ≈ [3 * 2.0^2, 3 * 3.0^2] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xs) return end @@ -781,9 +848,11 @@ function test_model_typed_float32_evaluator_runs() MOI.initialize(evaluator, [:Grad]) xv = [1.5] @test MOI.eval_objective(evaluator, xv) ≈ 2 * xv[1]^2 + 1.0 + @test 0 == @allocated MOI.eval_objective(evaluator, xv) g = ones(1) MOI.eval_objective_gradient(evaluator, g, xv) @test g[1] ≈ 4 * xv[1] + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, xv) return end @@ -805,15 +874,35 @@ function test_residual_with_subexpression() F = zeros(2) ArrayDiff.eval_residual!(evaluator, F, x) @test F == [15.0, -8.0] + @test 0 == @allocated ArrayDiff.eval_residual!(evaluator, F, x) # J = [1+x2 x1 ; -x2 1-x1] = [5 3 ; -4 -2] Jtv = zeros(2) - ArrayDiff.eval_residual_jtprod!(evaluator, Jtv, x, [1.0, 1.0]) + v_ones = [1.0, 1.0] + ArrayDiff.eval_residual_jtprod!(evaluator, Jtv, x, v_ones) @test Jtv == [1.0, 1.0] + @test 0 == + @allocated ArrayDiff.eval_residual_jtprod!(evaluator, Jtv, x, v_ones) Jv = zeros(2) - ArrayDiff.eval_residual_jprod!(evaluator, Jv, x, [1.0, 0.0]) + v_e1 = [1.0, 0.0] + ArrayDiff.eval_residual_jprod!(evaluator, Jv, x, v_e1) @test Jv == [5.0, -4.0] - ArrayDiff.eval_residual_jprod!(evaluator, Jv, x, [0.0, 1.0]) + # `eval_residual_jprod!` is not allocation-free: it allocates `seed` and + # `row` on every call (see `src/mathoptinterface_api.jl`). + @test_broken 0 == @allocated ArrayDiff.eval_residual_jprod!( + evaluator, + Jv, + x, + v_e1, + ) + v_e2 = [0.0, 1.0] + ArrayDiff.eval_residual_jprod!(evaluator, Jv, x, v_e2) @test Jv == [3.0, -2.0] + @test_broken 0 == @allocated ArrayDiff.eval_residual_jprod!( + evaluator, + Jv, + x, + v_e2, + ) return end diff --git a/test/JuMP.jl b/test/JuMP.jl index 9c44143..56219fc 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -164,7 +164,13 @@ function test_parse_moi() return end -function _eval(model::JuMP.GenericModel{T}, func, x) where {T} +function _eval( + model::JuMP.GenericModel{T}, + func, + x, + obj_val, + grad_val, +) where {T} mode = ArrayDiff.Mode{Vector{T}}() ad = ArrayDiff.model(mode) MOI.Nonlinear.set_objective(ad, JuMP.moi_function(func)) @@ -174,12 +180,20 @@ function _eval(model::JuMP.GenericModel{T}, func, x) where {T} JuMP.index.(JuMP.all_variables(model)), ) MOI.initialize(evaluator, [:Grad]) - val = MOI.eval_objective(evaluator, x) + x_grad = T.(collect(1:8)) + @test MOI.eval_objective(evaluator, x) ≈ obj_val + if VERSION >= v"1.12" + @test 0 == @allocated MOI.eval_objective(evaluator, x) + end g = zero(x) - MOI.eval_objective_gradient(evaluator, g, T.(collect(1:8))) + MOI.eval_objective_gradient(evaluator, g, x_grad) + @test g ≈ grad_val + if VERSION >= v"1.12" + @test 0 == @allocated MOI.eval_objective_gradient(evaluator, g, x_grad) + end MOI.Nonlinear.set_objective(ad, nothing) @test isnothing(ad.objective) - return val, g + return end function _test_neural( @@ -242,7 +256,6 @@ function _test_neural( end W1_val = T[0.3 -0.2; 0.1 0.4] W2_val = T[-0.1 0.5; 0.2 -0.3] - obj, g = _eval(model, loss, [vec(W1_val); vec(W2_val)]) # Reference computed from the same hand-written forward/reverse formulas # as `perf/cuda_vs_pytorch.jl::forward_pass`/`reverse_diff`, adapted to # this test's loss `sum((Y - target).^2)` (no `/ n` scaling, full gradient @@ -255,7 +268,6 @@ function _test_neural( if with_norm obj_val = sqrt(obj_val) end - @test obj ≈ obj_val W1_at_grad = reshape(T[1.0, 2.0, 3.0, 4.0], 2, 2) W2_at_grad = reshape(T[5.0, 6.0, 7.0, 8.0], 2, 2) grad_sumsq = _ref_gradient(W1_at_grad, W2_at_grad, X_const, target_const) @@ -264,10 +276,11 @@ function _test_neural( # taken at the gradient evaluation point. norm_at_grad = sqrt(_ref_objective(W1_at_grad, W2_at_grad, X_const, target_const)) - @test g ≈ grad_sumsq ./ (2 * norm_at_grad) + grad_val = grad_sumsq ./ (2 * norm_at_grad) else - @test g ≈ grad_sumsq + grad_val = grad_sumsq end + _eval(model, loss, [vec(W1_val); vec(W2_val)], obj_val, grad_val) return end @@ -318,61 +331,6 @@ function test_neural() end end -# Builds the same `sum((W2*tanh.(W1*X) - target)^2)` MLP that `test_neural` -# exercises and checks that, after warmup, both `eval_objective` and -# `eval_objective_gradient` are allocation-free on the CPU `Vector{Float64}` -# tape — including when the input `x` has changed since the last call (which -# is the path that actually re-runs forward+reverse, not the -# `last_x == x` short-circuit). -function test_neural_allocations() - if VERSION < v"1.12" - return - end - n = 2 - X = [1.0 0.5; 0.3 0.8] - target = [0.5 0.2; 0.1 0.7] - model = Model() - @variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) - @variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) - Y = W2 * tanh.(W1 * X) - loss = sum((Y .- target) .^ 2) - mode = ArrayDiff.Mode() - ad = ArrayDiff.model(mode) - MOI.Nonlinear.set_objective(ad, JuMP.moi_function(loss)) - evaluator = MOI.Nonlinear.Evaluator( - ad, - mode, - JuMP.index.(JuMP.all_variables(model)), - ) - MOI.initialize(evaluator, [:Grad]) - x1 = Float64.(collect(1:8)) - x2 = Float64.(collect(2:9)) - g = zeros(8) - # Wrapped in typed functions so `@allocated` doesn't capture the - # return-value boxing that happens when calling `eval_objective` - # directly from the macro's untyped scope (each `MOI.eval_objective` - # returns a `Float64` which then escapes into `Any`-typed scope). - _obj(ev, x) = MOI.eval_objective(ev, x) - function _grad!(ev, g, x) - MOI.eval_objective_gradient(ev, g, x) - return nothing - end - # Warmup: trigger JIT compilation for both `eval_objective` and - # `eval_objective_gradient`. Two distinct inputs so `_reverse_mode`'s - # `last_x == x` short-circuit doesn't elide the work on the second call. - _obj(evaluator, x1) - _obj(evaluator, x2) - _grad!(evaluator, g, x1) - _grad!(evaluator, g, x2) - # Now alternate: each measured call sees `last_x ≠ x`, so it actually - # runs the full forward + reverse passes through the block tape. - @test 0 == @allocated _obj(evaluator, x1) - @test 0 == @allocated _obj(evaluator, x2) - @test 0 == @allocated _grad!(evaluator, g, x1) - @test 0 == @allocated _grad!(evaluator, g, x2) - return -end - function test_moi_function() model = Model() @variable(model, W[1:2, 1:2], container = ArrayDiff.ArrayOfVariables) diff --git a/test/Project.toml b/test/Project.toml index 7788561..cf8011e 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,8 +1,6 @@ [deps] ArrayDiff = "c45fa1ca-6901-44ac-ae5b-5513a4852d50" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" -JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb" GenOpt = "f2c049d8-7489-4223-990c-4f1c121a4cde" JSOSolvers = "10dff2fc-5484-5881-a0e0-c90441020f8a" JuMP = "4076af6c-e467-56ae-b986-b466b2749572"