From eed7ce900a72fa9382f7d5bab33822258db08b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Fri, 8 May 2026 14:04:44 +0200 Subject: [PATCH 1/4] Fix size inference with broadcast --- src/sizes.jl | 56 ++++++++++++++++------------------------------- test/ArrayDiff.jl | 4 ++-- 2 files changed, 21 insertions(+), 39 deletions(-) diff --git a/src/sizes.jl b/src/sizes.jl index 7fc3c90..0ca4cf7 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -357,47 +357,29 @@ function _infer_sizes( continue end op = DEFAULT_MULTIVARIATE_OPERATORS[node.index] - if op == :+ || op == :- - # Broadcasted +/- preserves shape - _copy_size!(sizes, k, children_arr[first(children_indices)]) - elseif op == :^ - # Broadcasted ^ with scalar exponent preserves base shape - _copy_size!(sizes, k, children_arr[first(children_indices)]) - elseif op == :* - # TODO assert compatible sizes and all ndims should be 0 or 2 + if op == :+ || op == :- || op == :* + # Broadcasted +/-/* takes the largest child's shape (for + # scalar+matrix that's the matrix; for matrix+matrix we + # currently assume they match and pick the first). first_matrix = findfirst(children_indices) do i return !iszero(sizes.ndims[children_arr[i]]) end - if !isnothing(first_matrix) - if sizes.ndims[children_arr[first(children_indices)]] == 0 - _add_size!(sizes, k, (1, 1)) - continue - else - if sizes.ndims[children_arr[first(children_indices)]] == - 1 - nb_cols = 1 - else - nb_cols = _size( - sizes, - children_arr[first(children_indices)], - 1, - ) - end - _add_size!( - sizes, - k, - ( - _size( - sizes, - children_arr[first(children_indices)], - 1, - ), - nb_cols, - ), - ) - continue - end + if isnothing(first_matrix) + _copy_size!( + sizes, + k, + children_arr[first(children_indices)], + ) + else + _copy_size!( + sizes, + k, + children_arr[children_indices[first_matrix]], + ) end + elseif op == :^ + # Broadcasted ^ with scalar exponent preserves base shape + _copy_size!(sizes, k, children_arr[first(children_indices)]) end elseif node.type == NODE_CALL_UNIVARIATE if !( diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl index 99ef909..067eae8 100644 --- a/test/ArrayDiff.jl +++ b/test/ArrayDiff.jl @@ -607,9 +607,9 @@ function test_objective_broadcasted_product() evaluator = ArrayDiff.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4]) MOI.initialize(evaluator, [:Grad]) sizes = evaluator.backend.objective.expr.sizes - @test sizes.ndims == [0, 2, 1, 0, 0, 1, 0, 0] + @test sizes.ndims == [0, 1, 1, 0, 0, 1, 0, 0] @test sizes.size_offset == [0, 2, 1, 0, 0, 0, 0, 0] - @test sizes.size == [2, 2, 2, 1] + @test sizes.size == [2, 2, 2] @test sizes.storage_offset == [0, 1, 3, 5, 6, 7, 9, 10, 11] x1 = 1.0 x2 = 2.0 From d69013e54299d1ea4d1129490fe57757466811a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Fri, 8 May 2026 15:39:31 +0200 Subject: [PATCH 2/4] Add unit tests --- test/JuMP.jl | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/test/JuMP.jl b/test/JuMP.jl index 56219fc..27384a0 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -345,6 +345,95 @@ function test_moi_function() return end +# Build an evaluator for `expr` with `vars` as the variable order. Returns the +# inferred sizes plus the evaluated objective and gradient at `x`. +function _build_and_eval(expr, vars, x) + mode = ArrayDiff.Mode() + ad = ArrayDiff.model(mode) + MOI.Nonlinear.set_objective(ad, JuMP.moi_function(expr)) + evaluator = MOI.Nonlinear.Evaluator(ad, mode, JuMP.index.(vars)) + MOI.initialize(evaluator, [:Grad]) + sizes = evaluator.backend.objective.expr.sizes + val = MOI.eval_objective(evaluator, x) + g = zero(x) + MOI.eval_objective_gradient(evaluator, g, x) + return sizes, val, g +end + +# The previous size-inference code special-cased broadcasted `+/-/*` with a +# `nb_cols` formula that happened to match for square matrices. A 2x3 input +# shape exercises the bug: with the old code the result would be reported as +# (2, 2) instead of (2, 3), and `eval_objective` would read past the tape. +function test_broadcast_nonsquare_matrix() + model = Model() + @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables) + Y = [10.0 20.0 30.0; 40.0 50.0 60.0] + x = Float64.(collect(1:6)) + W_val = reshape(x, 2, 3) + @testset "$(op)" for (op, expr, ref_mat) in [ + (:+, LinearAlgebra.norm(W .+ Y), W_val .+ Y), + (:-, LinearAlgebra.norm(W .- Y), W_val .- Y), + (:*, LinearAlgebra.norm(W .* W), W_val .* W_val), + ] + sizes, val, g = _build_and_eval(expr, JuMP.all_variables(model), x) + # Outer norm scalar, then the broadcasted op produces a 2x3 matrix, + # then the two 2x3 leaves: 4 nodes, three of them ndims=2 with size + # (2, 3). The old bug would report (2, 2) for the broadcast node. + @test sizes.ndims == [0, 2, 2, 2] + @test sizes.size == [2, 3, 2, 3, 2, 3] + @test sizes.size_offset == [0, 4, 2, 0] + @test sizes.storage_offset == [0, 1, 7, 13, 19] + @test val ≈ LinearAlgebra.norm(ref_mat) + ref_g = if op == :+ + vec(W_val .+ Y) ./ LinearAlgebra.norm(ref_mat) + elseif op == :- + vec(W_val .- Y) ./ LinearAlgebra.norm(ref_mat) + else # :* + # d(norm(W .* W))/dW = 2 .* W .^ 3 / norm(W .* W) + vec(2 .* W_val .^ 3) ./ LinearAlgebra.norm(ref_mat) + end + @test g ≈ ref_g + end + return +end + +# The fix replaced the old `(1, 1)` stub for `scalar .op matrix` with a copy +# of the matrix child's full shape. Eval/reverse for these mixed-rank +# broadcasts isn't implemented yet (and is out of scope for the fix), so we +# only assert the inferred shape — that's where the previous code was wrong. +function test_broadcast_scalar_matrix_size_inference() + model = Model() + @variable(model, W[1:2, 1:3], container = ArrayDiff.ArrayOfVariables) + mode = ArrayDiff.Mode() + @testset "$(name)" for (name, expr) in [ + ("scalar .* M", LinearAlgebra.norm(2.5 .* W)), + ("M .* scalar", LinearAlgebra.norm(W .* 2.5)), + ("scalar .+ M", LinearAlgebra.norm(2.5 .+ W)), + ("M .+ scalar", LinearAlgebra.norm(W .+ 2.5)), + ("scalar .- M", LinearAlgebra.norm(2.5 .- W)), + ("M .- scalar", LinearAlgebra.norm(W .- 2.5)), + ] + ad = ArrayDiff.model(mode) + MOI.Nonlinear.set_objective(ad, JuMP.moi_function(expr)) + evaluator = MOI.Nonlinear.Evaluator( + ad, + mode, + JuMP.index.(JuMP.all_variables(model)), + ) + MOI.initialize(evaluator, [:Grad]) + sizes = evaluator.backend.objective.expr.sizes + # Broadcast node is at index 2; it should inherit the matrix child's + # (2, 3) shape, not the old `(1, 1)` stub. + @test sizes.ndims[2] == 2 + broadcast_size_off = sizes.size_offset[2] + @test sizes.size[broadcast_size_off+1] == 2 + @test sizes.size[broadcast_size_off+2] == 3 + # And the scalar leaf among the children stays ndims=0. + @test 0 in sizes.ndims[3:4] + end + return +end + end # module TestJuMP.runtests() From 473f9835ab332f262173dd2407978185088ec462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Wed, 13 May 2026 18:37:32 +0200 Subject: [PATCH 3/4] Fix format --- src/sizes.jl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/sizes.jl b/src/sizes.jl index 0ca4cf7..2c315da 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -365,11 +365,7 @@ function _infer_sizes( return !iszero(sizes.ndims[children_arr[i]]) end if isnothing(first_matrix) - _copy_size!( - sizes, - k, - children_arr[first(children_indices)], - ) + _copy_size!(sizes, k, children_arr[first(children_indices)]) else _copy_size!( sizes, From 61364bdc150ecc122b7be99942f994cd1936c336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Legat?= Date: Wed, 13 May 2026 18:50:47 +0200 Subject: [PATCH 4/4] Add assert --- src/sizes.jl | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/sizes.jl b/src/sizes.jl index 2c315da..3308df6 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -359,22 +359,28 @@ function _infer_sizes( op = DEFAULT_MULTIVARIATE_OPERATORS[node.index] if op == :+ || op == :- || op == :* # Broadcasted +/-/* takes the largest child's shape (for - # scalar+matrix that's the matrix; for matrix+matrix we - # currently assume they match and pick the first). + # scalar+matrix that's the matrix; for matrix+matrix the + # shapes must match). first_matrix = findfirst(children_indices) do i return !iszero(sizes.ndims[children_arr[i]]) end if isnothing(first_matrix) _copy_size!(sizes, k, children_arr[first(children_indices)]) else - _copy_size!( - sizes, - k, - children_arr[children_indices[first_matrix]], - ) + ref = children_arr[children_indices[first_matrix]] + for c_idx in children_indices + ix = children_arr[c_idx] + iszero(sizes.ndims[ix]) && continue + @assert _size(sizes, ix) == _size(sizes, ref) "Incompatible array shapes $(Tuple(_size(sizes, ix))) and $(Tuple(_size(sizes, ref))) for broadcasted operator `$op`" + end + _copy_size!(sizes, k, ref) end elseif op == :^ # Broadcasted ^ with scalar exponent preserves base shape + @assert length(children_indices) == 2 "Expected two arguments for broadcasted operator `$op`, got $(length(children_indices))" + @assert iszero( + sizes.ndims[children_arr[children_indices[2]]], + ) "Expected scalar exponent for broadcasted operator `$op`" _copy_size!(sizes, k, children_arr[first(children_indices)]) end elseif node.type == NODE_CALL_UNIVARIATE