Using Flux with structs: unexpected channel dimension

Question

I seem to be getting different behaviour (different output dimensions) when calling Flux functions in a struct, versus directly applying the functions to a tensor:

Direct application:

# Build the layer stack directly as one Chain, then print the output size
# obtained for an all-ones input of size (32, 32, 256, 1).
m = Chain(
    MaxPool((2, 2), stride=2),
    Conv((3, 3), 32 * 8 => 32 * 16, pad=1),
    BatchNorm(32 * 16, relu),
    Conv((3, 3), 32 * 16 => 32 * 16, pad=1),
    BatchNorm(32 * 16, relu),
)
println(size(m(ones(32, 32, 256, 1))))  # gives the expected (16, 16, 512, 1)

Via a struct:

"""
    block(in_channels, features)

Build a `Chain` made of a `(2, 2)` `MaxPool` with stride 2 followed by two
`(3, 3)` `Conv` layers (each with `pad=1`), each of which is followed by a
`BatchNorm(features, relu)`. The first convolution maps
`in_channels => features`; the second maps `features => features`.
"""
function block(in_channels, features)
    conv_kernel = (3, 3)
    layers = (
        MaxPool((2, 2), stride=2),
        Conv(conv_kernel, in_channels => features, pad=1),
        BatchNorm(features, relu),
        Conv(conv_kernel, features => features, pad=1),
        BatchNorm(features, relu),
    )
    return Chain(layers...)
end

"""
    test{B}

Thin wrapper holding a single value in the field `b`.

The field type is a type parameter, so every instance has a concrete field
type instead of an implicit `::Any` field. Construction is unchanged:
`test(b)` infers `B` from the argument.
"""
struct test{B}
    b::B
end

# Zero-argument constructor: wraps `block(32 * 8, 32 * 16)` in a `test`.
test() = test(block(32 * 8, 32 * 16))

# Call overload for `test`: applies only the element at index 1 of `t.b` to
# `x` and prints the size of that result. The `println` call is the last
# expression, so its value is what the call returns.
function (t::test)(x)
    first_layer = t.b[1]
    println(size(first_layer(x)))
end

# Construct the wrapper and call it on an all-ones input of size (32, 32, 256, 1).
test1 = test()
test1(ones(32, 32, 256, 1))  # gives (16, 16, 256, 1)

Why are the output channel dimensions different between the two snippets? What am I missing about structs in Julia? Thanks!

Avik Pal · Accepted Answer

The correct way to define the (t::test)(x) function would be:

# Call overload for `test`: passes `x` through the whole of `t.b` (note the
# absence of `[1]`) and prints the size of the result.
function (t::test)(x)
    println(size(t.b(x)))
end

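t.b[1] returns only the first layer in the Chain, i.e., MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), and as such your input is never passed through the Conv layers. Max-pooling halves the two spatial dimensions but leaves the channel dimension untouched, which is why you see (16, 16, 256, 1) instead of (16, 16, 512, 1).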

Using Flux with structs: unexpected channel dimension

Answers (2)

Related Questions