Reputation: 6423
Let's say I have a df looking like this one, where I have multiple categorical values and several variables:
df = wsv"""
region product year prod cons
US apples 2010 1 2
US appels 2011 3 4
US banana 2010 5 6
US banana 2011 7 8
EU apples 2010 9 10
EU appels 2011 11 12
EU banana 2010 13 14
EU banana 2011 15 16
"""
How can I transform it to have category totals/subtotals as new rows, i.e.
df2 = wsv"""
index prod cons
US 16 20
apples 4 6
2010 1 2
2011 3 4
banana 12 14
2010 5 6
2011 7 8
EU 48 52
apples 20 22
2010 9 10
2011 11 12
banana 28 30
2010 13 14
2011 15 16
"""
This is often useful, after proper formatting (e.g. totals in bold..), to report data, as many reports use actually this kind of structure..
Upvotes: 1
Views: 240
Reputation: 6423
You can use nested by
to achieve something similar:
df2 = by(df, :region) do sub1
t = DataFrame(product=NA, year=NA, prod=sum(sub1[:prod]), cons=sum(sub1[:cons]))
sub1mod = by(sub1, [:region,:product]) do sub2
t2 = DataFrame(year=NA, prod=sum(sub2[:prod]), cons=sum(sub2[:cons]))
t3 = vcat(t2,sub2)
end
t2 = vcat(t,sub1mod)
end
delete!(df2,[:region_1,:region_2,:product_1])
Out:
14×5 DataFrames.DataFrame
│ Row │ region │ product │ year │ prod │ cons │
├─────┼────────┼──────────┼──────┼──────┼──────┤
│ 1 │ "EU" │ NA │ NA │ 48 │ 52 │
│ 2 │ "EU" │ "apples" │ NA │ 20 │ 22 │
│ 3 │ "EU" │ "apples" │ 2010 │ 9 │ 10 │
│ 4 │ "EU" │ "apples" │ 2011 │ 11 │ 12 │
│ 5 │ "EU" │ "banana" │ NA │ 28 │ 30 │
│ 6 │ "EU" │ "banana" │ 2010 │ 13 │ 14 │
│ 7 │ "EU" │ "banana" │ 2011 │ 15 │ 16 │
│ 8 │ "US" │ NA │ NA │ 16 │ 20 │
│ 9 │ "US" │ "apples" │ NA │ 4 │ 6 │
│ 10 │ "US" │ "apples" │ 2010 │ 1 │ 2 │
│ 11 │ "US" │ "apples" │ 2011 │ 3 │ 4 │
│ 12 │ "US" │ "banana" │ NA │ 12 │ 14 │
│ 13 │ "US" │ "banana" │ 2010 │ 5 │ 6 │
│ 14 │ "US" │ "banana" │ 2011 │ 7 │ 8 │
Upvotes: 2