mmvsbg
mmvsbg

Reputation: 3588

Merge Ruby nested hashes with same keys

I have several hashes in Ruby which have nested hashes inside of them and share a very similar structure. They look something like this:

# Note: the "year_1": key form creates Symbol keys (:year_1), not String keys.
# value1 ... value6 are not defined in this snippet; they stand in for arbitrary values.
a = {
    "year_1": {
        "sub_type_a": {
            "label1": value1
        }
    },
    "year_2": {
        "sub_type_a": {
            "label2": value2
        }
    }
}

b = {
    "year_1": {
        "sub_type_a": {
            "label3": value3
        }
    },
    "year_2": {
        "sub_type_a": {
            "label4": value4
        }
    }
}

c = {
    "year_1": {
        "sub_type_a": {
            "label5": value5
        }
    },
    "year_2": {
        "sub_type_a": {
            "label6": value6
        }
    }
}

I want to combine them in one single hash which would have the nested data combined where possible without overwriting other values like this:

# Desired outcome: under each year and sub-type, the label entries of a, b and c sit side by side.
result = {
    "year_1": {
        "sub_type_a": {
            "label1": value1,
            "label3": value3,
            "label5": value5
        }
    },
    "year_2": {
        "sub_type_a": {
            "label2": value2,
            "label4": value4,
            "label6": value6
        }
    }
}

There could also be several sub types instead of just one but that's the general idea.

If I use the merge function it just overwrites the label-value data inside the sub_type hashes and I am left with only one record.

Is there a simple way to achieve this? I can write a function that iterates the hashes recursively and figures out inside what to add where, but it feels like there should be a simpler way.

Upvotes: 3

Views: 902

Answers (4)

Cary Swoveland
Cary Swoveland

Reputation: 110675

We are given the following.

# Three sample hashes sharing the same year => sub-type => label nesting.
a = {
  year_1: { sub_type_a: { label1: "value1" } },
  year_2: { sub_type_a: { label2: "value2" } }
}

b = {
  year_1: { sub_type_a: { label3: "value3" } },
  year_2: { sub_type_a: { label4: "value4" } }
}

c = {
  year_1: { sub_type_a: { label5: "value5" } },
  year_2: { sub_type_a: { label6: "value6" } }
}

# The hashes in the order in which they are merged.
arr = [a, b, c]

We may construct the desired hash as follows.

# Folds every hash of arr into one result hash.
# g is the hash being merged in; h is the result accumulated so far.
arr.each_with_object({}) do |g,h|
  g.each do |yr,v|
    # Merge the whole year hash (not only its first pair) so that a year
    # holding several sub-types keeps all of them. The outer block runs when
    # yr is already in h; the inner block runs when a sub-type exists on both
    # sides and merges the two label hashes instead of replacing one.
    h.update(yr=>v) { |_,o,n| o.merge(n) { |_,p,q| p.merge(q) } }
  end
end
  #=> {:year_1=>{:sub_type_a=>{:label1=>"value1", :label3=>"value3",
  #                            :label5=>"value5"}},
  #    :year_2=>{:sub_type_a=>{:label2=>"value2", :label4=>"value4",
  #                            :label6=>"value6"}}}  

This uses the form of Hash#update (a.k.a merge!) that employs a block to determine the values of keys that are present in both hashes being merged. See the link for an explanation of that block's three block variables. I've used an underscore (a valid local variable) for the first block variable, the common key, to signal to the reader that it is not used in the block calculation. That is a common convention.

For anyone interested in the gory detail of the calculations (the one sure-fire way to understand what's going on) I will execute the code with some puts statements added.

# Instrumented run of the merge: prints each intermediate value so the
# calculation can be followed step by step.
# g is the hash currently being merged in; h is the result accumulated so far.
arr.each_with_object({}) do |g,h|
  puts "g=#{g}"
  puts "h=#{h}"
  g.each do |yr,v|
    puts "  yr=#{yr}"
    puts "  v=#{v}"
    # Only the first [key, value] pair of v is taken here.
    k,f = v.first
    puts "  k=#{k}"
    puts "  f=#{f}"
    puts "  yr=>{ k=>f } = #{yr}=>#{v} = #{{ yr=>v }}"
    # The block is called only when yr is already a key of h:
    # o is h's current value for yr, n is the incoming value.
    h.update(yr=>{ k=>f }) do |_,o,n|
      puts "    _=#{_}"
      puts "    o=#{o}"
      puts "    n=#{n}"
      puts "    { k=>o[k].merge(n[k]) }"
      puts "      => { #{k}=>#{o[k]}.merge(#{n[k]}) }"           
      # tap prints the merged hash and still returns it as the block's value.
      { k=>o[k].merge(n[k]) }.tap { |e| puts "      => #{e}" }
    end
  end
end

The following is displayed.

g={:year_1=>{:sub_type_a=>{:label1=>"value1"}},
   :year_2=>{:sub_type_a=>{:label2=>"value2"}}}
h={}
  yr=year_1
  v={:sub_type_a=>{:label1=>"value1"}}
  k=sub_type_a
  f={:label1=>"value1"}
  yr=>{ k=>f } = year_1=>{:sub_type_a=>{:label1=>"value1"}} = 
    {:year_1=>{:sub_type_a=>{:label1=>"value1"}}}
  yr=year_2
  v={:sub_type_a=>{:label2=>"value2"}}
  k=sub_type_a
  f={:label2=>"value2"}
  yr=>{ k=>f } = year_2=>{:sub_type_a=>{:label2=>"value2"}} =
    {:year_2=>{:sub_type_a=>{:label2=>"value2"}}}

g={:year_1=>{:sub_type_a=>{:label3=>"value3"}},
   :year_2=>{:sub_type_a=>{:label4=>"value4"}}}
h={:year_1=>{:sub_type_a=>{:label1=>"value1"}},
   :year_2=>{:sub_type_a=>{:label2=>"value2"}}}
  yr=year_1
  v={:sub_type_a=>{:label3=>"value3"}}
  k=sub_type_a
  f={:label3=>"value3"}
  yr=>{ k=>f } = year_1=>{:sub_type_a=>{:label3=>"value3"}} =
    {:year_1=>{:sub_type_a=>{:label3=>"value3"}}}
    _=year_1
    o={:sub_type_a=>{:label1=>"value1"}}
    n={:sub_type_a=>{:label3=>"value3"}}
    { k=>o[k].merge(n[k]) }
      => { sub_type_a=>{:label1=>"value1"}.
           merge({:label3=>"value3"}) }
      => {:sub_type_a=>{:label1=>"value1", :label3=>"value3"}}
  yr=year_2
  v={:sub_type_a=>{:label4=>"value4"}}
  k=sub_type_a
  f={:label4=>"value4"}
  yr=>{ k=>f } = year_2=>{:sub_type_a=>{:label4=>"value4"}} =
    {:year_2=>{:sub_type_a=>{:label4=>"value4"}}}
    _=year_2
    o={:sub_type_a=>{:label2=>"value2"}}
    n={:sub_type_a=>{:label4=>"value4"}}
    { k=>o[k].merge(n[k]) }
      => { sub_type_a=>{:label2=>"value2"}.
           merge({:label4=>"value4"}) }
      => {:sub_type_a=>{:label2=>"value2", :label4=>"value4"}}

g={:year_1=>{:sub_type_a=>{:label5=>"value5"}},
   :year_2=>{:sub_type_a=>{:label6=>"value6"}}}
h={:year_1=>{:sub_type_a=>{:label1=>"value1", :label3=>"value3"}},
   :year_2=>{:sub_type_a=>{:label2=>"value2", :label4=>"value4"}}}
  yr=year_1
  v={:sub_type_a=>{:label5=>"value5"}}
  k=sub_type_a
  f={:label5=>"value5"}
  yr=>{ k=>f } = year_1=>{:sub_type_a=>{:label5=>"value5"}} =
    {:year_1=>{:sub_type_a=>{:label5=>"value5"}}}
    _=year_1
    o={:sub_type_a=>{:label1=>"value1", :label3=>"value3"}}
    n={:sub_type_a=>{:label5=>"value5"}}
    { k=>o[k].merge(n[k]) }
      => { sub_type_a=>{:label1=>"value1", :label3=>"value3"}.
           merge({:label5=>"value5"}) }
      => {:sub_type_a=>{:label1=>"value1", :label3=>"value3",
          :label5=>"value5"}}
  yr=year_2
  v={:sub_type_a=>{:label6=>"value6"}}
  k=sub_type_a
  f={:label6=>"value6"}
  yr=>{ k=>f } = year_2=>{:sub_type_a=>{:label6=>"value6"}} =
    {:year_2=>{:sub_type_a=>{:label6=>"value6"}}}
    _=year_2
    o={:sub_type_a=>{:label2=>"value2", :label4=>"value4"}}
    n={:sub_type_a=>{:label6=>"value6"}}
    { k=>o[k].merge(n[k]) }
      => { sub_type_a=>{:label2=>"value2", :label4=>"value4"}.
           merge({:label6=>"value6"}) }
      => {:sub_type_a=>{:label2=>"value2", :label4=>"value4",
                        :label6=>"value6"}}
 => {:year_1=>{:sub_type_a=>{:label1=>"value1", :label3=>"value3",
       :label5=>"value5"}},
     :year_2=>{:sub_type_a=>{:label2=>"value2", :label4=>"value4",
       :label6=>"value6"}}} 

Upvotes: 1

johansenja
johansenja

Reputation: 678

If you are using Rails (or ActiveSupport) you might want to look at deep_merge, which handles merging of nested hashes for you

Upvotes: 1

Sebastián Palma
Sebastián Palma

Reputation: 33420

Something similar.

Combine each_with_object, each and merge so you can iterate through each hash and assign the merged values, when they exist, to a new temporary one:

# Fold a, b and c into one hash, year by year.
[a, b, c].each_with_object({}) do |source, combined|
  source.each do |year, sub_types|
    so_far = combined[year] || {}
    # merge's block is called only for sub-types present on both sides;
    # their label hashes are merged instead of one replacing the other.
    combined[year] = so_far.merge(sub_types) do |_sub_type, existing, incoming|
      existing.merge(incoming)
    end
  end
end
# {
#     :year_1 => {
#         :sub_type_a => {
#             :label1 => :value1,
#             :label3 => :value3,
#             :label5 => :value5
#         }
#     },
#     :year_2 => {
#         :sub_type_a => {
#             :label2 => :value2,
#             :label4 => :value4,
#             :label6 => :value6
#         }
#     }
# }

Upvotes: 4

Masklinn
Masklinn

Reputation: 42272

Hash#merge takes an optional conflict resolution block, which will be called any time a key is present in both the subject and the parameter.

You can use this to e.g. recursively merge your hashes.

Upvotes: -1

Related Questions