theideasmith
theideasmith

Reputation: 2925

Algorithm for Deep Hash Invert (should be in ruby)

I have a hash H (see bottom) and need to perform a deep invert operation on it, such that a new hash H2 is returned where each key K is a value inside the original hash. The keys in H2 map to an array of arrays of all the sequences of keys that, when applied onto the original hash H would give you the key K which is a value in the original hash.

Perhaps I should use a different data structure for the output, such as a hash of hashes?

I'd like this to work with hashes of arbitrary nesting levels.

I don't know where to start designing an optimal algorithm

Original Hash

What the input might look like

{
  u: {
    u: { u: :phe, c: :phe, a: :leu, g: :leu },
    c: { u: :ser, c: :ser, a: :ser, g: :ser },
    a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
    g: { u: :cys, c: :cys, a: :STOP, g: :trp }
  },
  c: {
    u: { u: :leu, c: :leu, a: :leu, g: :leu },
    c: { u: :pro, c: :pro, a: :pro, g: :pro },
    a: { u: :his, c: :his, a: :gln, g: :gln },
    g: { u: :arg, c: :arg, a: :arg, g: :arg }
  },
  {...}
}

Simplified Output

What output would look like

{
  phe: [[:u,:u,:u],[:u,:u,:c]],
  leu: [[:u,:u,:a],[:u,:u,:g]],
  ser: [[:u,:c,:u],[:u,:c,:c],[:u,:u,:a],[:u,:u,:g]],
  tyr: [[:u,:a,:u],[:u,:a,:c]],
  "...": [[...]]
}

Why? I'm writing my own bioinformatics library and want to be able to return the possible nucleotide sequences for a given protein, denoted by the three character :symbols

Upvotes: 1

Views: 172

Answers (1)

Cary Swoveland
Cary Swoveland

Reputation: 110685

Code

def recurse(h, arr=[])
  h.each_with_object({}) { |(k,v),g| g.update((Hash===v) ?
    recurse(v, arr + [k]) : { v=>[arr+[k]] }) { |_,o,n| o+n } }
end

The recursion uses the form of Hash#update (aka merge!) that employs the block { |_,o,n| o+n } } to determine the values of keys that are present in both hashes being merged.

Example 1

h =
{
  u: {
    u: { u: :phe, c: :phe, a: :leu, g: :leu },
    c: { u: :ser, c: :ser, a: :ser, g: :ser },
    a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
    g: { u: :cys, c: :cys, a: :STOP, g: :trp }
  },
  c: {
    u: { u: :leu, c: :leu, a: :leu, g: :leu },
    c: { u: :pro, c: :pro, a: :pro, g: :pro },
    a: { u: :his, c: :his, a: :gln, g: :gln },
    g: { u: :arg, c: :arg, a: :arg, g: :arg }
  },
}

recurse h
  #=> {:phe=>[[:u, :u, :u], [:u, :u, :c]],
  #    :leu=>[[:u, :u, :a], [:u, :u, :g], [:c, :u, :u],
  #      [:c, :u, :c], [:c, :u, :a], [:c, :u, :g]],
  #    :ser=>[[:u, :c, :u], [:u, :c, :c], [:u, :c, :a], [:u, :c, :g]], 
  #    :tyr=>[[:u, :a, :u], [:u, :a, :c]],
  #    :STOP=>[[:u, :a, :a], [:u, :a, :g], [:u, :g, :a]],
  #    :cys=>[[:u, :g, :u], [:u, :g, :c]],
  #    :trp=>[[:u, :g, :g]],
  #    :pro=>[[:c, :c, :u], [:c, :c, :c], [:c, :c, :a], [:c, :c, :g]], 
  #    :his=>[[:c, :a, :u], [:c, :a, :c]],
  #    :gln=>[[:c, :a, :a], [:c, :a, :g]],
  #    :arg=>[[:c, :g, :u], [:c, :g, :c], [:c, :g, :a], [:c, :g, :g]]}

Example 2

h =
{
  u: {
    u: { u: :phe, a: :leu },
    c: { u: :ser, c: :phe },
    a: { u: :tyr, c: { a: { u: :leu, c: :ser }, u: :tyr } }
  },
  c: {
    u: { u: :leu, c: :pro },
    a: { u: :arg }
  },
}

recurse(h)
  #=> {:phe=>[[:u, :u, :u], [:u, :c, :c]],
  #    :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
  #    :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
  #    :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
  #    :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}

Explanation

Here is the code modified to display the calculations that are being performed:

def recurse(h, arr=[], level = 0)
  indent = ' '*(2*level)
  puts "#{indent}level = #{level}"
  puts "#{indent}h= #{h}"
  puts "#{indent}arr= #{arr}"
  g = h.each_with_object({}) do |(k,v),g|
    puts "#{indent}  level = #{level}"
    puts "#{indent}  k=#{k}"
    puts "#{indent}  v=#{v}"
    puts "#{indent}  g=#{g}"
    case v
    when Hash
      puts "#{indent}  v is Hash"
      g.update(recurse(v, arr + [k], level+1)) { |_,o,n| o+n }
    else
      puts "#{indent}  v is not a Hash"
      g.update({ v=>[arr+[k]] }) { |_,o,n| o+n }
    end
  end
  puts "#{indent}return #{g}"
  g
end

The output for recurse h follows, for Example 2 (for diehards only).

level = 0
h= {:u=>{:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe}, :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}, :c=>{:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}}
arr= []
  level = 0
  k=u
  v={:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
     :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
  g={}
  v is Hash
  level = 1
  h= {:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
      :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
  arr= [:u]
    level = 1
    k=u
    v={:u=>:phe, :a=>:leu}
    g={}
    v is Hash
    level = 2
    h= {:u=>:phe, :a=>:leu}
    arr= [:u, :u]
      level = 2
      k=u
      v=phe
      g={}
      v is not a Hash
      level = 2
      k=a
      v=leu
      g={:phe=>[[:u, :u, :u]]}
      v is not a Hash
    return {:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
    level = 1
    k=c
    v={:u=>:ser, :c=>:phe}
    g={:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
    v is Hash
    level = 2
    h= {:u=>:ser, :c=>:phe}
    arr= [:u, :c]
      level = 2
      k=u
      v=ser
      g={}
      v is not a Hash
      level = 2
      k=c
      v=phe
      g={:ser=>[[:u, :c, :u]]}
      v is not a Hash
    return {:ser=>[[:u, :c, :u]], :phe=>[[:u, :c, :c]]}
    level = 1
    k=a
    v={:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
    g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a]], :ser=>[[:u, :c, :u]]}
    v is Hash
    level = 2
    h= {:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
    arr= [:u, :a]
      level = 2
      k=u
      v=tyr
      g={}
      v is not a Hash
      level = 2
      k=c
      v={:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
      g={:tyr=>[[:u, :a, :u]]}
      v is Hash
      level = 3
      h= {:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
      arr= [:u, :a, :c]
        level = 3
        k=a
        v={:u=>:leu, :c=>:ser}
        g={}
        v is Hash
        level = 4
        h= {:u=>:leu, :c=>:ser}
        arr= [:u, :a, :c, :a]
          level = 4
          k=u
          v=leu
          g={}
          v is not a Hash
          level = 4
          k=c
          v=ser
          g={:leu=>[[:u, :a, :c, :a, :u]]}
          v is not a Hash
        return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
        level = 3
        k=u
        v=tyr
        g={:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
        v is not a Hash
      return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]],
              :tyr=>[[:u, :a, :c, :u]]}
    return {:tyr=>[[:u, :a, :u], [:u, :a, :c, :u]], :leu=>[[:u, :a, :c, :a, :u]],
            :ser=>[[:u, :a, :c, :a, :c]]}
  return {:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
          :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
  level = 0
  k=c
  v={:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
  g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
     :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
  v is Hash
  level = 1
  h= {:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
  arr= [:c]
    level = 1
    k=u
    v={:u=>:leu, :c=>:pro}
    g={}
    v is Hash
    level = 2
    h= {:u=>:leu, :c=>:pro}
    arr= [:c, :u]
      level = 2
      k=u
      v=leu
      g={}
      v is not a Hash
      level = 2
      k=c
      v=pro
      g={:leu=>[[:c, :u, :u]]}
      v is not a Hash
    return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
    level = 1
    k=a
    v={:u=>:arg}
    g={:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
    v is Hash
    level = 2
    h= {:u=>:arg}
    arr= [:c, :a]
      level = 2
      k=u
      v=arg
      g={}
      v is not a Hash
    return {:arg=>[[:c, :a, :u]]}
  return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}
return {:phe=>[[:u, :u, :u], [:u, :c, :c]],
        :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
        :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
        :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
        :pro=>[[:c, :u, :c]],
        :arg=>[[:c, :a, :u]]}
  #=> <the last value returned above> 

Upvotes: 3

Related Questions