Reputation: 2925
I have a hash H
(see bottom) and need to perform a deep invert operation on it, such that a new hash H2
is returned where each key K
is a value inside the original hash. The keys in H2
map to an array of arrays of all the sequences of keys that, when applied onto the original hash H
would give you the key K
which is a value in the original hash.
Perhaps I should use a different data structure for the output, such as a hash of hashes?
I'd like this to work with hashes of arbitrary nesting levels.
I don't know where to start designing an optimal algorithm
What the input might look like
{
u: {
u: { u: :phe, c: :phe, a: :leu, g: :leu },
c: { u: :ser, c: :ser, a: :ser, g: :ser },
a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
g: { u: :cys, c: :cys, a: :STOP, g: :trp }
},
c: {
u: { u: :leu, c: :leu, a: :leu, g: :leu },
c: { u: :pro, c: :pro, a: :pro, g: :pro },
a: { u: :his, c: :his, a: :gln, g: :gln },
g: { u: :arg, c: :arg, a: :arg, g: :arg }
},
{...}
}
What output would look like
{
phe: [[:u,:u,:u],[:u,:u,:c]],
leu: [[:u,:u,:a],[:u,:u,:g]],
ser: [[:u,:c,:u],[:u,:c,:c],[:u,:u,:a],[:u,:u,:g]],
tyr: [[:u,:a,:u],[:u,:a,:c]],
"...": [[...]]
}
Why? I'm writing my own bioinformatics library and want to be able to return the possible nucleotide sequences for a given protein, denoted by the three character :symbols
Upvotes: 1
Views: 172
Reputation: 110685
Code
def recurse(h, arr=[])
h.each_with_object({}) { |(k,v),g| g.update((Hash===v) ?
recurse(v, arr + [k]) : { v=>[arr+[k]] }) { |_,o,n| o+n } }
end
The recursion uses the form of Hash#update (aka merge!
) that employs the block { |_,o,n| o+n } }
to determine the values of keys that are present in both hashes being merged.
Example 1
h =
{
u: {
u: { u: :phe, c: :phe, a: :leu, g: :leu },
c: { u: :ser, c: :ser, a: :ser, g: :ser },
a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
g: { u: :cys, c: :cys, a: :STOP, g: :trp }
},
c: {
u: { u: :leu, c: :leu, a: :leu, g: :leu },
c: { u: :pro, c: :pro, a: :pro, g: :pro },
a: { u: :his, c: :his, a: :gln, g: :gln },
g: { u: :arg, c: :arg, a: :arg, g: :arg }
},
}
recurse h
#=> {:phe=>[[:u, :u, :u], [:u, :u, :c]],
# :leu=>[[:u, :u, :a], [:u, :u, :g], [:c, :u, :u],
# [:c, :u, :c], [:c, :u, :a], [:c, :u, :g]],
# :ser=>[[:u, :c, :u], [:u, :c, :c], [:u, :c, :a], [:u, :c, :g]],
# :tyr=>[[:u, :a, :u], [:u, :a, :c]],
# :STOP=>[[:u, :a, :a], [:u, :a, :g], [:u, :g, :a]],
# :cys=>[[:u, :g, :u], [:u, :g, :c]],
# :trp=>[[:u, :g, :g]],
# :pro=>[[:c, :c, :u], [:c, :c, :c], [:c, :c, :a], [:c, :c, :g]],
# :his=>[[:c, :a, :u], [:c, :a, :c]],
# :gln=>[[:c, :a, :a], [:c, :a, :g]],
# :arg=>[[:c, :g, :u], [:c, :g, :c], [:c, :g, :a], [:c, :g, :g]]}
Example 2
h =
{
u: {
u: { u: :phe, a: :leu },
c: { u: :ser, c: :phe },
a: { u: :tyr, c: { a: { u: :leu, c: :ser }, u: :tyr } }
},
c: {
u: { u: :leu, c: :pro },
a: { u: :arg }
},
}
recurse(h)
#=> {:phe=>[[:u, :u, :u], [:u, :c, :c]],
# :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
# :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
# :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
# :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}
Explanation
Here is the code modified to display the calculations that are being performed:
def recurse(h, arr=[], level = 0)
indent = ' '*(2*level)
puts "#{indent}level = #{level}"
puts "#{indent}h= #{h}"
puts "#{indent}arr= #{arr}"
g = h.each_with_object({}) do |(k,v),g|
puts "#{indent} level = #{level}"
puts "#{indent} k=#{k}"
puts "#{indent} v=#{v}"
puts "#{indent} g=#{g}"
case v
when Hash
puts "#{indent} v is Hash"
g.update(recurse(v, arr + [k], level+1)) { |_,o,n| o+n }
else
puts "#{indent} v is not a Hash"
g.update({ v=>[arr+[k]] }) { |_,o,n| o+n }
end
end
puts "#{indent}return #{g}"
g
end
The output for recurse h
follows, for Example 2 (for diehards only).
level = 0
h= {:u=>{:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe}, :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}, :c=>{:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}}
arr= []
level = 0
k=u
v={:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
:a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
g={}
v is Hash
level = 1
h= {:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
:a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
arr= [:u]
level = 1
k=u
v={:u=>:phe, :a=>:leu}
g={}
v is Hash
level = 2
h= {:u=>:phe, :a=>:leu}
arr= [:u, :u]
level = 2
k=u
v=phe
g={}
v is not a Hash
level = 2
k=a
v=leu
g={:phe=>[[:u, :u, :u]]}
v is not a Hash
return {:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
level = 1
k=c
v={:u=>:ser, :c=>:phe}
g={:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
v is Hash
level = 2
h= {:u=>:ser, :c=>:phe}
arr= [:u, :c]
level = 2
k=u
v=ser
g={}
v is not a Hash
level = 2
k=c
v=phe
g={:ser=>[[:u, :c, :u]]}
v is not a Hash
return {:ser=>[[:u, :c, :u]], :phe=>[[:u, :c, :c]]}
level = 1
k=a
v={:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a]], :ser=>[[:u, :c, :u]]}
v is Hash
level = 2
h= {:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
arr= [:u, :a]
level = 2
k=u
v=tyr
g={}
v is not a Hash
level = 2
k=c
v={:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
g={:tyr=>[[:u, :a, :u]]}
v is Hash
level = 3
h= {:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
arr= [:u, :a, :c]
level = 3
k=a
v={:u=>:leu, :c=>:ser}
g={}
v is Hash
level = 4
h= {:u=>:leu, :c=>:ser}
arr= [:u, :a, :c, :a]
level = 4
k=u
v=leu
g={}
v is not a Hash
level = 4
k=c
v=ser
g={:leu=>[[:u, :a, :c, :a, :u]]}
v is not a Hash
return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
level = 3
k=u
v=tyr
g={:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
v is not a Hash
return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]],
:tyr=>[[:u, :a, :c, :u]]}
return {:tyr=>[[:u, :a, :u], [:u, :a, :c, :u]], :leu=>[[:u, :a, :c, :a, :u]],
:ser=>[[:u, :a, :c, :a, :c]]}
return {:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
:ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
level = 0
k=c
v={:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
:ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
v is Hash
level = 1
h= {:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
arr= [:c]
level = 1
k=u
v={:u=>:leu, :c=>:pro}
g={}
v is Hash
level = 2
h= {:u=>:leu, :c=>:pro}
arr= [:c, :u]
level = 2
k=u
v=leu
g={}
v is not a Hash
level = 2
k=c
v=pro
g={:leu=>[[:c, :u, :u]]}
v is not a Hash
return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
level = 1
k=a
v={:u=>:arg}
g={:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
v is Hash
level = 2
h= {:u=>:arg}
arr= [:c, :a]
level = 2
k=u
v=arg
g={}
v is not a Hash
return {:arg=>[[:c, :a, :u]]}
return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}
return {:phe=>[[:u, :u, :u], [:u, :c, :c]],
:leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
:ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
:tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
:pro=>[[:c, :u, :c]],
:arg=>[[:c, :a, :u]]}
#=> <the last value returned above>
Upvotes: 3