Reputation: 2188
I'm looking through output on a Cisco ASA of show conn protocol tcp
.
Every line has this format:
TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO
I would like for my hashes to look like this:
h = {:dst => 4.2.2.2:443,
:src => 10.17.21.44:63314,
:bytes => 11365,
:flags => UIO,
}
Here is my attempt to do this, but I'm getting "undefined method 'captures' for nil:NilClass".
I think this is saying the line is not matching, but I'm pretty sure my regex is correct. One issue I know I will have is that the src
and dst
in this setup will match both addresses. I'm not sure how I can make dst
match the first address, and src
match the second address.
# First attempt: build a hash of per-line hashes, keyed by 1-based line number.
h = {}
# NOTE: 'r+' opens the file read-write, and the handle is never closed here.
fp = File.open('conns.txt','r+')
fp.each_with_index do |line, i|
# String#match returns nil for a line that does not match (e.g. a blank line),
# so chaining .captures raises NoMethodError on such lines.
# The dst and src patterns are identical, so both find the first address; they
# also contain only a non-capturing group, so .captures yields an empty array.
dst = line.match(/(?:[0-9]{1,3}\.){3}[0-9]{1,3}:\d+/).captures
src = line.match(/(?:[0-9]{1,3}\.){3}[0-9]{1,3}:\d+/).captures
# These two patterns have one capture group each, so .captures returns a
# one-element array rather than a bare string.
bytes = line.match(/(?<=bytes\s)(\d+)/).captures
flags = line.match(/(?<=flags\s)(\w+)/).captures
h[i+1] = {dst: dst, src: src, bytes: bytes, flags: flags}
end
Here is the irb session:
irb(main):001:0> h = {}
=> {}
irb(main):002:0> fp = File.open('conns.txt','r+')
=> #<File:conns.txt>
irb(main):003:0> #TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO
irb(main):004:0* fp.each_with_index do |line, i|
irb(main):005:1* dst = line.match(/(?:[0-9]{1,3}\.){3}[0-9]{1,3}:\d+/).captures
irb(main):006:1> src = line.match(/(?:[0-9]{1,3}\.){3}[0-9]{1,3}:\d+/).captures
irb(main):007:1> bytes = line.match(/(?<=bytes\s)(\d+)/).captures
irb(main):008:1> flags = line.match(/(?<=flags\s)(\w+)/).captures
irb(main):009:1> h[i+1] = {dst: dst, src: src, bytes: bytes, flags: flags}
irb(main):010:1> end
NoMethodError: undefined method `captures' for nil:NilClass
from (irb):5:in `block in irb_binding'
from (irb):4:in `each'
from (irb):4:in `each_with_index'
from (irb):4
from /usr/bin/irb:12:in `<main>'
irb(main):011:0>
If I remove captures
it no longer fails but I get #<MatchData
prepended to each field. The hash looks like this:
{1=>{:dst=>#<MatchData "4.2.2.2:443">, :src=>#<MatchData "4.2.2.2:443">, :bytes=>#<MatchData "11365" 1:"11365">, :flags=>#<MatchData "UIO" 1:"UIO">}}
Using scan
works and I figured out the dst
vs src
issue too:
# Parse every line of conns.txt into a hash, keyed by 1-based line number.
# Sample input line:
#   TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO
h = {}
# File.foreach opens the file read-only and closes it once iteration finishes,
# so no handle is left open and no write permission is required.
File.foreach('conns.txt').with_index(1) do |line, lineno|
  # The pattern has no capture group, so scan returns a flat array of
  # "ip:port" strings in order of appearance: OUTSIDE (dst) first, INSIDE (src) second.
  dst, src = line.scan(/(?:[0-9]{1,3}\.){3}[0-9]{1,3}:\d+/)
  # String#[] with a regexp returns the matched text (or nil when absent),
  # avoiding the nested arrays that scan produces with a capture group.
  bytes = line[/(?<=bytes\s)\d+/]
  flags = line[/(?<=flags\s)\w+/]
  h[lineno] = { dst: dst, src: src, bytes: bytes && bytes.to_i, flags: flags }
end
Upvotes: 1
Views: 124
Reputation: 110685
Here's another way that may be easier to understand and maintain than solutions involving more complex regular expressions.
Code
# Reads the file +fname+ line by line and returns an array containing the
# result of hashify_str for each line, in file order.
def hashify_file(fname)
  IO.foreach(fname).map { |line| hashify_str(line) }
end
# Converts one line of Cisco ASA `show conn protocol tcp` output into a hash.
#
# The line is first rewritten into a uniform "key value, key value, ..." form
# and then split into key/value pairs.
#
# @param str [String] one connection line, e.g.
#   "TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO"
# @return [Hash] :dst, :src and :flags as Strings, :bytes as an Integer
#   (0 when the line has no bytes field, because nil.to_i is 0)
def hashify_str(str)
  normalized = str.strip
                  .sub(/\s*TCP OUTSIDE/, 'dst') # convert first key
                  .sub(/\s+INSIDE/, ', src')    # convert second key and insert comma
                  .sub(/\s+idle[^,]+,/, '')     # remove 'idle' field

  pairs = normalized.split(/\s*,\s+/).map do |field|
    # Only the key becomes a Symbol. The value is kept as a String even when it
    # contains lowercase letters (e.g. flags such as "UfrIO").
    key, value = field.split(/\s+/, 2)
    [key.to_sym, value]
  end

  h = pairs.to_h
  h[:bytes] = h[:bytes].to_i
  h
end
Array#to_h became available in v. 2.1. With earlier versions use Hash[arr]
rather than arr.to_h
.
(Note IRB and PRY may have a problem executing this format, but it runs fine from the command line.)
Example
str = "TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, " +
"idle 0:00:44, bytes 11365, flags UIO"
hashify_str(str)
#=> {:dst=>"4.2.2.2:443", :src=>"10.17.21.44:63314",
# :bytes=>11365, :flags=>"UIO"}
Explanation
s0 = str.strip
#=> "TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO"
s1 = s0.sub(/\s*TCP OUTSIDE/, 'dst')
#=> "dst 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO"
s2 = s1.sub(/\s+INSIDE/,', src')
#=> "dst 4.2.2.2:443, src 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO"
s3 = s2.sub(/\s+idle[^,]+,/,'')
#=> "dst 4.2.2.2:443, src 10.17.21.44:63314, bytes 11365, flags UIO"
arr0 = s3.split(/\s*,\s+/)
#=> ["dst 4.2.2.2:443", "src 10.17.21.44:63314", "bytes 11365", "flags UIO"]
arr1 = arr0.map {|s| s.split(/\s/).map {|ss|(ss =~ /[a-z]+/) ? ss.to_sym : ss}}
#=> [[:dst, "4.2.2.2:443"], [:src, "10.17.21.44:63314"],
# [:bytes, "11365"], [:flags, "UIO"]]
h = arr1.to_h
#=> {:dst=>"4.2.2.2:443", :src=>"10.17.21.44:63314",
# :bytes=>"11365", :flags=>"UIO"}
h[:bytes] = h[:bytes].to_i
#=> 11365
h
#=> {:dst=>"4.2.2.2:443", :src=>"10.17.21.44:63314",
# :bytes=>11365, :flags=>"UIO"}
Upvotes: 0
Reputation: 160551
Things to consider:
IP_REGEX = '(?:\d{1,3}\.){3}\d{1,3}(?::\d+)?'
input = 'TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO'
input.scan(/(\w+)\s(#{ IP_REGEX })/)
# => [["OUTSIDE", "4.2.2.2:443"], ["INSIDE", "10.17.21.44:63314"]]
scan
looks for the pattern given and returns an array of all matching hits. Because I'm using captures, they're returned as sub-arrays.
If you want the result to be a hash you can do:
input.scan(/(\w+)\s(#{ IP_REGEX })/).to_h # => {"OUTSIDE"=>"4.2.2.2:443", "INSIDE"=>"10.17.21.44:63314"}
or, if you're on an older Ruby that doesn't support to_h
:
Hash[input.scan(/(\w+)\s(#{ IP_REGEX })/)] # => {"OUTSIDE"=>"4.2.2.2:443", "INSIDE"=>"10.17.21.44:63314"}
You could use a simpler scan
pattern and let parallel assignment help you grab the IPs in order:
src, dst = input.scan(/#{ IP_REGEX }/)
Then grab the other two fields however you want and assign them all to your hash:
foo = {
src: src,
dst: dst,
...
}
But, really, I'd take advantage of named captures:
matches = input.match(/(?<src>#{ IP_REGEX }) \w+ (?<dst>#{ IP_REGEX }), idle (?<idle>\S+), bytes (?<bytes>\d+), flags (?<flags>\S+)/)
# => #<MatchData
# "4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO"
# src:"4.2.2.2:443"
# dst:"10.17.21.44:63314"
# idle:"0:00:44"
# bytes:"11365"
# flags:"UIO">
matches['src'] # => "4.2.2.2:443"
matches['dst'] # => "10.17.21.44:63314"
matches['idle'] # => "0:00:44"
matches['bytes'] # => "11365"
matches['flags'] # => "UIO"
At this point matches
acts like a hash as far as allowing access to the individual elements.
If you don't like that it's a simple step to getting a real hash:
matches.names.zip(matches.captures).to_h
# => {"src"=>"4.2.2.2:443",
# "dst"=>"10.17.21.44:63314",
# "idle"=>"0:00:44",
# "bytes"=>"11365",
# "flags"=>"UIO"}
Upvotes: 2
Reputation: 121000
The goal may be achieved with one regexp:
▶ s='TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO'
# => "TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO"
▶ s.match('(?<proto>.*?)\s+OUTSIDE\s+(?<dst>\S+)\s+INSIDE\s+(?<src>[\d\.\:]+).*?idle\s+(?<idle>[\d\.\:]+).*?bytes\s+(?<bytes>\d+).*?flags\s+(?<flags>\S+)')
# => #<MatchData "TCP OUTSIDE 4.2.2.2:443 INSIDE 10.17.21.44:63314, idle 0:00:44, bytes 11365, flags UIO" proto:"TCP" dst:"4.2.2.2:443" src:"10.17.21.44:63314" idle:"0:00:44" bytes:"11365" flags:"UIO">
▶ h = {:dst => $~['dst'], :src => $~['src'], :bytes => $~['bytes'], :flags => $~['flags'] }
# => {
# :bytes => "11365",
# :dst => "4.2.2.2:443",
# :flags => "UIO",
# :src => "10.17.21.44:63314"
# }
Here the magic $~
variable holds the MatchData of the last successful match, which gives direct access to its named captures.
UPD: You might want to go further and automate hash assignment:
▶ $~.names.reduce({}) { |memo, k| memo[k.to_sym] = $~[k]; memo }
# => {
# :bytes => "11365",
# :dst => "4.2.2.2:443",
# :flags => "UIO",
# :idle => "0:00:44",
# :proto => "TCP",
# :src => "10.17.21.44:63314"
# }
Hope it helps.
Upvotes: 2