Reputation: 285
I'm trying to extract IP addresses from an .asp file using Python. The file looks something like this:
onInternalNet = (
isInNet(hostDNS, "147.163.1.0", "255.255.0.0") ||
isInNet(hostDNS, "123.264.0.0", "255.255.0.0") ||
isInNet(hostDNS, "137.5.0.0", "255.0.0.0") ||
isInNet(hostDNS, "100.01.02.0", "255.0.0.0") ||
isInNet(hostDNS, "172.146.30.0", "255.240.0.0") ||
isInNet(hostDNS, "112.268.0.0", "255.255.0.0") ||
How I'm attempting to extract them is with a regex:
if re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", line):
However I'm getting an error:
Traceback (most recent call last):
File "pull_proxy.py", line 27, in <module>
write_to_file(extract_proxies(in_file), out_file)
File "pull_proxy.py", line 8, in extract_proxies
if re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", line):
File "C:\Python27\lib\re.py", line 194, in compile
return _compile(pattern, flags)
File "C:\Python27\lib\re.py", line 233, in _compile
bypass_cache = flags & DEBUG
TypeError: unsupported operand type(s) for &: 'str' and 'int'
I don't understand why I'm getting that error. What can I change in this code to make it extract the information I want?
import re
def extract_proxies(in_file):
# Intended to collect the lines of in_file that consist solely of a dotted-quad IP address.
buffer = []
for line in in_file:
# NOTE: re.compile()'s second positional argument is `flags` (an integer), so passing the
# string `line` here raises the TypeError shown in the traceback; `line` is never tested.
if re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", line):
print "{} appened to buffer.".format(line)
buffer.append(line)
else:
pass
return buffer
def write_to_file(buffer, out_file):
# Appends every entry of buffer to out_file; the file is reopened in "a+" mode for each entry.
for proxy in buffer:
with open(out_file, "a+") as res:
# Entries are written as-is; no separator is added between them.
res.write(proxy)
if __name__ == '__main__':
print "Running...."
# NOTE: in_file is a path string, not an open file, so extract_proxies would iterate
# over the characters of the path rather than over the lines of the file.
in_file = "C:/Users/thomas_j_perkins/Downloads/test.asp"
out_file = "c:/users/thomas_j_perkins/Downloads/results.txt"
write_to_file(extract_proxies(in_file), out_file)
EDIT
Realized I hadn't opened the file:
import re
def extract_proxies(in_file):
# Intended to collect the lines of in_file that consist solely of a dotted-quad IP address.
buffer = []
for line in in_file:
# NOTE: `line` is still passed as re.compile()'s `flags` argument (an integer is expected),
# so opening the file did not change the TypeError; the pattern is never applied to `line`.
if re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", line):
print "{} appened to buffer.".format(line)
buffer.append(line)
else:
pass
# Closes the handle that the caller opened.
in_file.close()
return buffer
def write_to_file(buffer, out_file):
# Appends every entry of buffer to out_file; the file is reopened in "a+" mode for each entry.
for proxy in buffer:
with open(out_file, "a+") as res:
# Entries are written as-is; no separator is added between them.
res.write(proxy)
if __name__ == '__main__':
print "Running...."
in_file = "C:/Users/thomas_j_perkins/Downloads/PAC-Global-Vista.asp"
out_file = "c:/users/thomas_j_perkins/Downloads/results.txt"
# The input is now opened (in "r+" mode) before being handed over; extract_proxies closes it.
write_to_file(extract_proxies(open(in_file, "r+")), out_file)
Still getting the same error:
Running....
Traceback (most recent call last):
File "pull_proxy.py", line 28, in <module>
write_to_file(extract_proxies(open(in_file)), out_file)
File "pull_proxy.py", line 8, in extract_proxies
if re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", line):
File "C:\Python27\lib\re.py", line 194, in compile
return _compile(pattern, flags)
File "C:\Python27\lib\re.py", line 233, in _compile
bypass_cache = flags & DEBUG
TypeError: unsupported operand type(s) for &: 'str' and 'int'
Upvotes: 0
Views: 3710
Reputation: 4196
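Please check the code below. I made a couple of changes: the regex is compiled once, before the loop, and its capture group is used to pull the first quoted IP address out of each matching line.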
import re
def extract_proxies(in_file):
    """Return the first quoted IP address of every ``name(arg, "a.b.c.d"`` line.

    Args:
        in_file: An open, iterable file-like object. It is closed before this
            function returns, even if reading fails part-way through.

    Returns:
        A list of dotted-quad strings, one per matching line, in file order.
    """
    # Compiled once, outside the loop. Matches optional leading whitespace,
    # a call such as ``isInNet(hostDNS, `` and captures the first quoted
    # address. Octets are only checked for 1-3 digits, not for the 0-255 range.
    m = re.compile(r'\s*\w+\(\w+,\s+\"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\"')
    buffer1 = []
    try:
        for line in in_file:
            # match() anchors at the start of the line, so lines that do not
            # begin with the call (e.g. "onInternalNet = (") are skipped.
            r = m.match(line)
            if r is not None:
                # Single-argument print() prints the same on Python 2 and 3.
                print("{} appened to buffer.".format(line))
                buffer1.append(r.group(1))
    finally:
        in_file.close()
    return buffer1
def write_to_file(buffer1, out_file):
    """Append each entry of *buffer1* to *out_file*, one entry per line.

    Args:
        buffer1: Iterable of strings to write.
        out_file: Path of the file to append to; created if missing.
    """
    lines = [proxy + '\n' for proxy in buffer1]
    # Skip opening entirely when there is nothing to write, so that an empty
    # input does not create an empty output file.
    if not lines:
        return
    # Open once for the whole batch instead of once per entry.
    with open(out_file, "a") as res:
        res.writelines(lines)
if __name__ == '__main__':
    # Script entry point: read sample.txt, append the extracted addresses to
    # results.txt.
    print("Running....")
    in_file = "sample.txt"
    out_file = "results.txt"
    # `with` guarantees the input handle is released even if extraction
    # raises; a second close() inside extract_proxies is a harmless no-op.
    with open(in_file) as source:
        proxies = extract_proxies(source)
    write_to_file(proxies, out_file)
Output:
C:\Users\dinesh_pundkar\Desktop>python c.py
Running....
isInNet(hostDNS, "147.163.1.0", "255.255.0.0") ||
appened to buffer.
isInNet(hostDNS, "123.264.0.0", "255.255.0.0") ||
appened to buffer.
isInNet(hostDNS, "137.5.0.0", "255.0.0.0") ||
appened to buffer.
isInNet(hostDNS, "100.01.02.0", "255.0.0.0") ||
appened to buffer.
isInNet(hostDNS, "172.146.30.0", "255.240.0.0") ||
appened to buffer.
isInNet(hostDNS, "112.268.0.0", "255.255.0.0") || appened to buffer.
C:\Users\dinesh_pundkar\Desktop>python c.py
Upvotes: 1
Reputation: 132018
Your initial error
TypeError: unsupported operand type(s) for &: 'str' and 'int'
is caused by exactly what @Moses said in his answer. flags are supposed to be int values, not strings.
You should compile your regex once. Also, you need to use an open file handle when you iterate over the lines.
import re
# Matches any dotted-quad of 1-3 digit groups anywhere in a string; compiled
# once at import time so the loop below does not rebuild it per line.
IP_MATCHER = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})")


def extract_proxies(fh):
    """Print each line of *fh* that contains a dotted-quad, then its matches.

    Args:
        fh: An iterable of text lines, e.g. an open file handle. It is not
            closed here.

    Returns:
        None. Results are only printed, not collected.
    """
    for line in fh:
        line = line.strip()
        # findall() returns every address on the line (IP and mask alike).
        match = IP_MATCHER.findall(line)
        if match:
            # Single-argument print() prints the same on Python 2 and 3.
            print("{} appened to buffer.".format(line))
            print(match)
def write_to_file(buffer, out_file):
    """Append every entry of *buffer* to *out_file* exactly as given.

    Args:
        buffer: Iterable of strings; no separator or newline is added.
        out_file: Path of the file to append to; created if missing.
    """
    entries = list(buffer)
    # Skip opening entirely when there is nothing to write, so that an empty
    # input does not create an empty output file.
    if not entries:
        return
    # Open once for the whole batch instead of once per entry.
    with open(out_file, "a") as res:
        res.writelines(entries)
if __name__ == '__main__':
    # Script entry point: scan in.txt and print the addresses found per line.
    print("Running....")
    with open("in.txt") as fh:
        extract_proxies(fh)
This will find all matches; if you only want the first, use `IP_MATCHER.search` and `match.groups()`. This is, of course, assuming you actually want to extract the IP addresses.
For instance:
def extract_proxies(fh):
    """Print the IP/mask pair of every line of *fh* holding exactly two addresses.

    Args:
        fh: An iterable of text lines, e.g. an open file handle. It is not
            closed here.

    Returns:
        None. Results are only printed, not collected.
    """
    for line in fh:
        line = line.strip()
        match = IP_MATCHER.findall(line)
        # Only lines with exactly two dotted-quads are reported; the first is
        # taken as the address and the second as its mask.
        if len(match) == 2:
            # Single-argument print() prints the same on Python 2 and 3.
            print("{} appened to buffer.".format(line))
            ip, mask = match
            print("IP: %s => Mask: %s" % (ip, mask))
Upvotes: 1
Reputation: 78556
`re.compile` was expecting an appropriate `flags` parameter (an integer), which `line` (a string) is not.
You should be using `re.match`, not `re.compile`:
Compile a regular expression pattern into a regular expression object, which can be used for matching using its `match()` and `search()` methods...
Upvotes: 2