Reputation: 55
I am trying to write multiple regex matches to a file on a single line, formatted in a specific way.
# NOTE(review): the pattern below is not quoted, so this line is not valid Python as posted.
matches = re.findall(<form>(.*?)</form>, line, re.DOTALL)
for form in matches:
# Each match is wrapped in single quotes and followed by ", " -- the source of the trailing comma.
form = ("'" + form + "', ")
# NOTE(review): indentation was lost in this paste; presumably the open/write below run once per match.
f = open(new_file, 'a+')
# Only trailing newlines are stripped before writing; the file is not closed in the visible code.
f.write(form.rstrip('\n'), )
The above gives me this:
'form1', 'form2',....,'formN',
How can I have them enclosed in parentheses and no comma at the end like below?
('form1', 'form2',....,'formN')
Thanks so much.
Upvotes: 0
Views: 112
Reputation: 1
I am new to Python and am stuck on a problem: I have a script that can parse HTML lines using a single regex. I want to search using multiple words in order to get more relevant results. My code is attached below.
#!/usr/bin/python
#!/usr/bin/env python
# Python file to monitor pastebin for pastes containing the passed regex
import sys
import time
import urllib
import re
# Script body: fetches the pastebin archive page, loads the raw text of each
# listed paste, and writes a "FOUND ..." line to sj1.txt when search_term matches.
# NOTE(review): indentation was lost in this paste, so the nesting described in
# the notes below is inferred, not shown.
# Output log; opened for writing (truncates any existing file) and never closed in the visible code.
f = open('sj1.txt', 'w')
# User-defined variables
time_between = 7 #Seconds between iterations (not including time used to fetch pages - setting below 5s may cause a pastebin IP block, too high may miss pastes)
error_on_cl_args = "Please provide a single regex search via the command line" #Error to display if improper command line arguments are provided
# Check for command line argument (a single regex)
# NOTE(review): this branch runs when len(sys.argv) is NOT 3, yet it reads
# sys.argv[1] through sys.argv[3] (which needs a length of at least 4); the
# error message is printed only when the length is exactly 3.
if len(sys.argv) != 3:
# NOTE(review): the expression is continued across lines without parentheses or
# backslashes, which is a syntax error as written. Also, `a and b and c`
# evaluates to the last operand when all are truthy -- it does not combine
# the three words into one pattern.
search_term = sys.argv[1] and
sys.argv[2] and
sys.argv[3]
print search_term
else:
print error_on_cl_args
exit()
# Loop flag doubling as the iteration number shown in the status message.
iterater = 1
while(iterater):
counter = 0
print "Scanning pastebin - iteration " + str(iterater) + "..."
#Open the recently posted pastes page
try:
url = urllib.urlopen("http://pastebin.com/archive")
html = url.read()
url.close()
html_lines = html.split('\n')
for line in html_lines:
# Processing stops once counter reaches 308; presumably counter tracks lines
# examined -- the nesting of the increment is not recoverable from this paste.
if counter < 308:
#print line
if re.search(r'<td><img src="/i/t.gif" class="i_p0" alt="" /><a href="/[0-9a-zA-Z]{8}">.*</a></td>', line ):
#print 'I am here'
# The paste ID is taken from fixed character offsets 61..68 of the line;
# assumes the 8-character ID always sits there -- TODO confirm against the page markup.
link_id = line[61:69]
print link_id
#Begin loading of raw paste text
url_2 = urllib.urlopen("http://pastebin.com/raw.php?i=" + link_id)
raw_text = url_2.read()
#print raw_text
url_2.close()
#if search_term in raw_text:
# The r'' prefix applies only to the empty literal; search_term is used as the regex unchanged.
if re.search(r''+search_term, raw_text):
print >> f, "FOUND " + search_term + " in http://pastebin.com/raw.php?i=" + link_id
counter += 1
except(IOError):
# NOTE(review): the message below contains a stray `enter code here` editor placeholder.
print "Network error - are `enter code here`you connected?"
# NOTE(review): a bare except also catches KeyboardInterrupt and SystemExit.
except:
print "Fatal error! Exiting."
exit()
# Setting the flag to 0 makes the while condition false; presumably this sits
# inside the loop, in which case only a single pass is ever made.
iterater =0
time.sleep(time_between)
Upvotes: 0
Reputation: 15641
As per official documentation, re.findall
returns "all non-overlapping matches of pattern in string, as a list of strings".
Thus,
# Quote each match, separate the items with ", ", and wrap the result in
# parentheses. Joining puts separators only between items, so there is no
# trailing comma; an empty match list yields "()".
myline = "(" + ", ".join("'" + m + "'" for m in matches) + ")"
# WRITE TO FILE
Upvotes: 0
Reputation: 2743
Something like this?
# Collect the body of every <form>...</form> pair; re.DOTALL lets a single
# match span multiple lines. The pattern must be a (raw) string literal.
matches = re.findall(r'<form>(.*?)</form>', line, re.DOTALL)
if matches:
    # Join once so quotes and commas appear only between items, then wrap the
    # whole list in parentheses: ('form1', 'form2', ..., 'formN').
    # The with-block closes the file even if the write raises.
    with open(new_file, 'a+') as f:
        f.write("('%s')" % "', '".join(matches))
Upvotes: 1