Reputation: 3
First, I check the website for links and collect all of them. I need help checking MySQL to see whether the links are already there: if they exist, don't insert them; if some of them don't exist, insert those.
# Asker's attempt: scrape job links and insert them into the `jobtest` table.
# (Block indentation is missing from the snippet as posted.)
# Timestamp string written to the `dato` column of every inserted row.
created_at = time.strftime("%Y/%d/%m/ %H:%M:%S")
afdelings = 'it-support'
url = 'www.careerjet.dk/sog/jobs?s=L%C3%A6rling&l=Danmark'
# Fetch the search-results page and parse its HTML.
r = requests.get("http://" +url)
data = r.text
soup = BeautifulSoup(data, "html.parser")
# Prefix prepended to each scraped href to build the stored link.
side1 = "http://www.careerjet.dk/"
# `connect` is not defined in this snippet; presumably an open MySQL connection.
cur = connect.cursor()
# Visit every <a> that is a direct child of an element with class "title".
for link in soup.select('.title > a'):
linkfrom = side1 + (link.get('href'))
f = string.split(linkfrom, '\n')
for line in f:
# NOTE(review): the left-hand side is a (query, params) tuple, not a query
# result -- the SELECT is never executed, so this comparison to 0 is never true.
if ("""SELECT count(*) from jobtest WHERE link = %s""", (line)) == 0:
cur.execute("""INSERT INTO jobtest (afdeling, dato, link) VALUES (%s, %s, %s)""", (afdelings, created_at, line))
with connect:
connect.commit()
connect.close()
Please, any help is deeply appreciated.
Upvotes: 0
Views: 5142
Reputation: 701
You need to execute the select first.
Something like this:
# Scrape job links from careerjet.dk and insert into `jobtest` only the links
# that are not already stored there.
# `connect` must be an open DB-API connection created before this snippet runs.
# NOTE(review): "%Y/%d/%m/ ..." yields year/day/month with a trailing slash;
# confirm that this is the format the `dato` column expects.
created_at = time.strftime("%Y/%d/%m/ %H:%M:%S")
afdelings = 'it-support'
url = 'www.careerjet.dk/sog/jobs?s=L%C3%A6rling&l=Danmark'
r = requests.get("http://" + url)
data = r.text
soup = BeautifulSoup(data, "html.parser")
side1 = "http://www.careerjet.dk/"
cur = connect.cursor()
try:
    for link in soup.select('.title > a'):
        href = link.get('href')
        if not href:
            # An anchor without an href cannot be turned into a link.
            continue
        linkfrom = side1 + href
        # str.split works on both Python 2 and 3 (string.split is Python 2 only).
        for line in linkfrom.split('\n'):
            #-------ADDED CODE
            # The SELECT has to be executed before anything can be fetched;
            # the parameters must be a tuple, hence the trailing comma.
            cur.execute("""SELECT count(*) from jobtest WHERE link = %s""", (line,))
            # count(*) always returns exactly one row with one column.
            existing = cur.fetchone()[0]
            #-------END ADDED CODE
            if existing == 0:
                cur.execute("""INSERT INTO jobtest (afdeling, dato, link) VALUES (%s, %s, %s)""", (afdelings, created_at, line))
    # Commit once, after all links have been processed.
    connect.commit()
finally:
    # Close the connection even if scraping or a query fails.
    connect.close()
Upvotes: 1