Reputation: 1
<pubDate> <![CDATA[ Wed, 17 Aug 2022 14:32:47 +0530 ]]></pubDate>
Above is the xml tag now how can I store this date tag into dbms?
from bs4 import BeautifulSoup import requests import pymysql
headers = {
'User-Agent': ' agent'
}
class ReadRss:
def __init__(self, rss_url, headers):
self.url = rss_url
self.headers = headers
try:
self.conn=pymysql.connect(host="localhost", user="root", passwd= "", db="my_python")
self.r = requests.get(rss_url, headers=self.headers)
self.status_code = self.r.status_code
except Exception as e:
print('Error fetching the URL: ', rss_url)
print(e)
try:
self.soup = BeautifulSoup(self.r.text, 'lxml')
except Exception as e:
print('Could not parse the xml: ', self.url)
print(e)
self.articles = self.soup.findAll('item')
for a in self.articles:
self.insertData(a.find('title').text,a.find('pubdate').text,a.link.next_sibling.replace('\n','').replace('\t',''),a.find('description').text,'thehindu')
self.articles_dicts if 'pubdate' in d]
self.conn.close()
def insertData(self,title,date,url,description, source):
myCursor= self.conn.cursor()
query = "INSERT INTO `delhi`(`title`, `url`, `description`, `source`) VALUES(%s,%s,%s,%s)"
args=(title,url,description,source)
myCursor.execute(query,args)
self.conn.commit()
if __name__ == '__main__':
feed = ReadRss('https://www.thehindu.com/news/cities/Delhi/feeder/default.rss', headers)
In this code I've imported beautifulsoup to exract the news from RSS feed and then I've to store it into database but everytime it gives the error for date [CDATA[ Wed, 17 Aug 2022 14:32:47 +0530 ]
How do I store it in database?
Upvotes: 0
Views: 42