Reputation: 21
Here's the code. It runs for usually somewhere less than an hour and stops giving out the notifications or writing anything to the log file. The code is run as a Startup application on Ubuntu 16.04. I have tried adding exceptions for all kinds of errors in the hope of understanding why the program might be stopping, but to no avail. It just stops. And running it from the terminal used to work earlier, but even that has stopped giving results after a period of time.
import urllib2, os, time, sys, re, pynotify
from bs4 import BeautifulSoup
from os import listdir
from os.path import isfile, join
from random import sample
from datetime import datetime
savepath = '/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/'
count = -1
iflp = True
innerct = 0
rndct = 0
rndct2 = 0
flag = False
try:
while(count < 1000):
time.sleep(3)
count = (count+1) % 30
url = "http://www.insightsonindia.com/category/current-affairs-2"
connect = False
netcnct = True
itntchk = 0
while not connect:
try:
text = urllib2.urlopen('http://www.insightsonindia.com/category/current-affairs-2').read()
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/log.txt', "a") as file:
file.write("{:%B %d, %Y - %H. %M. %S}".format(datetime.now()) + '\t\tIndicator of Something\n')
connect = True
except urllib2.URLError as e:
itntchk = itntchk + 1
if (itntchk > 20):
netcnct = False
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/log.txt', "a") as file:
file.write("{:%B %d, %Y - %H. %M. %S}".format(datetime.now()) + '\t\tInternet Connection Issues\n')
pass
if (netcnct):
soup = BeautifulSoup(text, "lxml")
data = soup.findAll('div',attrs={'class':'post-thumbnail'})
if count == 0:
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/log.txt', "a") as file:
file.write("{:%B %d, %Y - %H. %M. %S}".format(datetime.now()) + '\t\tConnection Established After Sleep\n')
for div in data:
links = div.findAll('a')
for a in links:
if os.path.exists(savepath + 'urlist.txt'):
with open(savepath + 'urlist.txt', "r+") as file:
line_found = any(a['href'] in line for line in file)
if not line_found:
file.seek(0, os.SEEK_END)
connected = False
while not connected:
try:
html = urllib2.urlopen(a['href']).read()
connected = True
except urllib2.URLError as e:
pass
else:
file.write(a['href'] + '\n')
sp = BeautifulSoup(html, "lxml")
txt = sp.get_text()
realtxt = txt.encode('utf-8')
s2 = "print PDFInsights "
s3 = "Please Share This Post If You"
dtxt = realtxt[realtxt.index(s2) + len(s2):realtxt.index(s3)]
if ('Current' in a.get('title')):
p1 = [m.start() for m in re.finditer('Paper 1 Topic', dtxt)]
p2 = [m.start() for m in re.finditer('Paper 2 Topic', dtxt)]
p3 = [m.start() for m in re.finditer('Paper 3 Topic', dtxt)]
p4 = [m.start() for m in re.finditer('Paper 4 Topic', dtxt)]
pre = [m.start() for m in re.finditer('Facts for Prelims', dtxt)]
onlyfiles = [f for f in listdir('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Pre/') if isfile(join('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Pre/', f))]
ltvr = []
while(len(p4)>0):
ltvr.append(min(p4))
p4.remove(min(p4))
while(len(p3)>0):
ltvr.append(min(p3))
p3.remove(min(p3))
while(len(p2)>0):
ltvr.append(min(p2))
p2.remove(min(p2))
while(len(p1)>0):
ltvr.append(min(p1))
p1.remove(min(p1))
while(len(pre)>0):
ltvr.append(min(pre))
pre.remove(min(pre))
i = 0
ltvr.sort()
while (i < len(ltvr)):
if(dtxt[ltvr[i]+6]=='1'):
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Paper1/' + a.get('title') + '.txt', "a") as file:
file.write(dtxt[ltvr[i]:ltvr[i+1]] + '\n')
elif(dtxt[ltvr[i]+6]=='2'):
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Paper2/' + a.get('title') + '.txt', "a") as file:
file.write(dtxt[ltvr[i]:ltvr[i+1]] + '\n')
elif(dtxt[ltvr[i]+6]=='3'):
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Paper3/' + a.get('title') + '.txt', "a") as file:
file.write(dtxt[ltvr[i]:ltvr[i+1]] + '\n')
elif(dtxt[ltvr[i]+6]=='4'):
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Paper4/' + a.get('title') + '.txt', "a") as file:
file.write(dtxt[ltvr[i]:ltvr[i+1]] + '\n')
elif(dtxt[ltvr[i]+6]=='f'):
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Pre/' + str(len(onlyfiles)+1) + '.txt', "w+") as file:
file.write(dtxt[ltvr[i]:])
i = i + 1
elif ('Editorial' in a.get('title')):
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Editorials/' + a.get('title') + '.txt', "w+") as file:
file.write(dtxt)
else:
with open(savepath + 'urlist.txt', "w+") as file:
file.write(a['href'] + '\n')
if ((count == 0) or (netcnct == False)):
innerct = (innerct + 1) % 2
if (netcnct == False):
innerct = 0
if (innerct == 0):
pynotify.init("Basic")
onlyfiles = [f for f in listdir('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Pre/') if isfile(join('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Pre/', f))]
Tr = True
Tre = True
while (Tr):
if (rndct == 0):
rndn = sample(xrange(1, len(onlyfiles)), len(onlyfiles) - 1)
try:
filename = '/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/Pre/' + str(rndn[rndct]) + '.txt'
if (flag):
rndct = (rndct + 1) % (len(onlyfiles) - 1)
flag = False
lines = open(filename).read().splitlines()
if (rndct2 == 0):
rnd2 = sample(xrange(1, len(lines)), len(lines) - 1)
while (Tre):
chk = rndct2
rndct2 = (rndct2 + 1) % (len(lines) - 1)
if(rndct2 < chk):
flag = True
try:
if (lines[rnd2[rndct2]][0].isalnum()):
Tre = False
except IndexError:
pass
Tr = False
except (IOError,IndexError):
pass
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/log.txt', "a") as file:
file.write("{:%B %d, %Y - %H. %M. %S}".format(datetime.now()) + '\tFileCt\t' + str(rndct) + '\t' + str(rndn[rndct]) + '\tLineCt\t' + str(rndct2) + '\t' + str(rnd2[rndct2]) + '\n')
n = pynotify.Notification("Fun Facts",
lines[rnd2[rndct2]]
)
n.show()
except Exception:
with open('/mnt/8E1C30331C301923/PersonalAthul/CS/Insights/Current/log.txt', "a") as file:
file.write("{:%B %d, %Y - %H. %M. %S}".format(datetime.now()) + '\t\t Sorry bro') #+ sys.exc_info()[0])
Upvotes: 2
Views: 74