Reputation: 53
I have the following R script running on my linux VPS that frequently returns errors thus interrupting the script. I am not sure how to program around the errors and was wondering if there was a way to force the script to continue to run despite them. The errors generally occur as out of bounds errors for the "results" table. The errors still occur when pasting the code directly into R< but when the out of bounds reference to the "results" table occurs, it stays at its previously set value of 0 and thus the code works as intended. Any help as to how to make this run automatically from linux command line example: (Rscript /folder/file.R) would be greatly appreciated.
library(RMySQL)
library(twitteR)
library(plyr)
library(stringr)
library(sentiment)
Date<-format(Sys.time(),"%Y-%m-%d %H:%M")
Time<-format(Sys.time(),"%H:%M")
tweets.con<-dbConnect(MySQL(),user="xxxxxxxxxxxx",password="xxxxxxxxxxxx",dbname="xxxxxxxxxx",host="xxxxxxxxxxxxxxxxxxxx.com")
Feel<-dbGetQuery(tweets.con,"select `tweet_text` from `tweets` where `created_at` BETWEEN timestamp(DATE_ADD(NOW(), INTERVAL 49 MINUTE)) AND timestamp(DATE_ADD(NOW(), INTERVAL 60 MINUTE))")
length(as.matrix(Feel))
n<-length(as.matrix(Feel))
Total_Count<-length(as.matrix(Feel))
results.con<-dbConnect(MySQL(),user="xxxxxxxxxxx",password="xxxxxxxxxxxxxxxxxx",dbname="xxxxxxxxxxxxxx",host="xxxxxxxxxxxxxxxxxx")
last.results.alt<-dbGetQuery(results.con,"select `Neg_Prop_Alt`,`Neu_Prop_Alt`,`Pos_Prop_Alt`,`neg5_Prop`,`neg4_Prop`,`neg3_Prop`,`neg2_Prop`,`neg1_Prop`,`zero_Prop`,`pos1_Prop`,`pos2_Prop`,`pos3_Prop`,`pos4_Prop`,`pos5_Prop` from `results_10m_alt` ORDER BY Date DESC LIMIT 1")
# function score.sentiment
score.sentiment = function(sentences, pos.words, neg.words, .progress='none')
{
# Parameters
# sentences: vector of text to score
# pos.words: vector of words of postive sentiment
# neg.words: vector of words of negative sentiment
# .progress: passed to laply() to control of progress bar
# create simple array of scores with laply
scores = laply(sentences,
function(sentence, pos.words, neg.words)
{
# remove retweet entities
sentence = gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", sentence)
# remove at people
sentence = gsub("@\\w+", "", sentence)
# remove punctuation
sentence = gsub("[[:punct:]]", "", sentence)
# remove numbers
sentence = gsub("[[:digit:]]", "", sentence)
# remove control characters
sentence = gsub("[[:cntrl:]]", "", sentence)
# remove html links
sentence = gsub("http\\w+", "", sentence)
# remove unnecessary spaces
sentence = gsub("[ \t]{2,}", "", sentence)
sentence = gsub("^\\s+|\\s+$", "", sentence)
# define error handling function when trying tolower
tryTolower = function(x)
{
# create missing value
y = NA
# tryCatch error
try_error = tryCatch(tolower(x), error=function(e) e)
# if not an error
if (!inherits(try_error, "error"))
y = tolower(x)
# result
return(y)
}
# use tryTolower with sapply
sentence = sapply(sentence, tryTolower)
# split sentence into words with str_split (stringr package)
word.list = str_split(sentence, "\\s+")
words = unlist(word.list)
# compare words to the dictionaries of positive & negative terms
pos.matches = match(words, pos.words)
neg.matches = match(words, neg.words)
# get the position of the matched term or NA
# we just want a TRUE/FALSE
pos.matches = !is.na(pos.matches)
neg.matches = !is.na(neg.matches)
# final score
score = sum(pos.matches) - sum(neg.matches)
return(score)
}, pos.words, neg.words, .progress=.progress )
# data frame with scores for each sentence
scores.df = data.frame(text=sentences, score=scores)
return(scores.df)
}
# import positive and negative words
pos = readLines("/home/jgraab/R/scripts/positive_words.txt")
neg = readLines("/home/jgraab/R/scripts/negative_words.txt")
Feel_txt = sapply(Feel, function(x) gettext(x))
scores.df = score.sentiment(Feel_txt, pos, neg, .progress='text')
results<-table(scores.df[,2])+.0001
#Set Table Defaults
Neg_Count_Alt<-0
Neg_Prop_Alt<-0
Neg_Change_Alt<-0
Neu_Count_Alt<-0
Neu_Prop_Alt<-0
Neu_Change_Alt<-0
Pos_Count_Alt<-0
Pos_Prop_Alt<-0
Pos_Change_Alt<-0
neg5_Count<-0
neg5_Prop<-0
neg5_Change<-0
neg4_Count<-0
neg4_Prop<-0
neg4_Change<-0
neg3_Count<-0
neg3_Prop<-0
neg3_Change<-0
neg2_Count<-0
neg2_Prop<-0
neg2_Change<-0
neg1_Count<-0
neg1_Prop<-0
neg1_Change<-0
zero_Count<-0
zero_Prop<-0
zero_Change<-0
pos1_Count<-0
pos1_Prop<-0
pos1_Change<-0
pos2_Count<-0
pos2_Prop<-0
pos2_Change<-0
pos3_Count<-0
pos3_Prop<-0
pos3_Change<-0
pos4_Count<-0
pos4_Prop<-0
pos4_Change<-0
pos5_Count<-0
pos5_Prop<-0
pos5_Change<-0
#Get Table Results
neg5_Count<-results[["-5"]]
neg5_Prop<-neg5_Count/Total_Count
neg5_Change<-(neg5_Prop-as.numeric(last.results.alt[[4]]))/as.numeric(last.results.alt[[4]])*100
neg4_Count<-results[["-4"]]
neg4_Prop<-neg4_Count/Total_Count
neg4_Change<-(neg4_Prop-as.numeric(last.results.alt[[5]]))/as.numeric(last.results.alt[[5]])*100
neg3_Count<-results[["-3"]]
neg3_Prop<-neg3_Count/Total_Count
neg3_Change<-(neg3_Prop-as.numeric(last.results.alt[[6]]))/as.numeric(last.results.alt[[6]])*100
neg2_Count<-results[["-2"]]
neg2_Prop<-neg2_Count/Total_Count
neg2_Change<-(neg2_Prop-as.numeric(last.results.alt[[7]]))/as.numeric(last.results.alt[[7]])*100
neg1_Count<-results[["-1"]]
neg1_Prop<-neg1_Count/Total_Count
neg1_Change<-(neg1_Prop-as.numeric(last.results.alt[[8]]))/as.numeric(last.results.alt[[8]])*100
zero_Count<-results[["0"]]
zero_Prop<-zero_Count/Total_Count
zero_Change<-(zero_Prop-as.numeric(last.results.alt[[9]]))/as.numeric(last.results.alt[[9]])*100
pos1_Count<-results[["1"]]
pos1_Prop<-pos1_Count/Total_Count
pos1_Change<-(pos1_Prop-as.numeric(last.results.alt[[10]]))/as.numeric(last.results.alt[[10]])*100
pos2_Count<-results[["2"]]
pos2_Prop<-pos2_Count/Total_Count
pos2_Change<-(pos2_Prop-as.numeric(last.results.alt[[11]]))/as.numeric(last.results.alt[[11]])*100
pos3_Count<-results[["3"]]
pos3_Prop<-pos3_Count/Total_Count
pos3_Change<-(pos3_Prop-as.numeric(last.results.alt[[12]]))/as.numeric(last.results.alt[[12]])*100
pos4_Count<-results[["4"]]
pos4_Prop<-pos4_Count/Total_Count
pos4_Change<-(pos4_Prop-as.numeric(last.results.alt[[13]]))/as.numeric(last.results.alt[[13]])*100
pos5_Count<-results[["5"]]
Pos5_Prop<-pos5_Count/Total_Count
Pos5_Change<-(pos5_Prop-as.numeric(last.results.alt[[14]]))/as.numeric(last.results.alt[[14]])*100
#Get Negative, Neutral, and Positive Totals
Neg_Count_Alt<-neg5_Count+neg4_Count+neg3_Count+neg2_Count+neg1_Count
Neg_Prop_Alt<-Neg_Count_Alt/Total_Count
Neg_Change_Alt<-(Neg_Prop_Alt-as.numeric(last.results.alt[[1]]))/as.numeric(last.results.alt[[1]])*100
Neu_Count_Alt<-zero_Count
Neu_Prop_Alt<-Neu_Count_Alt/Total_Count
Neu_Change_Alt<-(Neu_Prop_Alt-as.numeric(last.results.alt[[2]]))/as.numeric(last.results.alt[[2]])*100
Pos_Count_Alt<-pos1_Count+pos2_Count+pos3_Count+pos4_Count+pos5_Count
Pos_Prop_Alt<-Pos_Count_Alt/Total_Count
Pos_Change_Alt<-(Pos_Prop_Alt-as.numeric(last.results.alt[[3]]))/as.numeric(last.results.alt[[3]])*100
Mean<-(-5*neg5_Count-4*neg4_Count-3*neg3_Count-2*neg2_Count-neg1_Count+pos1_Count+2*pos2_Count+3*pos3_Count+4*pos4_Count+5*pos5_Count)/Total_Count
Feel_alt.df<-data.frame(Date,Time,Total_Count,Mean,Neg_Count_Alt,Neg_Prop_Alt,Neg_Change_Alt,Neu_Count_Alt,Neu_Prop_Alt,Neu_Change_Alt,Pos_Count_Alt,Pos_Prop_Alt,Pos_Change_Alt,
neg5_Count,neg5_Prop,neg5_Change,neg4_Count,neg4_Prop,neg4_Change,neg3_Count,neg3_Prop,neg3_Change,neg2_Count,neg2_Prop,neg2_Change,neg1_Count,neg1_Prop,neg1_Change,
zero_Count,zero_Prop,zero_Change,pos1_Count,pos1_Prop,pos1_Change,pos2_Count,pos2_Prop,pos2_Change,pos3_Count,pos3_Prop,pos3_Change,pos4_Count,pos4_Prop,pos4_Change,pos5_Count,pos5_Prop,pos5_Change)
dbWriteTable(results.con,name="results_10m_alt",Feel_alt.df,append=T,overwrite=F,row.names=F)
Upvotes: 0
Views: 2241
Reputation: 121568
It is normal you have a lot of errors with such code!!
trycatch
after.Your program should look to something like this :
data <- load.tweets() ## read inputs
scores <- score.sentiment(data,...)) ## clean data/extract info
ratios <- compute.ratios(scores,data,...)) ## analysis
save.results(ratios,data,...)) ## save results
For example , Here I try to factorize 2 parts :
here i am using mapply
since you repeat the same statements for all your coefficients.
compute.ratios <- function(){
mapply(function(x,y){
pos5_Count <-results[[x]]
Pos5_Prop <- pos5_Count/Total_Count
val <- as.numeric(last.results.alt[[y]]) ## you should check that val !=0
Pos5_Change <- (pos5_Prop-val)/val*100
},names(results),seq_along(last.results.alt))
}
load.tweets <- function(){
tweets.con <- dbConnect(MySQL(),user="xxxxxxxxxxxx",password="xxxxxxxxxxxx",
dbname="xxxxxxxxxx",host="xxxxxxxxxxxxxxxxxxxx.com")
Feel <- dbGetQuery(tweets.con,"SELECT `tweet_text`
FROM `tweets`
WHERE `created_at`
BETWEEN timestamp(DATE_ADD(NOW(), INTERVAL 49 MINUTE))
AND timestamp(DATE_ADD(NOW(), INTERVAL 60 MINUTE))")
n <- length(as.matrix(Feel))
Total_Count<- n
results.con<-dbConnect(MySQL(),user="xxxxxxxxxxx",password="xxxxxxxxxxxxxxxxxx",
dbname="xxxxxxxxxxxxxx",host="xxxxxxxxxxxxxxxxxx")
last.results.alt <- dbGetQuery(results.con,"SELECT `Neg_Prop_Alt`,`Neu_Prop_Alt`,
`Pos_Prop_Alt`,`neg5_Prop`,`neg4_Prop`,
`neg3_Prop`,`neg2_Prop`,`neg1_Prop`,
`zero_Prop`,`pos1_Prop`,`pos2_Prop`,`pos3_Prop`,
`pos4_Prop`,`pos5_Prop`
FROM `results_10m_alt`
ORDER BY Date DESC LIMIT 1")
list(Feel=Feel,last.results.alt =last.results.alt )
}
Upvotes: 1
Reputation: 115392
You can use the evaluate
package. knitr
uses the evaluate
package to process literate documents.
We can use knit
to use evaulate
A simple example would be a script test.r
x <- -1:5
a <- 'a'
x <- x + a
print(x)
We can use `kintr
library(knitr)
knit(text = paste('```{r}',source('test.r'),'```',collapse = '\n'))
## Error in x + a : non-numeric argument to binary operator
# note the error has occured, but evalulation proceeded on
# and x is defined
x
## [1] -1 0 1 2 3 4 5
Using evaluate
in a rudimentary manner
# cleaning up to show that x will be redefined
rm(x)
evaluate(input = paste(readLines('test.r')))
## [[1]]
## $src
## [1] "x <- -1:5\n"
##
## attr(,"class")
## [1] "source"
##
## [[2]]
## $src
## [1] "a <- 'a'\n"
##
## attr(,"class")
## [1] "source"
##
## [[3]]
## $src
## [1] "x <- x + a\n"
##
## attr(,"class")
## [1] "source"
##
## [[4]]
## <simpleError in x + a: non-numeric argument to binary operator>
##
## [[5]]
## $src
## [1] "print(x)"
##
## attr(,"class")
## [1] "source"
##
## [[6]]
## [1] "[1] -1 0 1 2 3 4 5\n"
# and x is defined!
x
## [1] -1 0 1 2 3 4 5
Upvotes: 0
Reputation: 57686
Use try
or tryCatch
(the former is simpler and generally all you need). You're already using tryCatch
later on, so use it to deal with your problematic query as well.
Upvotes: 1