Reputation: 53

Ignoring errors and continuing to run when executing R script from command line Linux

I have the following R script running on my linux VPS that frequently returns errors thus interrupting the script. I am not sure how to program around the errors and was wondering if there was a way to force the script to continue to run despite them. The errors generally occur as out of bounds errors for the "results" table. The errors still occur when pasting the code directly into R< but when the out of bounds reference to the "results" table occurs, it stays at its previously set value of 0 and thus the code works as intended. Any help as to how to make this run automatically from linux command line example: (Rscript /folder/file.R) would be greatly appreciated.

library(RMySQL)
library(twitteR)
library(plyr)
library(stringr)
library(sentiment)

Date<-format(Sys.time(),"%Y-%m-%d %H:%M")
Time<-format(Sys.time(),"%H:%M")

tweets.con<-dbConnect(MySQL(),user="xxxxxxxxxxxx",password="xxxxxxxxxxxx",dbname="xxxxxxxxxx",host="xxxxxxxxxxxxxxxxxxxx.com")
Feel<-dbGetQuery(tweets.con,"select `tweet_text` from `tweets` where `created_at` BETWEEN timestamp(DATE_ADD(NOW(), INTERVAL 49 MINUTE)) AND timestamp(DATE_ADD(NOW(), INTERVAL 60 MINUTE))")

length(as.matrix(Feel))
n<-length(as.matrix(Feel))
Total_Count<-length(as.matrix(Feel))

results.con<-dbConnect(MySQL(),user="xxxxxxxxxxx",password="xxxxxxxxxxxxxxxxxx",dbname="xxxxxxxxxxxxxx",host="xxxxxxxxxxxxxxxxxx")
last.results.alt<-dbGetQuery(results.con,"select `Neg_Prop_Alt`,`Neu_Prop_Alt`,`Pos_Prop_Alt`,`neg5_Prop`,`neg4_Prop`,`neg3_Prop`,`neg2_Prop`,`neg1_Prop`,`zero_Prop`,`pos1_Prop`,`pos2_Prop`,`pos3_Prop`,`pos4_Prop`,`pos5_Prop` from `results_10m_alt` ORDER BY Date DESC LIMIT 1")

# function score.sentiment
score.sentiment = function(sentences, pos.words, neg.words, .progress='none')
{
   # Parameters
   # sentences: vector of text to score
   # pos.words: vector of words of postive sentiment
   # neg.words: vector of words of negative sentiment
   # .progress: passed to laply() to control of progress bar

   # create simple array of scores with laply
   scores = laply(sentences,
   function(sentence, pos.words, neg.words)
   {

# remove retweet entities
sentence = gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", sentence)
# remove at people
sentence = gsub("@\\w+", "", sentence)
# remove punctuation
sentence = gsub("[[:punct:]]", "", sentence)
# remove numbers
sentence = gsub("[[:digit:]]", "", sentence)
# remove control characters
sentence = gsub("[[:cntrl:]]", "", sentence)
# remove html links
sentence = gsub("http\\w+", "", sentence)
# remove unnecessary spaces
sentence = gsub("[ \t]{2,}", "", sentence)
sentence = gsub("^\\s+|\\s+$", "", sentence)

      # define error handling function when trying tolower
      tryTolower = function(x)
      {
         # create missing value
         y = NA
         # tryCatch error
         try_error = tryCatch(tolower(x), error=function(e) e)
         # if not an error
         if (!inherits(try_error, "error"))
         y = tolower(x)
         # result
         return(y)
      }
      # use tryTolower with sapply 
      sentence = sapply(sentence, tryTolower)

      # split sentence into words with str_split (stringr package)
      word.list = str_split(sentence, "\\s+")
      words = unlist(word.list)

      # compare words to the dictionaries of positive & negative terms
      pos.matches = match(words, pos.words)
      neg.matches = match(words, neg.words)

      # get the position of the matched term or NA
      # we just want a TRUE/FALSE
      pos.matches = !is.na(pos.matches)
      neg.matches = !is.na(neg.matches)

      # final score
      score = sum(pos.matches) - sum(neg.matches)
      return(score)
      }, pos.words, neg.words, .progress=.progress )

   # data frame with scores for each sentence
   scores.df = data.frame(text=sentences, score=scores)
   return(scores.df)
}

# import positive and negative words
pos = readLines("/home/jgraab/R/scripts/positive_words.txt")
neg = readLines("/home/jgraab/R/scripts/negative_words.txt")

Feel_txt = sapply(Feel, function(x) gettext(x))

scores.df = score.sentiment(Feel_txt, pos, neg, .progress='text')
results<-table(scores.df[,2])+.0001

#Set Table Defaults
Neg_Count_Alt<-0
Neg_Prop_Alt<-0
Neg_Change_Alt<-0
Neu_Count_Alt<-0
Neu_Prop_Alt<-0
Neu_Change_Alt<-0
Pos_Count_Alt<-0
Pos_Prop_Alt<-0
Pos_Change_Alt<-0
neg5_Count<-0
neg5_Prop<-0
neg5_Change<-0
neg4_Count<-0
neg4_Prop<-0
neg4_Change<-0
neg3_Count<-0
neg3_Prop<-0
neg3_Change<-0
neg2_Count<-0
neg2_Prop<-0
neg2_Change<-0
neg1_Count<-0
neg1_Prop<-0
neg1_Change<-0
zero_Count<-0
zero_Prop<-0
zero_Change<-0
pos1_Count<-0
pos1_Prop<-0
pos1_Change<-0
pos2_Count<-0
pos2_Prop<-0
pos2_Change<-0
pos3_Count<-0
pos3_Prop<-0
pos3_Change<-0
pos4_Count<-0
pos4_Prop<-0
pos4_Change<-0
pos5_Count<-0
pos5_Prop<-0
pos5_Change<-0

#Get Table Results
neg5_Count<-results[["-5"]]
neg5_Prop<-neg5_Count/Total_Count
neg5_Change<-(neg5_Prop-as.numeric(last.results.alt[[4]]))/as.numeric(last.results.alt[[4]])*100
neg4_Count<-results[["-4"]]
neg4_Prop<-neg4_Count/Total_Count
neg4_Change<-(neg4_Prop-as.numeric(last.results.alt[[5]]))/as.numeric(last.results.alt[[5]])*100
neg3_Count<-results[["-3"]]
neg3_Prop<-neg3_Count/Total_Count
neg3_Change<-(neg3_Prop-as.numeric(last.results.alt[[6]]))/as.numeric(last.results.alt[[6]])*100
neg2_Count<-results[["-2"]]
neg2_Prop<-neg2_Count/Total_Count
neg2_Change<-(neg2_Prop-as.numeric(last.results.alt[[7]]))/as.numeric(last.results.alt[[7]])*100
neg1_Count<-results[["-1"]]
neg1_Prop<-neg1_Count/Total_Count
neg1_Change<-(neg1_Prop-as.numeric(last.results.alt[[8]]))/as.numeric(last.results.alt[[8]])*100
zero_Count<-results[["0"]]
zero_Prop<-zero_Count/Total_Count
zero_Change<-(zero_Prop-as.numeric(last.results.alt[[9]]))/as.numeric(last.results.alt[[9]])*100
pos1_Count<-results[["1"]]
pos1_Prop<-pos1_Count/Total_Count
pos1_Change<-(pos1_Prop-as.numeric(last.results.alt[[10]]))/as.numeric(last.results.alt[[10]])*100
pos2_Count<-results[["2"]]
pos2_Prop<-pos2_Count/Total_Count
pos2_Change<-(pos2_Prop-as.numeric(last.results.alt[[11]]))/as.numeric(last.results.alt[[11]])*100
pos3_Count<-results[["3"]]
pos3_Prop<-pos3_Count/Total_Count
pos3_Change<-(pos3_Prop-as.numeric(last.results.alt[[12]]))/as.numeric(last.results.alt[[12]])*100
pos4_Count<-results[["4"]]
pos4_Prop<-pos4_Count/Total_Count
pos4_Change<-(pos4_Prop-as.numeric(last.results.alt[[13]]))/as.numeric(last.results.alt[[13]])*100
pos5_Count<-results[["5"]]
Pos5_Prop<-pos5_Count/Total_Count
Pos5_Change<-(pos5_Prop-as.numeric(last.results.alt[[14]]))/as.numeric(last.results.alt[[14]])*100

#Get Negative, Neutral, and Positive Totals
Neg_Count_Alt<-neg5_Count+neg4_Count+neg3_Count+neg2_Count+neg1_Count
Neg_Prop_Alt<-Neg_Count_Alt/Total_Count
Neg_Change_Alt<-(Neg_Prop_Alt-as.numeric(last.results.alt[[1]]))/as.numeric(last.results.alt[[1]])*100
Neu_Count_Alt<-zero_Count
Neu_Prop_Alt<-Neu_Count_Alt/Total_Count
Neu_Change_Alt<-(Neu_Prop_Alt-as.numeric(last.results.alt[[2]]))/as.numeric(last.results.alt[[2]])*100
Pos_Count_Alt<-pos1_Count+pos2_Count+pos3_Count+pos4_Count+pos5_Count
Pos_Prop_Alt<-Pos_Count_Alt/Total_Count
Pos_Change_Alt<-(Pos_Prop_Alt-as.numeric(last.results.alt[[3]]))/as.numeric(last.results.alt[[3]])*100

Mean<-(-5*neg5_Count-4*neg4_Count-3*neg3_Count-2*neg2_Count-neg1_Count+pos1_Count+2*pos2_Count+3*pos3_Count+4*pos4_Count+5*pos5_Count)/Total_Count

Feel_alt.df<-data.frame(Date,Time,Total_Count,Mean,Neg_Count_Alt,Neg_Prop_Alt,Neg_Change_Alt,Neu_Count_Alt,Neu_Prop_Alt,Neu_Change_Alt,Pos_Count_Alt,Pos_Prop_Alt,Pos_Change_Alt,
neg5_Count,neg5_Prop,neg5_Change,neg4_Count,neg4_Prop,neg4_Change,neg3_Count,neg3_Prop,neg3_Change,neg2_Count,neg2_Prop,neg2_Change,neg1_Count,neg1_Prop,neg1_Change,
zero_Count,zero_Prop,zero_Change,pos1_Count,pos1_Prop,pos1_Change,pos2_Count,pos2_Prop,pos2_Change,pos3_Count,pos3_Prop,pos3_Change,pos4_Count,pos4_Prop,pos4_Change,pos5_Count,pos5_Prop,pos5_Change)

dbWriteTable(results.con,name="results_10m_alt",Feel_alt.df,append=T,overwrite=F,row.names=F)

Upvotes: 0

Answers (3)

agstudy

Reputation: 121568

It is normal you have a lot of errors with such code!!

Avoid defining variables and use list or data.frame structure to aggregate your result.
Use small functions to divide you code in small parts. This will help to use trycatch after.

script structure

Your program should look to something like this :

  data <- load.tweets()                      ## read inputs
  scores <- score.sentiment(data,...))       ## clean data/extract info
  ratios <- compute.ratios(scores,data,...)) ## analysis
  save.results(ratios,data,...))             ## save results

For example , Here I try to factorize 2 parts :

Get Table Results

here i am using mapply since you repeat the same statements for all your coefficients.

compute.ratios <- function(){
  mapply(function(x,y){
    pos5_Count <-results[[x]]
    Pos5_Prop <- pos5_Count/Total_Count
    val <- as.numeric(last.results.alt[[y]]) ## you should check that val !=0
    Pos5_Change <- (pos5_Prop-val)/val*100
  },names(results),seq_along(last.results.alt))
}

define a function to load data

load.tweets <- function(){
tweets.con <- dbConnect(MySQL(),user="xxxxxxxxxxxx",password="xxxxxxxxxxxx",
                                dbname="xxxxxxxxxx",host="xxxxxxxxxxxxxxxxxxxx.com")
Feel <- dbGetQuery(tweets.con,"SELECT `tweet_text` 
                             FROM `tweets` 
                             WHERE `created_at` 
                             BETWEEN timestamp(DATE_ADD(NOW(), INTERVAL 49 MINUTE)) 
                             AND timestamp(DATE_ADD(NOW(), INTERVAL 60 MINUTE))")


n <- length(as.matrix(Feel))
Total_Count<- n

results.con<-dbConnect(MySQL(),user="xxxxxxxxxxx",password="xxxxxxxxxxxxxxxxxx",
                               dbname="xxxxxxxxxxxxxx",host="xxxxxxxxxxxxxxxxxx")
last.results.alt <- dbGetQuery(results.con,"SELECT `Neg_Prop_Alt`,`Neu_Prop_Alt`,
                                                   `Pos_Prop_Alt`,`neg5_Prop`,`neg4_Prop`,
                                                   `neg3_Prop`,`neg2_Prop`,`neg1_Prop`,
                                                    `zero_Prop`,`pos1_Prop`,`pos2_Prop`,`pos3_Prop`,
                                                    `pos4_Prop`,`pos5_Prop` 
                                           FROM `results_10m_alt` 
                                           ORDER BY Date DESC LIMIT 1")

list(Feel=Feel,last.results.alt =last.results.alt )
}

Upvotes: 1

mnel

Reputation: 115392

You can use the evaluate package. knitr uses the evaluate package to process literate documents.

We can use knit to use evaulate

A simple example would be a script test.r

x <- -1:5
a <- 'a'
x <- x + a
print(x)

We can use `kintr

library(knitr)
knit(text = paste('```{r}',source('test.r'),'```',collapse = '\n'))
 ## Error in x + a : non-numeric argument to binary operator
 # note the error has occured, but evalulation proceeded on
 # and x is defined
 x
 ## [1] -1  0  1  2  3  4  5

Using evaluate in a rudimentary manner

 # cleaning up to show that x will be redefined
 rm(x)
evaluate(input = paste(readLines('test.r')))

## [[1]]
## $src
## [1] "x <- -1:5\n"
## 
## attr(,"class")
## [1] "source"
## 
## [[2]]
## $src
## [1] "a <- 'a'\n"
## 
## attr(,"class")
## [1] "source"
## 
## [[3]]
## $src
## [1] "x <- x + a\n"
## 
## attr(,"class")
## [1] "source"
## 
## [[4]]
## <simpleError in x + a: non-numeric argument to binary operator>
## 
## [[5]]
## $src
## [1] "print(x)"
## 
## attr(,"class")
## [1] "source"
## 
## [[6]]
## [1] "[1] -1  0  1  2  3  4  5\n"

# and x is defined!
x
## [1] -1  0  1  2  3  4  5

Upvotes: 0

Hong Ooi

Reputation: 57686

Use try or tryCatch (the former is simpler and generally all you need). You're already using tryCatch later on, so use it to deal with your problematic query as well.