Reputation: 396
I have this file that contains three .csv files, EURAUD_201501
, EURAUD_201502
, EURAUD_201503
. The files contain forex tick data from January to March 2015. The first step of my exercise is to manipulate the data set into a usable form. The (working) code is:
# Load the monthly EURAUD tick files and build 5-minute bars ----
# NOTE(review): xts(), rbind.xts() and aggregatets() are not base R; this
# script assumes the packages providing them (presumably xts and
# highfrequency) are already loaded -- confirm.

# Read one headerless tick-data CSV and return its BID/OFR columns as an xts
# object ordered by the parsed TIMESTAMP column.
#
# path: path to a CSV whose columns are TIMESTAMP, BID, OFR, VOL (no header).
# Returns an xts object with the columns BID and OFR.
read_tick_file <- function(path) {
  ticks <- read.csv(
    path,
    header = FALSE,
    col.names = c("TIMESTAMP", "BID", "OFR", "VOL"),
    stringsAsFactors = FALSE
  )
  ticks$VOL <- NULL # drops the VOL column

  # Each sub() uses a lookbehind to insert one separator after the 11th, 14th
  # and 17th character, so that the string matches "%Y%m%d %H:%M:%OS".
  # Presumably the raw stamps look like "YYYYMMDD HHMMSSmmm" -- TODO confirm.
  stamp <- ticks$TIMESTAMP
  stamp <- sub("(?<=.{11})", ":", stamp, perl = TRUE)
  stamp <- sub("(?<=.{14})", ":", stamp, perl = TRUE)
  stamp <- sub("(?<=.{17})", ".", stamp, perl = TRUE)

  # Convert file to an xts object
  xts(
    ticks[, c("BID", "OFR")],
    order.by = as.POSIXct(stamp, tz = "EST", format = "%Y%m%d %H:%M:%OS")
  )
}

tick_files <- c(
  "EURAUD_201501.csv",
  "EURAUD_201502.csv",
  "EURAUD_201503.csv"
)

# The per-file data frames and xts objects live only inside the calls below,
# so no intermediate objects are left in the workspace and no rm() is needed.
final_xts <- do.call(rbind.xts, lapply(tick_files, read_tick_file))

# Create 5-minute intervals
final_fivemin <- aggregatets(
  final_xts,
  FUN = "previoustick",
  on = "minutes",
  k = 5
)
How can I create a functional loop without having to repeat the same procedure for each data-set?
Upvotes: 0
Views: 89
Reputation: 13118
Seems like you might want to try lapply
. You could replace the for
loop with
# Read every tick-data file and convert each one to an xts object.
# NOTE(review): real_data is not defined in this snippet; it is presumably a
# character vector of CSV file paths -- confirm against the caller.
# The result, xts_data, is a list with one xts object (BID, OFR) per file.
xts_data <- lapply(real_data, function(x) {
  data <- read.csv(
    x,
    header = FALSE,
    col.names = c("TIMESTAMP", "BID", "OFR", "VOL"),
    stringsAsFactors = FALSE
  )
  data$VOL <- NULL # drops the VOL column

  # Manipulate the strings to create clear timestamps: each sub() inserts one
  # separator after the 11th, 14th and 17th character. The column is read
  # directly from `data`; there is no loop index inside this function.
  data$TIMESTAMP <- sub("(?<=.{11})", ":", data$TIMESTAMP, perl = TRUE)
  data$TIMESTAMP <- sub("(?<=.{14})", ":", data$TIMESTAMP, perl = TRUE)
  data$TIMESTAMP <- sub("(?<=.{17})", ".", data$TIMESTAMP, perl = TRUE)

  # Convert file to an xts object; columns 2 and 3 are BID and OFR.
  xts(
    data[, c(2, 3)],
    order.by = as.POSIXct(
      data$TIMESTAMP,
      tz = "EST",
      format = "%Y%m%d %H:%M:%OS"
    )
  )
})
And then finish up:
# Stack the per-file xts objects held in the list xts_data into one series.
final_xts <- do.call(rbind, xts_data)

# Create 5-minute intervals
final_fivemin <- aggregatets(
  final_xts,
  FUN = "previoustick",
  on = "minutes",
  k = 5
)
Upvotes: 2