Reputation: 482
I am trying to use the na.spline function from the zoo package to interpolate some NA values in a data frame. I am getting the following error:
Error in splinefun(x[!na], y[!na], ...) : zero non-NA points
In addition: Warning message:
In xy.coords(x, y, setLab = FALSE) : NAs introduced by coercion
The code I have used is presented below.
# Sample of the data as read from the Excel sheet (dput-style literal).
# Every column is a character vector and gaps are the literal string "NA".
test <- structure(
  list(
    `1243_4sel` = c(
      "2.3525000000000001E-2", "5.6603000000000001E-2",
      "9.1589000000000004E-2", "8.6460999999999996E-2",
      "1.7899999999999999E-3", "1.8776000000000001E-2", "NA"
    ),
    `1245_4sel` = c(
      "2.6909999999999998E-3", "1.7314E-2",
      "-4.8430000000000001E-3", "4.0668999999999997E-2",
      "-1.0984000000000001E-2", "-4.2880000000000001E-3", "NA"
    ),
    `1255_4sel` = c(
      "-2.6103000000000001E-2", "5.2512999999999997E-2",
      "-2.1322000000000001E-2", "-3.5166999999999997E-2",
      "4.4469000000000002E-2", "4.6221999999999999E-2", "NA"
    ),
    `1265_4sel` = c(
      "NA", "-2.8042999999999998E-2",
      "NA", "NA",
      "-5.4059000000000003E-2", "-6.4116000000000006E-2", "NA"
    ),
    `1266_4sel` = c(
      "NA", "3.6759E-2",
      "NA", "NA",
      "-7.7029999999999998E-3", "1.9910000000000001E-3", "NA"
    ),
    `1268_4sel` = c(
      "-8.8400000000000002E-4", "6.5909999999999996E-2",
      "1.9495999999999999E-2", "6.9381999999999999E-2",
      "-2.764E-3", "-3.6695999999999999E-2", "NA"
    ),
    `1269_4sel` = c(
      "-2.4181000000000001E-2", "2.1572000000000001E-2",
      "-1.6182999999999999E-2", "2.5044E-2",
      "-3.3234E-2", "-1.6448999999999998E-2", "NA"
    ),
    `1274_4sel` = c(
      "-2.0017E-2", "3.5234000000000001E-2",
      "-3.8670999999999997E-2", "5.6230000000000004E-3",
      "-1.7319000000000001E-2", "1.4264000000000001E-2", "NA"
    ),
    `1276_4sel` = c(
      "-4.1009999999999996E-3", "6.1688E-2",
      "1.3915E-2", "1.0439E-2",
      "5.1450000000000003E-3", "3.5539999999999999E-3", "NA"
    ),
    `1277_4sel` = c(
      "2.8437E-2", "2.0038E-2",
      "5.1650000000000001E-2", "9.8672999999999997E-2",
      "-3.5707999999999997E-2", "1.7539999999999999E-3", "NA"
    ),
    `1278_4sel` = c(
      "3.4150000000000001E-3", "4.5303999999999997E-2",
      "2.818E-2", "6.0167999999999999E-2",
      "-2.5447999999999998E-2", "-4.061E-2", "NA"
    ),
    `1387_4sel` = c(
      "NA", "-8.8667999999999997E-2",
      "-3.0911999999999999E-2", "NA",
      "-4.4260000000000001E-2", "-2.3598999999999998E-2", "NA"
    )
  ),
  row.names = c(NA, -7L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Attempted clean-up of `df` before calling na.spline.
# `df` is not created in this snippet; it is expected to already hold the
# sheet contents, including a DATE column and a RowID column.

# Convert the DATE column to Date.
# NOTE(review): "%y" matches a two-digit year; a four-digit year in the sheet
# would need "%Y" — confirm against the actual cell contents.
df$DATE <- as.Date(df$DATE, format = "%y-%m-%d")
str(df$DATE)
# Remove the RowID column (the first column of the Excel sheet).
df <- subset(df, select = -c(RowID))
# Convert every character column to numeric and assign the result back to df
# (`%<>%` is the assignment pipe). Values that cannot be parsed become NA.
df %<>% mutate_if(is.character,as.numeric)
class(df$DATE)
# Convert factor columns to numeric via their labels; other columns are
# returned unchanged.
df[] <- lapply(df, function(x) {
if(is.factor(x)) as.numeric(as.character(x)) else x
})
# Show the class of each column.
sapply(df, class)
# Intended: drop character columns.
# NOTE(review): is.character(df) tests the data frame object itself, not its
# columns, so it yields a single FALSE and no character column is selected.
df[is.character(df)]= NULL
View(df)
# NOTE(review): same issue as above — which(FALSE) is integer(0), so no
# column is selected here either.
df[which(is.character(df))] <- NULL
# Total number of NA cells across the whole data frame.
sum(is.na(df))
# Intended: move the last column to the first position.
# `0:0` is just 0 (selects nothing); length(df) is the index of the last column.
# NOTE(review): the result is not assigned, so df itself keeps its column order.
df %>% select(0:0, length(df), everything())
# NOTE(review): the whole data frame, DATE column included, is passed to
# na.spline in one call rather than one numeric column at a time.
na.spline(df)
I don't know whether I have to read the Excel file as a zoo object in order to call the na.spline function, but even if I do, I get another error:
Error in read.zoo(df) : index has bad entries at data rows: 7 8 21
I found in other posts that the problem is caused by some rows starting with NAs. Any thoughts?
Upvotes: 3
Views: 709
Reputation: 73702
You probably need to use lapply to apply na.spline column-wise.
# Spline-interpolate each column on its own; the first column is left out of
# the selection, so it is neither read nor overwritten.
dat[-1] <- lapply(dat[-1], function(column) zoo::na.spline(column))
dat
# DATE X155_4sel X964_4sel X970_4sel
# 1 2016-12-02 -0.0162270 -0.022269 0.095243
# 2 2016-12-10 0.1005000 0.041044 -0.021599
# 3 2016-12-18 -0.0064720 0.039549 0.161545
# 4 2016-12-26 -0.0386020 0.059017 0.159219
# 5 2017-01-01 0.1657300 -0.046500 -0.054670
# 6 2017-01-09 0.1582580 -0.017922 -0.079368
# 7 2017-01-17 -0.5417341 -0.110800 -0.107872 ## dat[7, 2] is extrapolated
I'm not quite sure what your problem is, but this works with your new sample data as well:
# Step 1: the columns are character vectors, so coerce each one to numeric.
test[] <- lapply(test, function(column) as.numeric(column))
# Step 2: fill the NA gaps one column at a time with a spline.
test[] <- lapply(test, function(column) zoo::na.spline(column))
# summary(test)
# 1243_4sel 1245_4sel 1255_4sel 1265_4sel
# Min. :0.00179 Min. :-0.010984 Min. :-0.21019 Min. :-0.07487
# 1st Qu.:0.02115 1st Qu.:-0.004566 1st Qu.:-0.03063 1st Qu.:-0.05909
# Median :0.05660 Median : 0.002691 Median :-0.02132 Median :-0.04469
# Mean :0.08711 Mean : 0.039850 Mean :-0.02137 Mean :-0.04608
# 3rd Qu.:0.08903 3rd Qu.: 0.028991 3rd Qu.: 0.04535 3rd Qu.:-0.03203
# Max. :0.33106 Max. : 0.238393 Max. : 0.05251 Max. :-0.02076
# 1266_4sel 1268_4sel 1269_4sel 1274_4sel
# Min. :-0.007703 Min. :-0.036696 Min. :-0.03323 Min. :-0.038671
# 1st Qu.:-0.001574 1st Qu.:-0.001824 1st Qu.:-0.02031 1st Qu.:-0.018668
# Median : 0.009681 Median : 0.019496 Median :-0.01618 Median : 0.005623
# Mean : 0.019375 Mean : 0.038579 Mean : 0.03308 Mean : 0.032151
# 3rd Qu.: 0.030351 3rd Qu.: 0.067646 3rd Qu.: 0.02331 3rd Qu.: 0.024749
# Max. : 0.076094 Max. : 0.155606 Max. : 0.27501 Max. : 0.245945
# 1276_4sel 1277_4sel 1278_4sel 1387_4sel
# Min. :-0.004101 Min. :-0.03571 Min. :-0.04061 Min. :-0.25055
# 1st Qu.: 0.004350 1st Qu.: 0.01090 1st Qu.:-0.01102 1st Qu.:-0.06646
# Median : 0.010439 Median : 0.02844 Median : 0.02818 Median :-0.03140
# Mean : 0.015397 Mean : 0.10897 Mean : 0.04256 Mean :-0.05613
# 3rd Qu.: 0.015529 3rd Qu.: 0.07516 3rd Qu.: 0.05274 3rd Qu.:-0.02726
# Max. : 0.061688 Max. : 0.59797 Max. : 0.22689 Max. : 0.07646
Data
# Reproducible sample: three measurement columns followed by a POSIXct DATE
# column. The first measurement column is stored as text, with the literal
# string "NA" marking the gap.
dat <- structure(
  list(
    `155_4sel` = c(
      "-0.016226999999999998", "0.10050000000000001",
      "-0.0064720000000000003", "-0.038601999999999997",
      "0.16572999999999999", "0.15825800000000001", "NA"
    ),
    `964_4sel` = c(
      -0.022269, 0.041044, 0.039549, 0.059017, -0.0465, -0.017922, -0.1108
    ),
    `970_4sel` = c(
      0.095243, -0.021599, 0.161545, 0.159219, -0.05467, -0.079368, -0.107872
    ),
    DATE = structure(
      c(
        1480636800, 1481328000, 1482019200, 1482710400,
        1483228800, 1483920000, 1484611200
      ),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    )
  ),
  row.names = c(NA, -7L),
  class = "data.frame"
)
# dat <- as.data.frame(read_excel("2017_NDVI_Anomaly_Zonal_Stats.xlsx", sheet = "Sheet4"))[-1]

# Turn the POSIXct timestamps into plain Date values.
dat$DATE <- as.Date(dat$DATE, format = "%y-%m-%d")
# Text to double; the "NA" string becomes a real NA.
dat[["155_4sel"]] <- as.numeric(dat[["155_4sel"]])
# Bring DATE to the front, keeping the other columns in their original order.
dat <- dat[c("DATE", setdiff(names(dat), "DATE"))]
## You may want to use proper names (not beginning with number)
names(dat) <- make.names(names(dat))
Upvotes: 1