Reputation: 123
I am writing code where I have an original matrix of data with 42 rows and 6 columns. I am trying to randomly select 12 rows from this matrix (Matrix B) and get 2 sets of regression coefficients from 2 pairs of columns within this matrix. From there, I want to use the newly calculated regression coefficients to predict values using the data from the other 30 rows of the matrix that were not selected in the random sample (Matrix C).
I keep getting an error when trying to run the code below. This is the error message I get:
Warning messages:
Warning messages:
1: 'newdata' had 30 rows but variable(s) found have 12 rows
2: 'newdata' had 30 rows but variable(s) found have 12 rows
I am pretty sure my EDGww regression is for some reason only using 12 rows instead of the 30 rows, which is why I am getting this error. I am not sure how to fix this, though. Also, there appears to be something incorrect about the "EDGww <- predict" line of code. I am not sure how the "newdata" part of the command works.
# Build the data matrix from six vectors that are defined outside this snippet.
# matrix() fills column by column, so (assuming each vector holds 42 values --
# TODO confirm) the columns are, in order:
#   1 = Box.Z, 2 = Box.DC.gm, 3 = Box.CR, 4 = Box.DC.ww, 5 = Box.DC.gd, 6 = Box.DC.w
A <- matrix(c(Box.Z, Box.DC.gm, Box.CR, Box.DC.ww, Box.DC.gd, Box.DC.w), nrow=42)
# randco: split the rows of A into a random sample B (12 rows) and the
# remaining rows C, fit two simple regressions on B, and compute RMSEs of the
# predictions against the observed columns of C.
#
# Argument:
#   A  numeric matrix; columns 1-6 are used by position.
# Returns:
#   an unnamed list of four RMSE values, in the order gm, gd, ww, w.
randco <- function(A) {
# Draw 12 row indices out of 42 (both numbers are hard-coded) and take those rows.
B<- A[sample(42,12),]
# Recover which rows of A went into B by pasting every row into one string and
# matching the strings; C is A without the matched rows.
# NOTE(review): match() returns the first hit only, so duplicated rows in A
# would presumably leave C with more than 30 rows -- verify if duplicates can occur.
arows <- apply(A, 1, paste, collapse="_")
brows <- apply(B, 1, paste, collapse="_")
C<- A[-match(brows, arows), ]
# Observed values in the held-out rows (columns 2, 4, 5 and 6).
Boxgm <- C[,2]
Boxww <- C[,4]
Boxgd <- C[,5]
Boxw <- C[,6]
# Regress column 2 on column 1 using the sampled rows, then predict.
# NOTE(review): the model terms are the expressions `B[, 2]` and `B[, 1]`, while
# the data frame passed as newdata has columns derived from the prefix "B="
# (not a column called `B[, 1]`). This looks like the source of the quoted
# warning "'newdata' had 30 rows but variable(s) found have 12 rows": predict()
# appears to fall back to the 12-row B found in the function environment.
EDGgm<- predict(lm(B[,2] ~ B[,1]), newdata=data.frame(B=C[,1:2]))
# Same for column 4 on column 3.
# NOTE(review): the argument here is spelled `newdata1`, not `newdata`;
# presumably predict() ignores it and returns the 12 fitted values for B --
# confirm against the predict.lm documentation.
EDGww<- predict(lm(B[,4] ~ B[,3]), newdata1=data.frame(B=C[,3:4]))
# Derived quantities: gd is the difference of the two predictions, w is ww as
# a percentage of gd.
EDGgd <- EDGgm - EDGww
EDGw <- (EDGww*100/EDGgd)
# Root-mean-square error of each predicted quantity against the held-out
# observations; NA values are dropped before averaging.
rmse.gm <- sqrt(mean((EDGgm-Boxgm)^2, na.rm = TRUE))
rmse.gd <- sqrt(mean((EDGgd-Boxgd)^2, na.rm = TRUE))
rmse.ww <- sqrt(mean((EDGww-Boxww)^2, na.rm = TRUE))
rmse.w <- sqrt(mean((EDGw-Boxw)^2, na.rm = TRUE))
list(rmse.gm, rmse.gd, rmse.ww, rmse.w)
}
# Run randco() twice on A (each run draws its own random sample) and transpose
# the combined result so that each run becomes one row.
Z <- t(replicate(2, randco(A)))
Here is my matrix A:
structure(c(972.7298, 934.814227158535, 996.933797909408, 791.217703626463,
784.234752589183, 783.659378596087, 947.066666666667, 918.351477449456,
884.944612286002, 561.061151079137, 543.853469531525, 557.416696524543,
545.154969718561, 549.458419120938, 572.653512815785, 508.803114948366,
489.664296345295, 546.234253551327, 601.674754971929, 564.621528408059,
657.593123209169, 605.194301773428, 662.548898497015, 658.731787399959,
653.712059064807, 645.51177904143, 638.927926119253, 672.18137254902,
662.839142363581, 679.747359701802, 696.70399323896, 673.530320189437,
700.21186440678, 807.331830584886, 786.227683746812, 779.653232288437,
668.38576585869, 644.662921348315, 647.371589085072, 572.055997804008,
568.525605977766, 573.693858845096, 19.3350173135753, 19.2528485621378,
17.97264, 19.8067268125686, 19.6961540482885, 19.2498052750475,
18.1278467677645, 18.5052143669591, 17.8549653586694, 21.3537310834607,
20.9012957360391, 20.7400015202543, 20.8698484361062, 20.2784363115619,
20.4579902498884, 19.5835524259481, 20.8039105491502, 20.3545184888189,
19.2183579751146, 19.2741515717697, 18.8531635242811, 20.4671201111593,
19.9477324477516, 20.0816370797239, 19.16865095264, 19.3289307393948,
19.0611214754502, 19.9355592993905, 19.8016546674182, 19.1990838235428,
19.8209288189899, 20.0623629281521, 20.3352443372472, 19.0073567368552,
19.0215587432765, 19.4547199391263, 18.9404044208691, 19.5176145389921,
18.9251879854177, 20.2094551375156, 20.6700059171779, 20.6466740494858,
0.0184126042027551, 0.0211500857551233, 0.0175062024694362, 0.0337401180409656,
0.0346889237620637, 0.0347098429625015, 0.0197595945566906, 0.0208987649013026,
0.0233611391296427, 0.079218078642384, 0.0854783227450312, 0.0809066783513659,
0.0876586886055242, 0.0862439899396154, 0.0788434985488822, 0.100477074583164,
0.109512990787781, 0.0874625785566515, 0.0697002042276112, 0.0795835405948175,
0.0590959175682916, 0.0691508307033452, 0.0566322479499254, 0.0581804263930345,
0.0598276441702881, 0.0615442739013907, 0.0627343073093989, 0.0496103325156931,
0.0537150387879776, 0.0506587540839388, 0.0472721193305599, 0.0516289789530612,
0.0470104694823737, 0.0298054466074238, 0.0322343007905799, 0.0327009202323074,
0.0509075880344454, 0.0561708857467634, 0.055379202376338, 0.0761713062471437,
0.0773846368854214, 0.075659018034763, 1.24742047184357, 1.25188518062349,
1.17616931947406, 1.4929693577313, 1.44061448275529, 1.40750828224021,
1.28417359593431, 1.35503554694099, 1.28696331263137, 2.4154456205358,
2.34834547373797, 2.3203592861058, 2.45940290188245, 2.39036941912483,
2.42395662840317, 2.64559953691598, 2.85551593111206, 2.78375293672456,
2.02594247326842, 1.99500782475067, 1.96872999244972, 1.96773821995353,
1.91969552725095, 1.93016191885846, 2.0243918375924, 2.02855863447954,
2.0272144929465, 2.19754956724573, 2.16243045886116, 2.05659819121302,
1.80255436993863, 1.82806964661878, 1.86868466766234, 1.59199447237071,
1.60223256800273, 1.65077123393453, 2.34177469771875, 2.32564214914119,
2.25709470774863, 2.31314883935834, 2.38916504448898, 2.44775420826948,
18.0875968417318, 18.0009633815143, 16.7964698066862, 18.3137574548373,
18.2555395655332, 17.8422969928073, 16.8436731718302, 17.1501788200181,
16.5680020460381, 18.9382854629249, 18.5529502623012, 18.4196422341485,
18.4104455342238, 17.8880668924371, 18.0340336214853, 16.9379528890322,
17.9483946180381, 17.5707655520944, 17.1924155018462, 17.2791437470191,
16.8844335318313, 18.4993818912057, 18.0280369205006, 18.1514751608654,
17.1442591150476, 17.3003721049152, 17.0339069825037, 17.7380097321447,
17.639224208557, 17.1424856323298, 18.0183744490513, 18.2342932815333,
18.4665596695848, 17.4153622644845, 17.4193261752738, 17.8039487051918,
16.5986297231503, 17.1919723898509, 16.6680932776691, 17.8963062981573,
18.2808408726889, 18.1989198412163, 6.8965517241379, 6.95454545454546,
7.00247928886737, 8.15217391304348, 7.89138265447493, 7.88860471724921,
7.62407096619517, 7.90099952403618, 7.76776408558641, 12.7542993544187,
12.6575312310825, 12.597200622084, 13.3587364700685, 13.3629275510785,
13.441012029141, 15.6193582202551, 15.9095896423085, 15.8430941922889,
11.783931542667, 11.5457562825984, 11.6600298656048, 10.636778199,
10.64838914917, 10.6336366700371, 11.8079867085979, 11.7255202499558,
11.9010541447053, 12.3889297639934, 12.2592152199763, 11.9970827762246,
10.0039788552265, 10.0254483044328, 10.1192896841534, 9.14132274823418,
9.19801691455233, 9.27193883373274, 14.1082410824108, 13.5274888558692,
13.5414091470952, 12.9252863737391, 13.0692294798011, 13.4499971955802
), .Dim = c(42L, 6L), .Dimnames = list(NULL, c("col1", "col2",
"col3", "col4", "col5", "col6")))
Upvotes: 1
Views: 154
Reputation: 93813
You could avoid a lot of trouble if you convert `A` to a `data.frame`, allowing you to use named elements for the `newdata=` part of the `predict` call. I think this is what you are chasing, but it's hard to know for sure:
# Convert the matrix to a data.frame, then label its six columns so that the
# model formulas and `newdata` can refer to variables by name.
A <- data.frame(A)
names(A) <- c(
  "Box.Z", "Box.DC.gm", "Box.CR",
  "Box.DC.ww", "Box.DC.gd", "Box.DC.w"
)
#' Hold-out RMSE for two simple regressions and two derived quantities
#'
#' Randomly splits the rows of `A` into a training sample and a hold-out set,
#' fits `Box.DC.gm ~ Box.Z` and `Box.DC.ww ~ Box.CR` on the training rows,
#' predicts both responses for the hold-out rows, derives `gd = gm - ww` and
#' `w = 100 * ww / gd` from those predictions, and scores all four against
#' the observed hold-out columns.
#'
#' @param A A data.frame with columns `Box.Z`, `Box.DC.gm`, `Box.CR`,
#'   `Box.DC.ww`, `Box.DC.gd` and `Box.DC.w`.
#' @param n_train Number of rows drawn without replacement for fitting.
#'   Must be at least 2 (a line needs two points) and smaller than `nrow(A)`
#'   (at least one row must be held out). Defaults to 12.
#' @return A named list with elements `gm`, `gd`, `ww` and `w`, each a single
#'   root-mean-square error computed with missing values removed.
randco <- function(A, n_train = 12) {
  required <- c(
    "Box.Z", "Box.DC.gm", "Box.CR",
    "Box.DC.ww", "Box.DC.gd", "Box.DC.w"
  )
  stopifnot(
    is.data.frame(A),
    all(required %in% names(A)),
    is.numeric(n_train),
    length(n_train) == 1L,
    n_train >= 2,
    n_train < nrow(A)
  )

  # Size the draw from the data instead of assuming exactly 42 rows.
  rowsel <- sample.int(nrow(A), n_train)
  train <- A[rowsel, , drop = FALSE]
  holdout <- A[-rowsel, , drop = FALSE]

  # Fitting with `data =` keeps plain column names in the model terms, which
  # is what lets predict() find the predictors in `newdata`.
  fit_gm <- lm(Box.DC.gm ~ Box.Z, data = train)
  fit_ww <- lm(Box.DC.ww ~ Box.CR, data = train)
  pred_gm <- predict(fit_gm, newdata = holdout)
  pred_ww <- predict(fit_ww, newdata = holdout)

  # Quantities derived from the two predictions.
  pred_gd <- pred_gm - pred_ww
  pred_w <- pred_ww * 100 / pred_gd

  rmse <- function(predicted, observed) {
    sqrt(mean((predicted - observed)^2, na.rm = TRUE))
  }

  list(
    gm = rmse(pred_gm, holdout$Box.DC.gm),
    gd = rmse(pred_gd, holdout$Box.DC.gd),
    ww = rmse(pred_ww, holdout$Box.DC.ww),
    w = rmse(pred_w, holdout$Box.DC.w)
  )
}
Results:
# Run randco() twice and transpose, giving one row per run and one column per
# RMSE (gm, gd, ww, w). No seed is set here, so the numbers differ from run
# to run; the output pasted below is from one particular execution.
Z <- t(replicate(2, randco(A)))
# gm gd ww w
#[1,] 0.7078684 0.7588057 0.1827982 1.310923
#[2,] 0.6728875 0.7858108 0.2072046 1.522664
Upvotes: 3