Reputation: 21
I try to estimate multinomial logistic regression by using mlogit package, but I got the error. I have 205109 observations and below I attempted the code and error.
> Data <- read.csv("NWEScsv.csv",header=T)
> colnames(Data)
[1] "NWSE" "W.S" "W.E" "W.N" "W.W" "U.S" "U.E" "U.N" "U.W"
[10] "W.Not" "U.Not" "BW" "BU"
> movement = mlogit.data(Data, shape ="wide", varying = 2:11, choice = "NWSE")
> head(movement,15)
NWSE BW BU alt W U chid
1.E FALSE 194.42 0 E 198.3434 2.47404e-10 1
1.N FALSE 194.42 0 N 194.4160 1.41319e-10 1
1.Not TRUE 194.42 0 Not 194.4200 0.00000e+00 1
1.S FALSE 194.42 0 S 212.7249 2.08726e-10 1
1.W FALSE 194.42 0 W 198.3434 1.37143e-10 1
2.E TRUE 257.24 0 E 202.3502 8.05144e-10 2
2.N FALSE 257.24 0 N 200.3368 4.59906e-10 2
> result.movement = mlogit(NWSE ~ 0 |BW+BU | W+U, movement)
solve.default(H, g[!fixed]) error:
Lapack routine dgesv: system is exactly singular: U[20,20] = 0
How can I solve the problem?
========
Sorry for long question. In my knowledge, when we try to estimate multiple logistic regression, we need to separate explanatory variables into three types. (ex: U_{ij}=α_{j}+β*X_{ij}+γ_{j} Z_{i}+δ_{j} W_{ij}+ε_{ij}, X_{ij},Z_{i},W_{ij}are three types) In the mlogit package, we need to separate the variables by using "|" and the equation will become like X_{ij}|Z_{i}|W_{ij}.
No matter how many times I try, I couldn't get the results when I put the variables into the W_{ij}. Below I put part of data structure.
> dput(head(movement, n = 25))
structure(list(NWSE = c(`1.E` = FALSE, `1.N` = FALSE, `1.Not` = TRUE,
`1.S` = FALSE, `1.W` = FALSE, `2.E` = TRUE, `2.N` = FALSE, `2.Not` = FALSE,
`2.S` = FALSE, `2.W` = FALSE, `3.E` = TRUE, `3.N` = FALSE, `3.Not` = FALSE,
`3.S` = FALSE, `3.W` = FALSE, `4.E` = TRUE, `4.N` = FALSE, `4.Not` = FALSE,
`4.S` = FALSE, `4.W` = FALSE, `5.E` = TRUE, `5.N` = FALSE, `5.Not` = FALSE,
`5.S` = FALSE, `5.W` = FALSE), BW = c(`1.E` = 194.42, `1.N` = 194.42,
`1.Not` = 194.42, `1.S` = 194.42, `1.W` = 194.42, `2.E` = 257.24,
`2.N` = 257.24, `2.Not` = 257.24, `2.S` = 257.24, `2.W` = 257.24,
`3.E` = 262.43, `3.N` = 262.43, `3.Not` = 262.43, `3.S` = 262.43,
`3.W` = 262.43, `4.E` = 183.09, `4.N` = 183.09, `4.Not` = 183.09,
`4.S` = 183.09, `4.W` = 183.09, `5.E` = 311.06, `5.N` = 311.06,
`5.Not` = 311.06, `5.S` = 311.06, `5.W` = 311.06), BU = c(`1.E` = 0,
`1.N` = 0, `1.Not` = 0, `1.S` = 0, `1.W` = 0, `2.E` = 0, `2.N` = 0,
`2.Not` = 0, `2.S` = 0, `2.W` = 0, `3.E` = 0, `3.N` = 0, `3.Not` = 0,
`3.S` = 0, `3.W` = 0, `4.E` = 0, `4.N` = 0, `4.Not` = 0, `4.S` = 0,
`4.W` = 0, `5.E` = 0, `5.N` = 0, `5.Not` = 0, `5.S` = 0, `5.W` = 0
), alt = c(`1.E` = "E", `1.N` = "N", `1.Not` = "Not", `1.S` = "S",
`1.W` = "W", `2.E` = "E", `2.N` = "N", `2.Not` = "Not", `2.S` = "S",
`2.W` = "W", `3.E` = "E", `3.N` = "N", `3.Not` = "Not", `3.S` = "S",
`3.W` = "W", `4.E` = "E", `4.N` = "N", `4.Not` = "Not", `4.S` = "S",
`4.W` = "W", `5.E` = "E", `5.N` = "N", `5.Not` = "Not", `5.S` = "S",
`5.W` = "W"), W = c(`1.E` = 198.3434254, `1.N` = 194.4159624,
`1.Not` = 194.42, `1.S` = 212.7249464, `1.W` = 198.3434254, `2.E` =
202.3502284,
`2.N` = 200.33681, `2.Not` = 257.24, `2.S` = 219.2033856, `2.W` =
204.383882,
`3.E` = 208.5127103, `3.N` = 206.4379742, `3.Not` = 262.43, `3.S` =
225.8791225,
`3.W` = 210.6082979, `4.E` = 198.3434254, `4.N` = 194.4159624,
`4.Not` = 183.09, `4.S` = 212.7249464, `4.W` = 198.3434254, `5.E` =
244.6919323,
`5.N` = 239.8467074, `5.Not` = 311.06, `5.S` = 262.4340992, `5.W` =
244.6919323
), U = c(`1.E` = 2.47e-10, `1.N` = 1.41e-10, `1.Not` = 0, `1.S` = 2.09e-
10,
`1.W` = 1.37e-10, `2.E` = 8.05e-10, `2.N` = 4.6e-10, `2.Not` = 0,
`2.S` = 6.73e-10, `2.W` = 4.46e-10, `3.E` = 6.53e-10, `3.N` = 3.73e-10,
`3.Not` = 0, `3.S` = 5.51e-10, `3.W` = 3.62e-10, `4.E` = 2.47e-10,
`4.N` = 1.41e-10, `4.Not` = 0, `4.S` = 2.09e-10, `4.W` = 1.37e-10,
`5.E` = 2.65e-10, `5.N` = 1.52e-10, `5.Not` = 0, `5.S` = 2.24e-10,
`5.W` = 1.47e-10), chid = c(`1.E` = 1L, `1.N` = 1L, `1.Not` = 1L,
`1.S` = 1L, `1.W` = 1L, `2.E` = 2L, `2.N` = 2L, `2.Not` = 2L,
`2.S` = 2L, `2.W` = 2L, `3.E` = 3L, `3.N` = 3L, `3.Not` = 3L,
`3.S` = 3L, `3.W` = 3L, `4.E` = 4L, `4.N` = 4L, `4.Not` = 4L,
`4.S` = 4L, `4.W` = 4L, `5.E` = 5L, `5.N` = 5L, `5.Not` = 5L,
`5.S` = 5L, `5.W` = 5L)), reshapeLong = list(varying = structure(list(
W = c("W.S", "W.E", "W.N", "W.W", "W.Not"), U = c("U.S",
"U.E", "U.N", "U.W", "U.Not")), v.names = c("W", "U"), times = c("S",
"E", "N", "W", "Not")), v.names = c("W", "U"), idvar = "chid",
timevar = "alt"), index = structure(list(chid = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L), .Label = c("1", "2", "3", "4",
"5"), class = "factor"), alt = structure(c(1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L), .Label = c("E", "N", "Not", "S", "W"), class = "factor")),
class = "data.frame", row.names = c("1.E",
"1.N", "1.Not", "1.S", "1.W", "2.E", "2.N", "2.Not", "2.S", "2.W",
"3.E", "3.N", "3.Not", "3.S", "3.W", "4.E", "4.N", "4.Not", "4.S",
"4.W", "5.E", "5.N", "5.Not", "5.S", "5.W")), choice = "NWSE", row.names =
c("1.E",
"1.N", "1.Not", "1.S", "1.W", "2.E", "2.N", "2.Not", "2.S", "2.W",
"3.E", "3.N", "3.Not", "3.S", "3.W", "4.E", "4.N", "4.Not", "4.S",
"4.W", "5.E", "5.N", "5.Not", "5.S", "5.W"), class = c("mlogit.data",
"data.frame"))
Upvotes: 2
Views: 347
Reputation: 2368
So it looks like your data are not full rank, e.g. high collinearity. We can test this by examining the rank of the data vs the number of predictors, if the rank is not equal to the number of predictors you will have a non-unique fit which is the error you are getting
For example:
library(dplyr)
movement %>%
select_if(is.numeric) %>%
{
cat("num predictors", ncol(.), "\n")
qr(.) %>%
.$rank
}
Yields:
num predictors 5
[1] 4
You can also see this by looking at the correlation of predictors:
movement %>%
select_if(is.numeric) %>%
cor()
Which yields:
BW BU W U chid
BW 1.0000000 NA 0.7360761 0.2079872 0.4765022
BU NA 1 NA NA NA
W 0.7360761 NA 1.0000000 -0.2686001 0.4933215
U 0.2079872 NA -0.2686001 1.0000000 -0.1964596
chid 0.4765022 NA 0.4933215 -0.1964596 1.0000000
This indicates for example that BW and W provide basically the same information. I don't know your subject matter, but you should probably not include both of these predictors in your model. BU also does not have any variation in it, so that will throw errors.
Upvotes: 1