collins fang
collins fang

Reputation: 1

Variable lengths differ, R

# Restrict the sample: keep rows of replicatedata_full with a non-missing
# belief_treatment_w3, hk_local == 1, followup_postjuly1st_w3 == 1 and a
# non-missing guess_july1_2016_partust_w3pos.
replicatedata_firststage <- replicatedata_full %>%
  filter(!is.na(belief_treatment_w3), hk_local == 1,
         followup_postjuly1st_w3 == 1, !is.na(guess_july1_2016_partust_w3pos))
# Print the restricted data frame.
replicatedata_firststage

# Directional treatment indicator.
# NOTE(review): the pipe supplies belief_treatment_im as the first argument
# (`test`) of ifelse(), so the comparison on the next line is matched to `no`
# rather than used as the condition -- confirm this is what was intended.
belief_treatment_im <- belief_treatment_im %>%
  ifelse(replicatedata_firststage$guess_july1_2016_planust_w3 >= 17 & replicatedata_firststage$belief_treatment_w3 == 1, yes = -1)
belief_treatment_im

# Generate sample splitting indicator and interaction with controls.
# NOTE(review): x[x >= 17] keeps only the elements that are >= 17, so this is
# a subset of the column (possibly shorter than the data frame has rows), not
# a 0/1 indicator with one value per row.
guess_july1_2016_above17 <- replicatedata_firststage$guess_july1_2016_planust_w3[replicatedata_firststage$guess_july1_2016_planust_w3 >= 17]
# Products of a full-length column with the vector created above; both results
# are free-standing vectors, not columns of replicatedata_firststage.
guess_july1_2016_partustXabv17 = replicatedata_firststage$guess_july1_2016_partust_w3pre * guess_july1_2016_above17
guess_july1_2016_partXabv17 <- replicatedata_firststage$guess_july1_2016_part_w3pre * guess_july1_2016_above17

# Generate trimmed prior beliefs.
# Load DescTools, installing it first when require() reports it is missing.
if(!require('DescTools')) {
  install.packages('DescTools')
  library('DescTools')
}
# NOTE(review): the condition given to if() is the whole column rather than a
# single TRUE/FALSE value -- confirm what condition was meant here.
guess_july1_2016_planust_w3_tr <- if(replicatedata_firststage$guess_july1_2016_planust_w3) { Winsorize(replicatedata_firststage$guess_july1_2016_planust_w3, minval = NULL, maxval = NULL)
}
guess_july1_2016_planust_w3_tr

# First stage regression, for HK students.
# guess_july1_2016_above17 and guess_july1_2016_partustXabv17 are not columns
# of `data`; they are the free-standing vectors created above.
# NOTE(review): na.rm is not a documented lm() argument (missing values are
# controlled through na.action) -- presumably it has no effect here.
reg1_1 <- lm(guess_july1_2016_partust_w3pos ~ belief_treatment_w3 + guess_july1_2016_partust_w3pre + guess_july1_2016_above17 + guess_july1_2016_partustXabv17, data = replicatedata_firststage, na.rm = TRUE)
summary(reg1_1)

It runs smoothly until the lm() call, which fails with:

Error in model.frame.default(formula = guess_july1_2016_partust_w3pos ~ : variable lengths differ (found for 'guess_july1_2016_above17')

How can I fix it? I've tried na.rm and na.omit(), but neither worked.

Upvotes: 0

Views: 540

Answers (1)

lhs
lhs

Reputation: 1038

Keep all of the variables you create inside the data frame by assigning them as columns: replicatedata_firststage$new_var <- my_function(replicatedata_firststage$old_var). Then, when you call lm(), every variable referenced in the formula is taken from the data frame passed in the data = argument. Right now guess_july1_2016_above17 (and the other derived variables) are free-standing vectors rather than columns of the data frame, so they can have a different length from its rows.

# Restrict the sample: keep rows with a non-missing treatment indicator and
# posterior belief, hk_local == 1 and followup_postjuly1st_w3 == 1.
replicatedata_firststage <- replicatedata_full %>%
  filter(
    !is.na(belief_treatment_w3),
    hk_local == 1,
    followup_postjuly1st_w3 == 1,
    !is.na(guess_july1_2016_partust_w3pos)
  )
replicatedata_firststage

# Directional treatment indicator: -1 for treated rows whose prior is at
# least 17. ifelse() is called directly with the condition as `test`; piping
# another object into it would make that object the condition instead.
# NOTE(review): the value used for all other rows (the original treatment
# indicator) is an assumption -- confirm against the replication materials.
replicatedata_firststage$belief_treatment_im <- ifelse(
  replicatedata_firststage$guess_july1_2016_planust_w3 >= 17 &
    replicatedata_firststage$belief_treatment_w3 == 1,
  -1,
  replicatedata_firststage$belief_treatment_w3
)
replicatedata_firststage$belief_treatment_im

# Sample-splitting indicator and its interactions with the controls.
# The indicator needs one 0/1 value per row. Subsetting with x[x >= 17]
# instead returns only the values >= 17, i.e. a shorter vector, which is what
# makes lm() report "variable lengths differ".
replicatedata_firststage$guess_july1_2016_above17 <-
  as.integer(replicatedata_firststage$guess_july1_2016_planust_w3 >= 17)
replicatedata_firststage$guess_july1_2016_partustXabv17 <-
  replicatedata_firststage$guess_july1_2016_partust_w3pre *
  replicatedata_firststage$guess_july1_2016_above17
replicatedata_firststage$guess_july1_2016_partXabv17 <-
  replicatedata_firststage$guess_july1_2016_part_w3pre *
  replicatedata_firststage$guess_july1_2016_above17

# Trimmed (winsorized) prior beliefs.
# Install DescTools only when it is missing, then attach it with library() so
# that a failed load stops the script instead of returning FALSE.
if (!requireNamespace("DescTools", quietly = TRUE)) {
  install.packages("DescTools")
}
library("DescTools")
# Winsorize() works on the whole vector, so no surrounding if () is needed
# (`if` requires a single TRUE/FALSE, not a column). The package's default
# limits are used; the explicit NULL limits were dropped because they only
# restated the defaults.
replicatedata_firststage$guess_july1_2016_planust_w3_tr <-
  Winsorize(replicatedata_firststage$guess_july1_2016_planust_w3)
replicatedata_firststage$guess_july1_2016_planust_w3_tr

# First stage regression, for HK students. Every term in the formula is now a
# column of the data frame passed as `data`, so all variables share its
# number of rows.
reg1_1 <- lm(
  guess_july1_2016_partust_w3pos ~ belief_treatment_w3 +
    guess_july1_2016_partust_w3pre + guess_july1_2016_above17 +
    guess_july1_2016_partustXabv17,
  data = replicatedata_firststage
)
summary(reg1_1)

Upvotes: 0

Related Questions