Reputation: 131
I am working multiple files (each file corresponds to each year from 2008 to 2020) using Stata and R. For question purpose, 3 example Stata data files are named as: "file_2008.dta", "file_2009.dta" and "file_2010.dta". I would like to rename and label the variables corresponding to each year. Then, I would like to merge these files using looping. I tried in Stata with the codes below. I would like to learn how I can reproduce similar output using looping in R.
Data examples in Stata file_2008.dta
clear
input byte(id x1 x2 y1 y2)
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2008.dta"
clear
input byte(id x1 x2 y1 y2)
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2009.dta"
clear
input byte(id x1 x2 y1 y2)
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2010.dta"
Desired output
clear
input byte(id y1_08 y2_08 x1_08 x2_08 y1_09 y2_09 x1_09 x2_09 y1_10 y2_10 x1_10 x2_10)
1 1 1 1 1 1 1 1 1 1 1 1 1
2 2 2 2 2 2 2 2 2 2 2 2 2
3 3 3 3 3 3 3 3 3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2008_2010.dta"
In Stata, I have used the following codes to get desired output.
global workdir "C:/Users/sai/Desktop/"
*for rename and label variables corresponding to each year
foreach name in "08" "09" "10" {
use "${workdir}file_20`name'.dta",clear
rename x1 x1_`name'
label variable x1_`name' "year 20`name'"
rename x2 x2_`name'
label variable x2_`name' "year 20`name'"
rename y1 y1_`name'
label variable y1_`name' "year 20`name'"
rename y2 y2_`name'
label variable y2_`name' "year 20`name'"
order y*, before (x1*)
save "${workdir}file_20`name'.dta",replace
}
*merging multiple files
use "${workdir}file_2008.dta", clear
forvalues i=2009/2010 {
sort id
merge 1:1 id using "${workdir}file_`i'.dta", nogen
}
save "${workdir}file_2008_2010.dta", replace
For R, example data and desired output are as follow.
file_2008 <- tribble(
~id,~x1,~x2,~y1,~y2,
1, 1, 1, 1, 1,
2, 2, 2, 2, 2,
3, 3, 3, 3, 3
)
file_2009 <- tribble(
~id,~x1,~x2,~y1,~y2,
1, 1, 1, 1, 1,
2, 2, 2, 2, 2,
3, 3, 3, 3, 3
)
file_2010 <- tribble(
~id,~x1,~x2,~y1,~y2,
1, 1, 1, 1, 1,
2, 2, 2, 2, 2,
3, 3, 3, 3, 3
)
# desired output
file_2008_2010 <- tribble(
~id,~y1_08,~y2_08,~x1_08,~x2_08, ~y1_09,~y2_09,~x1_09,~x2_09,~y1_10,~y2_10,~x1_10,~x2_10,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
)
Upvotes: 0
Views: 310
Reputation: 5898
Assuming that your dta files are in a folder of path "./data_folder"
(i.e. in a subfolder of R's working directory), with tidyverse
for wrangling and haven
for dta import,
library(tidyverse)
library(haven)
# Function to load the dta's, and append the year from the file name as a variable
fx_load_lable_dta <- function(path) {
str_yearlabel <- sub("^.*(?=\\d\\d[.]dta$)", "", path , perl = TRUE) %>% sub("[.]dta$", "", . )
df <- haven::read_dta(path)
df$year_label <- str_yearlabel
df
}
list.files(path = "./dta_folder/", full.names = TRUE) %>% #Create a list of file's paths
map_df(.f = fx_load_lable_dta) %>% #Iterate the list of file paths through the function and append all data frames
pivot_longer(cols = x1:y2) %>% #Wrangle to get your desire outpu
pivot_wider(id_cols = id,
names_from = c(name, year_label),
values_from = c(value))
# A tibble: 3 x 13
id x1_08 x2_08 y1_08 y2_08 x1_09 x2_09 y1_09 y2_09 x1_10 x2_10 y1_10 y2_10
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 1 1 1 1 1 1 1 1 1 1
2 2 2 2 2 2 2 2 2 2 2 2 2 2
3 3 3 3 3 3 3 3 3 3 3 3 3 3
Upvotes: 3