Reputation: 306
I wrote the following code to clean the starting dataset, named db17:
# Clean the raw db17 table: transpose it, drop the "Commodities" cluster, keep
# the listed EORA26 sectors and compute the net exported VA of every remaining
# row. All intermediates live inside local(), so db17_final is the only object
# this step leaves in the workspace. Only base R is used.
db17_final <- local({
  # Sector labels to keep; they are joined into one regular expression.
  # NOTE(review): "Electrical and Machinery" and "Recycling" were added next to
  # the misspelled labels of the original pattern, which are kept so that rows
  # matched before still match - confirm the spelling used in the data.
  sector_pattern <- paste(
    c(
      "Agriculture", "Fishing", "Mining and Quarrying", "Food and Beverages",
      "Textiles and Wearing Apparel", "Wood and Paper",
      "Petroleum, Chemical and Non-Metallic Mineral Products",
      "Metal Products", "Electrinal and Machinery", "Electrical and Machinery",
      "Transport Equipment", "Other Manufacturing", "Recylcing", "Recycling",
      "Electricity, Gas and Water", "Construction", "Maintenance and Repair",
      "Wholesale Trade", "Retail Trade", "Hotels and Restaurants", "Transport",
      "Post and Telecommunications",
      "Financial Intermediation and Business Activities",
      "Public Administration", "Education, Health and Other Services",
      "Private Households", "Others", "Re-export & Re-import"
    ),
    collapse = "|"
  )

  # Transpose; the first row of the transposed table holds the column names.
  flipped <- t(db17)
  stopifnot(ncol(flipped) >= 5L)
  tbl <- as.data.frame(flipped[-1, , drop = FALSE], stringsAsFactors = FALSE)
  names(tbl) <- as.character(flipped[1, ])
  # Name the first four columns.
  names(tbl)[1:4] <- c("name", "code", "cluster", "sector")

  # Every column after the four label columns holds VA figures. For a table
  # with 194 columns this is exactly the formerly hard-coded 5:194.
  va_cols <- seq.int(5L, ncol(tbl))

  # Drop the Commodities cluster and keep only the listed sectors. Rows with a
  # missing cluster or sector are dropped, as they were before.
  keep <- !is.na(tbl[["cluster"]]) & tbl[["cluster"]] != "Commodities" &
    grepl(sector_pattern, tbl[["sector"]])
  tbl <- tbl[keep, , drop = FALSE]

  # Convert the VA columns to a numeric matrix (non-numeric text becomes NA
  # with a coercion warning, as with as.numeric()).
  va <- as.matrix(tbl[va_cols])
  storage.mode(va) <- "double"

  # Total VA per row.
  total_va <- rowSums(va, na.rm = TRUE)

  # Internal VA: the value in the column whose name equals the row's `name`.
  # Indexing the numeric matrix avoids the text round-trip that matrix
  # indexing of a mixed-type data frame performs, which rounds the values.
  own_col <- match(tbl[["name"]], colnames(va))
  internal_va <- va[cbind(seq_len(nrow(va)), own_col)]

  # Final table: the four label columns plus the net exported VA.
  result <- tbl[1:4]
  result$Net_TotalExp <- total_va - internal_va
  result
})
Now I have to apply the same code to the datasets named db03 through db17.
Is there a for loop that allows me to do so, using the data.frame name as an index?
Thanks
Upvotes: 0
Views: 38
Reputation: 2906
Without a sample of the data it is hard to know whether this will work. However:
Create a list of your dataframes:
# Collect the data frames db03, db04, ..., db17 (the names used in the
# question) from the calling environment into one list. mget() names each
# element after its object, and sprintf("%02d") supplies the leading zero.
my_list <- mget(sprintf("db%02d", 3:17))
Code:
# Clean one raw table and return the net exported VA of each kept row.
#
# Arguments:
#   data        Raw table. Once transposed, its first row must hold the column
#               names and its first four columns the name, code, cluster and
#               sector labels; the remaining columns hold VA figures.
#   value_cols  Positions or names of the VA columns in the transposed table.
#               NULL (the default) uses every column from the fifth to the
#               last, which for a 194-column table is the former 5:194.
#
# Returns:
#   A data frame with the columns name, code, cluster, sector and
#   Net_TotalExp (row total of the VA columns minus the value found in the
#   column named like the row's `name`; NA when no such column exists).
#
# Only base R is used, so the function does not depend on attached packages.
MyF <- function(data, value_cols = NULL) {
  # Sector labels to keep; they are joined into one regular expression.
  # NOTE(review): "Electrical and Machinery" and "Recycling" were added next to
  # the misspelled labels of the original pattern, which are kept so that rows
  # matched before still match - confirm the spelling used in the data.
  sectors <- c(
    "Agriculture", "Fishing", "Mining and Quarrying", "Food and Beverages",
    "Textiles and Wearing Apparel", "Wood and Paper",
    "Petroleum, Chemical and Non-Metallic Mineral Products",
    "Metal Products", "Electrinal and Machinery", "Electrical and Machinery",
    "Transport Equipment", "Other Manufacturing", "Recylcing", "Recycling",
    "Electricity, Gas and Water", "Construction", "Maintenance and Repair",
    "Wholesale Trade", "Retail Trade", "Hotels and Restaurants", "Transport",
    "Post and Telecommunications",
    "Financial Intermediation and Business Activities",
    "Public Administration", "Education, Health and Other Services",
    "Private Households", "Others", "Re-export & Re-import"
  )
  sector_pattern <- paste(sectors, collapse = "|")

  # Transpose; the first row of the transposed table holds the column names.
  cells <- t(data)
  stopifnot(ncol(cells) >= 5L)
  tbl <- as.data.frame(cells[-1, , drop = FALSE], stringsAsFactors = FALSE)
  names(tbl) <- as.character(cells[1, ])
  # Name the first four columns.
  names(tbl)[1:4] <- c("name", "code", "cluster", "sector")

  if (is.null(value_cols)) {
    value_cols <- seq.int(5L, ncol(tbl))
  }

  # Drop the Commodities cluster and keep only the listed sectors. Rows with a
  # missing cluster or sector are dropped, as they were before.
  keep <- !is.na(tbl[["cluster"]]) & tbl[["cluster"]] != "Commodities" &
    grepl(sector_pattern, tbl[["sector"]])
  tbl <- tbl[keep, , drop = FALSE]

  # Convert the VA columns to a numeric matrix (non-numeric text becomes NA
  # with a coercion warning, as with as.numeric()).
  values <- as.matrix(tbl[value_cols])
  storage.mode(values) <- "double"

  # Internal VA: the value in the column whose name equals the row's `name`.
  # Indexing the numeric matrix avoids the text round-trip that matrix
  # indexing of a mixed-type data frame performs, which rounds the values.
  own_col <- match(tbl[["name"]], colnames(values))
  internal_va <- values[cbind(seq_len(nrow(values)), own_col)]

  # Final table: the four label columns plus the net exported VA.
  result <- tbl[1:4]
  result$Net_TotalExp <- rowSums(values, na.rm = TRUE) - internal_va
  result
}
# Apply MyF to every element of my_list; the results come back in list order.
list_of_df <- lapply(X = my_list, FUN = MyF)
# Starting value of the global counter that MyF2 reads and increments.
i <- 3
# Store `input` in the global environment under the name df_final_<NN>, where
# <NN> is the current value of the counter `i` (prefixed with one zero when it
# is below 10), then advance the counter by one. The counter `i` is looked up
# and updated outside the function.
MyF2 <- function(input) {
  padding <- if (i < 10) "0" else ""
  assign(paste0("df_final_", padding, i), input, envir = .GlobalEnv)
  i <<- i + 1
}
# Call MyF2 once per cleaned data frame, in list order; the call is made for
# its side effect of assigning each element to a numbered global variable.
lapply(list_of_df, MyF2)
This will create all the data frames df_final_03 through df_final_17 in your global environment.
Upvotes: 2
Reputation: 1000
Not exactly a loop, but you could turn your code into a function and easily call it several times.
# Clean one raw table and return the net exported VA of each kept row.
#
# Arguments:
#   df          Raw table. Once transposed, its first row must hold the column
#               names and its first four columns the name, code, cluster and
#               sector labels; the remaining columns hold VA figures.
#   value_cols  Positions or names of the VA columns in the transposed table.
#               NULL (the default) uses every column after the four label
#               columns, which for a 194-column table is the former 5:194.
#
# Returns:
#   A data frame with the columns name, code, cluster, sector and
#   Net_TotalExp (row total of the VA columns minus the value found in the
#   column named like the row's `name`; NA when no such column exists).
#
# Only base R is used, so the function does not depend on attached packages.
doing_stuff <- function(df, value_cols = NULL) {
  label_names <- c("name", "code", "cluster", "sector")

  # Sector labels to keep; they are joined into one regular expression.
  # NOTE(review): "Electrical and Machinery" and "Recycling" were added next to
  # the misspelled labels of the original pattern, which are kept so that rows
  # matched before still match - confirm the spelling used in the data.
  sector_pattern <- paste(
    c(
      "Agriculture", "Fishing", "Mining and Quarrying", "Food and Beverages",
      "Textiles and Wearing Apparel", "Wood and Paper",
      "Petroleum, Chemical and Non-Metallic Mineral Products",
      "Metal Products", "Electrinal and Machinery", "Electrical and Machinery",
      "Transport Equipment", "Other Manufacturing", "Recylcing", "Recycling",
      "Electricity, Gas and Water", "Construction", "Maintenance and Repair",
      "Wholesale Trade", "Retail Trade", "Hotels and Restaurants", "Transport",
      "Post and Telecommunications",
      "Financial Intermediation and Business Activities",
      "Public Administration", "Education, Health and Other Services",
      "Private Households", "Others", "Re-export & Re-import"
    ),
    collapse = "|"
  )

  # Transpose and promote the first row to column names, working on the
  # matrix directly instead of converting to a data frame first.
  grid <- t(df)
  stopifnot(nrow(grid) >= 1L, ncol(grid) > length(label_names))
  colnames(grid) <- as.character(grid[1, ])
  grid <- grid[-1, , drop = FALSE]
  # Name the first four columns.
  colnames(grid)[seq_along(label_names)] <- label_names

  if (is.null(value_cols)) {
    value_cols <- seq.int(length(label_names) + 1L, ncol(grid))
  }

  # Drop the Commodities cluster and keep only the listed sectors. Rows with a
  # missing cluster or sector are dropped, as they were before.
  keep <- !is.na(grid[, "cluster"]) & grid[, "cluster"] != "Commodities" &
    grepl(sector_pattern, grid[, "sector"])
  labels <- grid[keep, label_names, drop = FALSE]

  # VA figures as a numeric matrix (non-numeric text becomes NA with a
  # coercion warning, as with as.numeric()).
  values <- grid[keep, value_cols, drop = FALSE]
  storage.mode(values) <- "double"

  # Internal VA: the value in the column whose name equals the row's `name`.
  # Indexing the numeric matrix avoids the text round-trip that matrix
  # indexing of a mixed-type data frame performs, which rounds the values.
  own_col <- match(labels[, "name"], colnames(values))
  internal_va <- values[cbind(seq_len(nrow(values)), own_col)]

  # Final output: the four label columns plus the net exported VA.
  df_final <- as.data.frame(labels, stringsAsFactors = FALSE)
  df_final$Net_TotalExp <- rowSums(values, na.rm = TRUE) - internal_va
  df_final
}
# Clean each dataset with doing_stuff() and store the result as <name>_final.
db17_final <- doing_stuff(db17)
db03_final <- doing_stuff(db03)
# NOTE(review): the question only mentions db03 to db17 - confirm db19 exists.
db19_final <- doing_stuff(db19)
(Since I don't have the data to check, I just replaced every "db17" with "df".)
Edit: The answer from @MonJeanJean is fancier, but depending on how big your data is, it might be better to execute the function several times instead of having the data loaded twice. If you have a small dataset, ignore my answer!
Upvotes: 1