nflore
nflore

Reputation: 306

Iterating the same code over different datasets

I wrote the following code for cleaning the starting dataset named db17:

# Clean the raw db17 table and keep the net exported value added per row.
# row_to_names() comes from janitor; filter(), relocate() and %>% come from
# dplyr, so both packages must be attached before running this.

# transpose the matrix
db17_t <- t(db17)
# convert to a data.frame object
db17_t <- as.data.frame(db17_t)
# use the first row as column names
db17_t <- row_to_names(db17_t, 1, remove_row = TRUE)
# name the first four columns
colnames(db17_t)[1:4] <- c("name", "code", "cluster", "sector")
# get rid of the Commodities cluster
db17_t_nocomm <- db17_t[!(db17_t$cluster == "Commodities"), ]
rm(db17_t)
# keep only the EORA26 sector classification
# NOTE(review): "Electrinal" and "Recylcing" look like misspellings of
# "Electrical" and "Recycling" -- confirm against the labels in the data.
db17_t_nocomm_f <- filter(db17_t_nocomm, grepl("Agriculture|Fishing|Mining and Quarrying|Food and Beverages|Textiles and Wearing Apparel|Wood and Paper|Petroleum, Chemical and Non-Metallic Mineral Products|Metal Products|Electrinal and Machinery|Transport Equipment|Other Manufacturing|Recylcing|Electricity, Gas and Water|Construction|Maintenance and Repair|Wholesale Trade|Retail Trade|Hotels and Restaurants|Transport|Post and Telecommunications|Financial Intermediation and Business Activities|Public Administration|Education, Health and Other Services|Private Households|Others|Re-export & Re-import", sector))
rm(db17_t_nocomm)
# convert the VA values (columns 5 to 194) to numeric; lapply() always returns
# one vector per column, whereas the shape returned by sapply() depends on the
# number of rows
db17_t_nocomm_f[, 5:194] <- lapply(db17_t_nocomm_f[, 5:194], as.numeric)
# calculate total VA
db17_t_nocomm_f$Total <- rowSums(db17_t_nocomm_f[, 5:194], na.rm = TRUE)
db17_t_nocomm_f <- db17_t_nocomm_f %>% relocate(Total, .before = Afghanistan)
# calculate internal VA: for every row, the value in the column whose name
# equals that row's `name` (NA when there is no such column). Each cell is
# read directly instead of matrix-indexing the data frame, because that would
# pass through as.matrix(), which format()s numeric columns of a mixed-type
# frame and so rounds the values. seq_len() keeps this valid for zero rows.
db17_t_nocomm_f$Internal_VA <- local({
  own_col <- match(db17_t_nocomm_f$name, names(db17_t_nocomm_f))
  vapply(
    seq_len(nrow(db17_t_nocomm_f)),
    function(r) {
      if (is.na(own_col[r])) {
        NA_real_
      } else {
        as.numeric(db17_t_nocomm_f[[own_col[r]]][r])
      }
    },
    numeric(1)
  )
})
# calculate net exported VA
db17_t_nocomm_f$Net_TotalExp <- db17_t_nocomm_f$Total - db17_t_nocomm_f$Internal_VA
db17_t_nocomm_f <- db17_t_nocomm_f %>% relocate(c(Internal_VA, Net_TotalExp), .before = Afghanistan)
# finalize: keep columns 1-4 and 7
# (column 7 is Net_TotalExp provided Afghanistan was the fifth column -- TODO confirm)
db17_final <- db17_t_nocomm_f[, c(1, 2, 3, 4, 7)]
rm(db17_t_nocomm_f)

Now I have to apply the same code to the datasets named db03 through db17.

Is there a for loop that allows me to do so, using the data.frame name as an index?

Thanks

Upvotes: 0

Views: 38

Answers (2)

MonJeanJean
MonJeanJean

Reputation: 2906

Without a data sample it's hard to know whether this will work. However:

Create a list of your dataframes:

# Collect the data frames db03, db04, ..., db17 into one list. mget() looks
# each name up in the current environment and returns a list named after
# them; it stops with an error if any of the data frames is missing.
my_list <- mget(sprintf("db%02d", 3:17))

Code:

#' Clean one raw table and return its net exported value added.
#'
#' Relies on `row_to_names()` from janitor and on `filter()`, `relocate()`
#' and `%>%` from dplyr, so both packages must be attached.
#'
#' @param data Raw table laid out like `db17`: once transposed, its first row
#'   supplies the column names, the first four columns are renamed to `name`,
#'   `code`, `cluster` and `sector`, columns 5 to 194 hold the values that are
#'   converted to numeric, and one column must be called `Afghanistan`.
#' @return A data frame made of columns 1-4 and 7 of the cleaned table.
#'   Column 7 is `Net_TotalExp` provided `Afghanistan` was the fifth column
#'   (TODO confirm).
MyF <- function(data) {
  # Sector labels kept by the EORA26 filter, joined into one regular
  # expression of alternatives.
  # NOTE(review): "Electrinal" and "Recylcing" look like misspellings of
  # "Electrical" and "Recycling" -- confirm against the labels in the data.
  sector_pattern <- paste(
    c(
      "Agriculture", "Fishing", "Mining and Quarrying", "Food and Beverages",
      "Textiles and Wearing Apparel", "Wood and Paper",
      "Petroleum, Chemical and Non-Metallic Mineral Products",
      "Metal Products", "Electrinal and Machinery", "Transport Equipment",
      "Other Manufacturing", "Recylcing", "Electricity, Gas and Water",
      "Construction", "Maintenance and Repair", "Wholesale Trade",
      "Retail Trade", "Hotels and Restaurants", "Transport",
      "Post and Telecommunications",
      "Financial Intermediation and Business Activities",
      "Public Administration", "Education, Health and Other Services",
      "Private Households", "Others", "Re-export & Re-import"
    ),
    collapse = "|"
  )

  # transpose the matrix and convert it to a data.frame object
  transposed <- as.data.frame(t(data))
  # use the first row as column names
  transposed <- row_to_names(transposed, 1, remove_row = TRUE)
  # name the first four columns
  colnames(transposed)[1:4] <- c("name", "code", "cluster", "sector")

  # get rid of the Commodities cluster
  no_commodities <- transposed[!(transposed$cluster == "Commodities"), ]
  # keep only the EORA26 sector classification
  cleaned <- filter(no_commodities, grepl(sector_pattern, sector))

  # convert the VA values to numeric; lapply() always returns one vector per
  # column, whereas the shape returned by sapply() depends on the row count
  cleaned[, 5:194] <- lapply(cleaned[, 5:194], as.numeric)

  # calculate total VA
  cleaned$Total <- rowSums(cleaned[, 5:194], na.rm = TRUE)
  cleaned <- cleaned %>% relocate(Total, .before = Afghanistan)

  # calculate internal VA: for every row, the value in the column whose name
  # equals that row's `name` (NA when there is no such column). Each cell is
  # read directly instead of matrix-indexing the data frame, because that
  # would pass through as.matrix(), which format()s numeric columns of a
  # mixed-type frame and so rounds the values. seq_len() keeps this valid
  # when the filter leaves zero rows.
  own_col <- match(cleaned$name, names(cleaned))
  cleaned$Internal_VA <- vapply(
    seq_len(nrow(cleaned)),
    function(r) {
      if (is.na(own_col[r])) {
        NA_real_
      } else {
        as.numeric(cleaned[[own_col[r]]][r])
      }
    },
    numeric(1)
  )

  # calculate net exported VA
  cleaned$Net_TotalExp <- cleaned$Total - cleaned$Internal_VA
  cleaned <- cleaned %>%
    relocate(c(Internal_VA, Net_TotalExp), .before = Afghanistan)

  # finalize: keep columns 1-4 and 7
  cleaned[, c(1, 2, 3, 4, 7)]
}


# Run the cleaning function on every data frame; results keep the list order.
list_of_df <- lapply(X = my_list, FUN = MyF)


# Running counter used to build the object names; starts at 3.
i <- 3

# Stores `input` in the global environment under "df_final_<i>", writing the
# counter with a leading zero while it is below 10, and then advances the
# counter by one. The new counter value is returned invisibly.
MyF2 <- function(input) {
  prefix <- if (i < 10) "df_final_0" else "df_final_"
  assign(paste0(prefix, i), input, envir = .GlobalEnv)
  i <<- i + 1
}

# Store each cleaned data frame in the global environment, one call per element.
lapply(X = list_of_df, FUN = MyF2)

This will create the objects df_final_03 through df_final_17 in your global environment.

Upvotes: 2

Zoe
Zoe

Reputation: 1000

Not exactly a loop, but you could turn your code into a function and easily call it several times.

#' Clean one raw table and return the net exported value added per row.
#'
#' Needs janitor (`row_to_names()`) and dplyr (`filter()`, `relocate()`,
#' `%>%`) to be attached.
#'
#' @param df Raw table laid out like `db17`: once transposed, its first row
#'   supplies the column names, the first four columns are renamed to `name`,
#'   `code`, `cluster` and `sector`, columns 5 to 194 hold the values that are
#'   converted to numeric, and one column must be called `Afghanistan`.
#' @return A data frame made of columns 1-4 and 7 of the cleaned table.
#'   Column 7 is `Net_TotalExp` provided `Afghanistan` was the fifth column
#'   (TODO confirm).
doing_stuff <- function(df) {
  # EORA26 sector labels to keep, as regular-expression alternatives.
  # NOTE(review): "Electrinal" and "Recylcing" look like misspellings of
  # "Electrical" and "Recycling" -- confirm against the labels in the data.
  eora26_sectors <- paste(
    c(
      "Agriculture", "Fishing", "Mining and Quarrying", "Food and Beverages",
      "Textiles and Wearing Apparel", "Wood and Paper",
      "Petroleum, Chemical and Non-Metallic Mineral Products",
      "Metal Products", "Electrinal and Machinery", "Transport Equipment",
      "Other Manufacturing", "Recylcing", "Electricity, Gas and Water",
      "Construction", "Maintenance and Repair", "Wholesale Trade",
      "Retail Trade", "Hotels and Restaurants", "Transport",
      "Post and Telecommunications",
      "Financial Intermediation and Business Activities",
      "Public Administration", "Education, Health and Other Services",
      "Private Households", "Others", "Re-export & Re-import"
    ),
    collapse = "|"
  )
  va_cols <- 5:194

  # Transpose, turn into a data frame and promote the first row to header.
  out <- row_to_names(as.data.frame(t(df)), 1, remove_row = TRUE)
  colnames(out)[1:4] <- c("name", "code", "cluster", "sector")

  # Drop the Commodities cluster, then keep only the EORA26 sectors.
  out <- out[!(out$cluster == "Commodities"), ]
  out <- filter(out, grepl(eora26_sectors, sector))

  # Convert the VA columns to numeric. lapply() always gives one vector per
  # column; sapply() would change its return shape with the number of rows.
  out[va_cols] <- lapply(out[va_cols], as.numeric)

  # Total VA per row, placed in front of the Afghanistan column.
  out$Total <- rowSums(out[va_cols], na.rm = TRUE)
  out <- out %>% relocate(Total, .before = Afghanistan)

  # Internal VA: the value in the column named like the row's `name`, or NA
  # when no column has that name. Cells are read one by one because
  # matrix-indexing the data frame would go through as.matrix(), which
  # format()s numeric columns of a mixed-type frame and so rounds the values.
  # seq_len() keeps this valid when no rows are left.
  own_col <- match(out$name, names(out))
  out$Internal_VA <- vapply(
    seq_len(nrow(out)),
    function(r) {
      if (is.na(own_col[r])) NA_real_ else as.numeric(out[[own_col[r]]][r])
    },
    numeric(1)
  )

  # Net exported VA, placed together with Internal_VA before Afghanistan.
  out$Net_TotalExp <- out$Total - out$Internal_VA
  out <- out %>% relocate(c(Internal_VA, Net_TotalExp), .before = Afghanistan)

  # Final output: columns 1-4 and 7.
  out[, c(1, 2, 3, 4, 7)]
}

# Clean each dataset separately; every call returns its own final data frame.
db17_final <- doing_stuff(df = db17)
db03_final <- doing_stuff(df = db03)
db19_final <- doing_stuff(df = db19)

(Since I don't have the data to check, I just replaced every "db17" with "df".)

Edit: The answer from @MonJeanJean is fancier, but depending on how big your data is, maybe executing the function several times instead of having the data loaded twice might be better. If you have a small dataset, ignore my answer!

Upvotes: 1

Related Questions