Function looping for multiple species

Question

I want to modify this function so that it handles a vector of species rather than one species at a time.

merge_TATB_standalone <- function(ta, tb , species, country = "all", 
                                      strata = BioIndex::strata_scheme, 
                                      wd = NA, save = TRUE, verbose = TRUE) {
    
    # For testing purposes only
    if (FALSE) {
      verbose <- TRUE
      wd <- "wd"
      species <- "ARISFOL"
      country <- "all"
    }
    
    
    # Load or declare helper functions and objects as needed.
    # For time conversion, ensure you have installed and loaded the hms package:
    # install.packages("hms"); library(hms)
    # The following line prevents errors if these functions are not available,
    # but you need to have them in your environment or replace them with your own versions.
    check_date_haul <- check_dictionary <- check_hauls_TBTA <- check_numeric_range <- 
      convert_coordinates <- hms <- write.table <- NULL
    
    TA_cols <- BioIndex::TA_cols  # expected columns for TA
    TB_cols <- BioIndex::TB_cols  # expected columns for TB
    
    # Process the species code: here we assume the species string is at least 7 characters long.
    # Adjust as necessary.
    species[2] <- substr(species, 5, 7)
    species[1] <- substr(species[1], 1, 4)
    sspp <- paste(species[1], species[2], sep = "")
    
    # Assign data frames
    TA <- ta
    TB <- tb
    
    # Check that the countries in TA are also present in TB
    TA_country <- sort(unique(as.character(TA$COUNTRY)))
    TB_country <- sort(unique(as.character(TB$COUNTRY)))
    for (i in 1:length(TA_country)) {
      if (!(TA_country[i] %in% TB_country)) {
        warning(paste("The country", TA_country[i], "is not present in the TB file."))
      }
    }
    
    # Determine which countries to analyze.
    if (length(TA_country) == 1) {
      country_analysis <- TA_country
    } else {
      if (any(country %in% "all")) {
        country_analysis <- TA_country
      } else {
        country_analysis <- country
      }
    }
    
    # Subset TA and TB by country
    TA <- TA[TA$COUNTRY %in% country_analysis, ]
    TB <- TB[TB$COUNTRY %in% country_analysis, ]
    
    # Create a unique haul identifier (id) for both TA and TB
    id_TA <- data.frame(id = paste(TA$AREA, TA$COUNTRY, TA$YEAR, "_",
                                   TA$VESSEL, TA$MONTH, TA$DAY, "_", TA$HAUL_NUMBER, sep = ""))
    id_TB <- data.frame(id = paste(TB$AREA, TB$COUNTRY, TB$YEAR, "_",
                                   TB$VESSEL, TB$MONTH, TB$DAY, "_", TB$HAUL_NUMBER, sep = ""))
    
    # Remove invalid hauls (assumes TA has a column VALIDITY where "I" indicates invalid)
    id_invalid <- id_TA$id[which(TA$VALIDITY == "I")]
    
    # (Optional) Quality checks can be performed here on TA and TB by year.
    yyy <- sort(unique(TA$YEAR))
    suffix <- paste(as.character(Sys.Date()), format(Sys.time(), "_time_h%Hm%Ms%OS0"), sep = "")
    for (yy in 1:length(yyy)) {
      TA_year <- TA[TA$YEAR == yyy[yy], ]
      TB_year <- TB[TB$YEAR == yyy[yy], ]
      
      Field <- "SHOOTING_TIME"
      Values <- seq(0, 2400, 1)
      if (!check_dictionary(ResultData = TA_year, Field, Values, 
                            year = yyy[yy], wd = file.path(wd, "output"),
                            suffix = suffix)) {
        stop("SHOOTING_TIME value out of allowed range. Please check the logfile.")
      }
      
      Field <- "HAULING_TIME"
      Values <- seq(0, 2400, 1)
      if (!check_dictionary(ResultData = TA_year, Field, Values, 
                            year = yyy[yy], wd = file.path(wd, "output"),
                            suffix = suffix)) {
        stop("HAULING_TIME value out of allowed range. Please check the logfile.")
      }
      
      Field <- "WING_OPENING"
      Values <- c(30, 50:250)
      if (!check_dictionary(ResultData = TA_year, Field, Values, 
                            year = yyy[yy], wd = file.path(wd, "output"),
                            suffix = suffix)) {
        stop("WING_OPENING value out of allowed range. Please check the logfile.")
      }
      
      Field <- "SHOOTING_LATITUDE"
      Values <- c(3020, 4730)
      if (!check_numeric_range(Data = TA_year, Field, Values, 
                               year = yyy[yy], wd = file.path(wd, "output"),
                               suffix = suffix)) {
        stop("SHOOTING_LATITUDE in TA out of allowed range. Please check the logfile.")
      }
      
      Field <- "HAULING_LATITUDE"
      Values <- c(3020, 4730)
      if (!check_numeric_range(Data = TA_year, Field, Values, 
                               year = yyy[yy], wd = file.path(wd, "output"),
                               suffix = suffix)) {
        stop("HAULING_LATITUDE in TA out of allowed range. Please check the logfile.")
      }
      
      Field <- "SHOOTING_LONGITUDE"
      Values <- c(0, 4200)
      if (!check_numeric_range(Data = TA_year, Field, Values, 
                               year = yyy[yy], wd = file.path(wd, "output"),
                               suffix = suffix)) {
        stop("SHOOTING_LONGITUDE in TA out of allowed range. Please check the logfile.")
      }
      
      Field <- "HAULING_LONGITUDE"
      Values <- c(0, 4200)
      if (!check_numeric_range(Data = TA_year, Field, Values, 
                               year = yyy[yy], wd = file.path(wd, "output"),
                               suffix = suffix)) {
        stop("HAULING_LONGITUDE in TA out of allowed range. Please check the logfile.")
      }
      
      if (!check_date_haul(TA_year, TB_year, year = yyy[yy],
                           wd = file.path(wd, "output"), suffix = suffix)) {
        stop("Date in TB not consistent with TA. Please check the logfile.")
      }
      
      if (!check_hauls_TBTA(TA_year, TB_year, year = yyy[yy],
                            wd = file.path(wd, "output"), suffix = suffix)) {
        stop("Haul in TB not reported in TA. Please check the logfile.")
      }
    }
    
    # Rename column "AREA" to "GSA" in both datasets
    colnames(TA)[which(colnames(TA) == "AREA")] <- "GSA"
    colnames(TB)[which(colnames(TB) == "AREA")] <- "GSA"
    
    # Attach the id fields to the main data frames
    TA_merge <- cbind(id_TA, TA)
    TB_merge <- cbind(id_TB, TB)
    TB_merge$GENUS <- as.character(TB_merge$GENUS)
    TB_merge$SPECIES <- as.character(TB_merge$SPECIES)
    
    # Remove invalid hauls
    TA_merge <- TA_merge[!TA_merge$id %in% id_invalid, ]
    TB_merge <- TB_merge[!TB_merge$id %in% id_invalid, ]
    
    # Keep only the desired columns (based on BioIndex definitions)
    TA_merge <- TA_merge[, which(colnames(TA_merge) %in% TA_cols)]
    TB_merge <- TB_merge[TB_merge$GENUS == species[1] & TB_merge$SPECIES == species[2],
                         which(colnames(TB_merge) %in% TB_cols)]
    
    if (verbose) {
      message("- Merging TA-TB files")
    }
    
    # Merge TA and TB by the unique id
    merge_TATB <- merge(TA_merge, TB_merge, by.x = "id", by.y = "id", all.x = TRUE)
    
    # Create a MEDITS_CODE field from GENUS and SPECIES
    merge_TATB$MEDITS_CODE <- as.character(paste(merge_TATB$GENUS, merge_TATB$SPECIES))
    
    # Replace missing or invalid MEDITS_CODE with default values
    l_TATB <- nrow(merge_TATB)
    if(l_TATB > 0) {
      for (i in 1:l_TATB) {
        if (merge_TATB[i, "MEDITS_CODE"] == "NA NA") {
          merge_TATB[i, "MEDITS_CODE"] <- "NA"
          merge_TATB[i, "GENUS"] <- -1
          merge_TATB[i, "SPECIES"] <- -1
          merge_TATB[i, "TOTAL_WEIGHT_IN_THE_HAUL"] <- 0
          merge_TATB[i, "TOTAL_NUMBER_IN_THE_HAUL"] <- 0
          merge_TATB[i, "NB_OF_FEMALES"] <- 0
          merge_TATB[i, "NB_OF_MALES"] <- 0
          merge_TATB[i, "NB_OF_UNDETERMINED"] <- 0
        }
      }
    }
    
    # (Optional) Coordinate conversion if needed (using convert_coordinates)
    coord <- convert_coordinates(merge_TATB)
    merge_TATB$MEAN_LATITUDE <- (merge_TATB$SHOOTING_LATITUDE + merge_TATB$HAULING_LATITUDE) / 2
    merge_TATB$MEAN_LATITUDE_DEC <- (coord$lat_start + coord$lat_end) / 2
    merge_TATB$MEAN_LONGITUDE <- (merge_TATB$SHOOTING_LONGITUDE + merge_TATB$HAULING_LONGITUDE) / 2
    merge_TATB$MEAN_LONGITUDE_DEC <- (coord$lon_start + coord$lon_end) / 2
    merge_TATB$MEAN_DEPTH <- (merge_TATB$SHOOTING_DEPTH + merge_TATB$HAULING_DEPTH) / 2
    merge_TATB$SWEPT_AREA <- merge_TATB$DISTANCE * merge_TATB$WING_OPENING / 1e+07
    
    # Process time fields to calculate haul duration, densities, etc.
    l_TATB <- nrow(merge_TATB)
    # Initialize vectors for shooting and hauling times
    hour_shooting <- rep(NA, l_TATB)
    min_shooting  <- rep(NA, l_TATB)
    hour_hauling  <- rep(NA, l_TATB)
    min_hauling   <- rep(NA, l_TATB)
    
    # Optionally, subset the strata scheme if needed (using %in% to avoid length warnings)
    strata_scheme <- strata_scheme[strata_scheme$GSA %in% unique(merge_TATB$GSA) & 
                                     strata_scheme$COUNTRY %in% as.character(unique(merge_TATB$COUNTRY)), ]
    
    # Loop over each row to assign a STRATUM_CODE and extract time components
    for (i in 1:l_TATB) {
      for (j in 1:nrow(strata_scheme)) {
        if (strata_scheme[j, 3] == 1) {
          if (floor(merge_TATB$MEAN_DEPTH[i]) >= strata_scheme[j, 4] &&
              floor(merge_TATB$MEAN_DEPTH[i]) <= strata_scheme[j, 5]) {
            merge_TATB$STRATUM_CODE[i] <- strata_scheme[j, 3]
          }
        } else {
          if (floor(merge_TATB$MEAN_DEPTH[i]) > strata_scheme[j, 4] &&
              floor(merge_TATB$MEAN_DEPTH[i]) <= strata_scheme[j, 5]) {
            merge_TATB$STRATUM_CODE[i] <- strata_scheme[j, 3]
          }
        }
      }
      
      # Extract hours and minutes for SHOOTING_TIME
      if (nchar(merge_TATB$SHOOTING_TIME[i]) == 4) {
        hour_shooting[i] <- substr(merge_TATB$SHOOTING_TIME[i], 1, 2)
        min_shooting[i]  <- substr(merge_TATB$SHOOTING_TIME[i], 3, 4)
      } else if (nchar(merge_TATB$SHOOTING_TIME[i]) == 3) {
        hour_shooting[i] <- substr(merge_TATB$SHOOTING_TIME[i], 1, 1)
        min_shooting[i]  <- substr(merge_TATB$SHOOTING_TIME[i], 2, 3)
      }
      
      # Extract hours and minutes for HAULING_TIME
      if (nchar(merge_TATB$HAULING_TIME[i]) == 4) {
        hour_hauling[i] <- substr(merge_TATB$HAULING_TIME[i], 1, 2)
        min_hauling[i]  <- substr(merge_TATB$HAULING_TIME[i], 3, 4)
      } else if (nchar(merge_TATB$HAULING_TIME[i]) == 3) {
        hour_hauling[i] <- substr(merge_TATB$HAULING_TIME[i], 1, 1)
        min_hauling[i]  <- substr(merge_TATB$HAULING_TIME[i], 2, 3)
      }
    }
    
    # Convert the extracted time components into a time object using hms
    hms_shooting <- hms(rep(0, l_TATB), as.numeric(min_shooting), as.numeric(hour_shooting))
    hms_hauling  <- hms(rep(0, l_TATB), as.numeric(min_hauling), as.numeric(hour_hauling))
    
    # Calculate duration in hours (assuming hms subtraction yields seconds)
    duration <- as.numeric(hms_hauling - hms_shooting) / 3600
    
    merge_TATB$SHOOTING_TIME <- hms_shooting
    merge_TATB$HAULING_TIME  <- hms_hauling
    merge_TATB$N_h   <- merge_TATB$TOTAL_NUMBER_IN_THE_HAUL / duration
    merge_TATB$N_km2 <- merge_TATB$TOTAL_NUMBER_IN_THE_HAUL / merge_TATB$SWEPT_AREA
    merge_TATB$kg_h  <- merge_TATB$TOTAL_WEIGHT_IN_THE_HAUL / duration / 1000
    merge_TATB$kg_km2<- merge_TATB$TOTAL_WEIGHT_IN_THE_HAUL / merge_TATB$SWEPT_AREA /   1000
    
    merge_TATB$MEDITS_CODE <- as.character(merge_TATB$MEDITS_CODE)
    
    if (save) {
      outfile <- paste0(wd, "/output/mergeTATB_", species[1], species[2], ".csv")
      write.table(merge_TATB, outfile, sep = ";", row.names = FALSE)
      if (verbose) {
        message("TA-TB files correctly merged")
        message(paste("Merge TA-TB file saved in:", outfile))
      }
    } else {
      if (verbose) {
        message("TA-TB files correctly merged (file not saved)")
      }
    }
    
    return(merge_TATB)
    }

I tried looping the function for a species vector but nothing i did worked.

Function looping for multiple species

Answers (1)

Related Questions