Editing a function from a package in R?

Question

I am using the referenceIntervals package in R, to do some data analytics.

In particular I am using the refLimit function which calculates reference and confidence intervals. I want to edit it to remove certain functionality (for instance it runs a shapiro normalitiy test, which stops the entire code if the data larger than 5000, it wont allow you to parametrically test samples less than 120). To do this I have been typing refLimit into the terminal - copying the function definition, then saving it as a separate file (below is the full original definition of the function).

 singleRefLimit = 
function (data, dname = "default", out.method = "horn", out.rm = FALSE, 
    RI = "p", CI = "p", refConf = 0.95, limitConf = 0.9) 
{
    if (out.method == "dixon") {
        output = dixon.outliers(data)
    }
    else if (out.method == "cook") {
        output = cook.outliers(data)
    }
    else if (out.method == "vanderLoo") {
        output = vanderLoo.outliers(data)
    }
    else {
        output = horn.outliers(data)
    }
    if (out.rm == TRUE) {
        data = output$subset
    }
    outliers = output$outliers
    n = length(data)
    mean = mean(data, na.rm = TRUE)
    sd = sd(data, na.rm = TRUE)
    norm = NULL
    if (RI == "n") {
        methodRI = "Reference Interval calculated nonparametrically"
        data = sort(data)
        holder = nonparRI(data, indices = 1:length(data), refConf)
        lowerRefLimit = holder[1]
        upperRefLimit = holder[2]
        if (CI == "p") {
            CI = "n"
        }
    }
    if (RI == "r") {
        methodRI = "Reference Interval calculated using Robust algorithm"
        holder = robust(data, 1:length(data), refConf)
        lowerRefLimit = holder[1]
        upperRefLimit = holder[2]
        CI = "boot"
    }
    if (RI == "p") {
        methodRI = "Reference Interval calculated parametrically"
        methodCI = "Confidence Intervals calculated parametrically"
        refZ = qnorm(1 - ((1 - refConf)/2))
        limitZ = qnorm(1 - ((1 - limitConf)/2))
        lowerRefLimit = mean - refZ * sd
        upperRefLimit = mean + refZ * sd
        se = sqrt(((sd^2)/n) + (((refZ^2) * (sd^2))/(2 * n)))
        lowerRefLowLimit = lowerRefLimit - limitZ * se
        lowerRefUpperLimit = lowerRefLimit + limitZ * se
        upperRefLowLimit = upperRefLimit - limitZ * se
        upperRefUpperLimit = upperRefLimit + limitZ * se
        shap_normalcy = shapiro.test(data)
        shap_output = paste(c("Shapiro-Wilk: W = ", format(shap_normalcy$statistic, 
            digits = 6), ", p-value = ", format(shap_normalcy$p.value, 
            digits = 6)), collapse = "")
        ks_normalcy = suppressWarnings(ks.test(data, "pnorm", 
            m = mean, sd = sd))
        ks_output = paste(c("Kolmorgorov-Smirnov: D = ", format(ks_normalcy$statistic, 
            digits = 6), ", p-value = ", format(ks_normalcy$p.value, 
            digits = 6)), collapse = "")
        if (shap_normalcy$p.value < 0.05 | ks_normalcy$p.value < 
            0.05) {
            norm = list(shap_output, ks_output)
        }
        else {
            norm = list(shap_output, ks_output)
        }
    }
    if (CI == "n") {
        if (n < 120) {
            cat("
Sample size too small for non-parametric confidence intervals, 
    		bootstrapping instead
")
            CI = "boot"
        }
        else {
            methodCI = "Confidence Intervals calculated nonparametrically"
            ranks = nonparRanks[which(nonparRanks$SampleSize == 
                n), ]
            lowerRefLowLimit = data[ranks$Lower]
            lowerRefUpperLimit = data[ranks$Upper]
            upperRefLowLimit = data[(n + 1) - ranks$Upper]
            upperRefUpperLimit = data[(n + 1) - ranks$Lower]
        }
    }
    if (CI == "boot" & (RI == "n" | RI == "r")) {
        methodCI = "Confidence Intervals calculated by bootstrapping, R = 5000"
        if (RI == "n") {
            bootresult = boot::boot(data = data, statistic = nonparRI, 
                refConf = refConf, R = 5000)
        }
        if (RI == "r") {
            bootresult = boot::boot(data = data, statistic = robust, 
                refConf = refConf, R = 5000)
        }
        bootresultlower = boot::boot.ci(bootresult, conf = limitConf, 
            type = "basic", index = 1)
        bootresultupper = boot::boot.ci(bootresult, conf = limitConf, 
            type = "basic", index = 2)
        lowerRefLowLimit = bootresultlower$basic[4]
        lowerRefUpperLimit = bootresultlower$basic[5]
        upperRefLowLimit = bootresultupper$basic[4]
        upperRefUpperLimit = bootresultupper$basic[5]
    }
    RVAL = list(size = n, dname = dname, out.method = out.method, 
        out.rm = out.rm, outliers = outliers, methodRI = methodRI, 
        methodCI = methodCI, norm = norm, refConf = refConf, 
        limitConf = limitConf, Ref_Int = c(lowerRefLimit = lowerRefLimit, 
            upperRefLimit = upperRefLimit), Conf_Int = c(lowerRefLowLimit = lowerRefLowLimit, 
            lowerRefUpperLimit = lowerRefUpperLimit, upperRefLowLimit = upperRefLowLimit, 
            upperRefUpperLimit = upperRefUpperLimit))
    class(RVAL) = "interval"
    return(RVAL)
}

However when I then execute this file a large number of terms end up being undefined, for instance when I use the function I get 'object 'nonparRanks' not found'.

How do I edit the function in the package? I have looked at trying to important the package namespace and environment but this has not helped. I have also tried to find the actual function in the package files in my directory, but not been able to.

I am reasonably experienced in R, but I have never had to edit a package before. I am clearly missing something about how functions are defined in packages, but I am not sure what.

AidanGawronski · Accepted Answer

In the beginning of the package there is a line

data(sysdata, envir=environment())

See here: https://github.com/cran/referenceIntervals/tree/master/data/sysdata.rda

I suspect that "nonparRanks" is defined there as I don't see it defined anywhere else. So perhaps you could download that file, write your own function, then run that same line before running your function and it may work.

EDIT: Download the file then run:

load("C:/sysdata.rda")

With your path to the file and then your function will work.

Editing a function from a package in R?

Answers (2)

Related Questions