SidC
SidC

Reputation: 3203

How to Declare Input Variables in Stored Procedure with R and TSQL?

I have integrated an R model into a stored procedure using the R Tools for Visual Studio guidance. The syntax is as follows:

ALTER PROCEDURE [dbo].[spRegressionPeak]
AS
BEGIN


EXEC sp_execute_external_script @language = N'R'
, @script = N'
# @InputDataSet: input data frame, result of SQL query execution
# @OutputDataSet: data frame to pass back to SQL

# Test code
#library(RODBC)
# channel <- odbcDriverConnect(dbConnection)
# InputDataSet <- sqlQuery(channel,iconv(paste(readLines(''~/visual studio 2017/prod360/regressionpeak.query.sql'', encoding = ''UTF-8'', warn = FALSE), collapse=''\n''), from = ''UTF-8'', to = ''ASCII'', sub = '''') )
# odbcClose(channel)

#'' Regression Peaks
#''
#'' Runs polynomial regressions on a data table with one model for each
#'' user ID - independent variable pair. Note that independent variables
#'' are identified as all columns matching the following pattern: the
#'' letter "c" followed by a one-or-more digit number. The dependent
#'' variable is identified by its name "dv". The user ID is identified by
#'' its name "id". Also note that the regressors are the means of the
#'' original observations, grouped by \code{code}.
#''
#'' @param x Table to run regressions on
#'' @param c_means Code means table
#'' @importFrom rlang .data
#''
#'' @return Summary table where each distinct \code{code} value is
#''   represented by one row with columns for the respective standard
#''   deviations of each independent variable.
#'' @export
regression_peak <- function(x, c_means) {

df <-
dplyr::select(x, .data$id, .data$code, .data$dv) %>%
dplyr::left_join(c_means, by = "code")

id <- unique(df$id)
iv <- names(df)[stringr::str_detect(names(df), "c\\d+")]
grid <- tidyr::crossing(id, iv)

peaks <- purrr::map2_df(grid$id, grid$iv, function(i_id, i_iv) {

    x <-
    dplyr::filter(df, .data$id == i_id) %>%
    dplyr::select_at(dplyr::vars(.data$dv, i_iv)) %>%
    dplyr::rename(iv = !!i_iv)

    fit <- stats::lm(dv ~ iv + I(iv ^ 2), data = x)
    coef_a <- stats::coef(fit)["iv"]
    coef_b <- stats::coef(fit)["I(iv^2)"]

    extr <-
    tibble::tibble(
    type = c(2, 1, 1),
    iv = c(unname(-coef_a / (2 * coef_b)), min(x$iv), max(x$iv))
  ) %>%
    dplyr::mutate(y = stats::predict(fit, newdata = ., type = "response"))

    t_max <- extr$iv[extr$type == 2]

    tibble::tibble(
  id = i_id,
  iv = i_iv,
  max = dplyr::case_when(
    min(x$iv) < t_max & t_max < max(x$iv) ~ extr$iv[which.max(extr$y)],
    TRUE ~ extr$iv[which.max(extr$y[extr$type == 1])]
  ) # should be x value
)

})

tidyr::spread(peaks, .data$iv, .data$max) %>%
dplyr::select(.data$id, iv) %>%
dplyr::filter_at(dplyr::vars(dplyr::matches("c\\d+")), 
dplyr::any_vars(!is.na(.)))

}

OutputDataSet <- InputDataSet
'
, @input_data_1 = N'-- Place SQL query retrieving data for the R stored procedure here
DECLARE @cols AS NVARCHAR(MAX),
    @query  AS NVARCHAR(MAX),
    @StudyID int,
    @sStudyID VARCHAR(50)

Select a.StudyId, a.RespID, p.ProductNumber, p.ProductSequence, 
CONVERT(varchar(50),a.DateAdded,101) as StudyDate,
CONVERT(VARCHAR(15),CAST((a.DateAdded)AS TIME),100) as 
StudyTime,DATENAME(dw,a.DateAdded) as [DayOfWeek],
p.A_Value as A,p.B_Value as B,p.C_Value as C,p.D_Value AS D,p.E_Value AS 
E,p.F_Value AS F, q.QuestionNumber  
from answers a
inner join Products p on a.ProductID = p.ProductID 
inner join Questions q on a.QuestionID = q.QuestionID
where a.StudyID = @sStudyID'
--- Edit this line to handle the output data frame.
WITH RESULT SETS (([StudyID] int, [RespID] int, [ProductNumber] int, 
[ProductSequence] int, [StudyDate] date, [StudyTime] time, [DayOfWeek] 
VARCHAR(10),[QuestionNumber] int, [A] int, [B] int, [C] int, [D] int, [E] 
int, [F] INT));
END;

When I execute the stored procedure in SQL Server Management Studio, the Return Value = 0 and no data is output. I'm not sure that the variables are being appropriately declared as I'm not prompted for them when I execute the stored procedure.

How do I modify the stored procedure to return the intended data? Can I call this from ASP.NET by providing the appropriate study ID?

Upvotes: 0

Views: 869

Answers (1)

Niels Berglund
Niels Berglund

Reputation: 1698

Your code is very hard to read, but to me, it looks like you are not declaring the parameters you want to get out. Below is an example of how you can do it:

DECLARE @out_val float;

exec sp_execute_external_script
  @language = N'R',
  @script = N'
    iris_dataset <- iris

    setosa <- iris[iris$Species == "setosa",]

    menSepWidth <- mean(setosa$Sepal.Width)

    iris_dataset$Sepal.Length <- iris_dataset$Sepal.Length * multiplier
    OutputDataSet <- data.frame(iris_dataset$Sepal.Length)    

  ',
  @params = N'@multiplier float, @menSepWidth float OUTPUT',
  @multiplier = 5,
  @menSepWidth = @out_val OUTPUT
  WITH RESULT SETS ((SepalLength float));

SELECT @out_val AS MeanSepWidth  

Have a look at this blog post where I talk about how to handle parameters etc. when you use sp_execute_external_script.

Hope this helps!

Upvotes: 2

Related Questions