Reputation: 3203
I have integrated an R model into a stored procedure using the R Tools for Visual Studio guidance. The syntax is as follows:
ALTER PROCEDURE [dbo].[spRegressionPeak]
AS
BEGIN
EXEC sp_execute_external_script @language = N'R'
, @script = N'
# @InputDataSet: input data frame, result of SQL query execution
# @OutputDataSet: data frame to pass back to SQL
# Test code
#library(RODBC)
# channel <- odbcDriverConnect(dbConnection)
# InputDataSet <- sqlQuery(channel,iconv(paste(readLines(''~/visual studio 2017/prod360/regressionpeak.query.sql'', encoding = ''UTF-8'', warn = FALSE), collapse=''\n''), from = ''UTF-8'', to = ''ASCII'', sub = '''') )
# odbcClose(channel)
#'' Regression Peaks
#''
#'' Runs polynomial regressions on a data table with one model for each
#'' user ID - independent variable pair. Note that independent variables
#'' are identified as all columns matching the following pattern: the
#'' letter "c" followed by a one-or-more digit number. The dependent
#'' variable is identified by its name "dv". The user ID is identified by
#'' its name "id". Also note that the regressors are the means of the
#'' original observations, grouped by \code{code}.
#''
#'' @param x Table to run regressions on
#'' @param c_means Code means table
#'' @importFrom rlang .data
#''
#'' @return Summary table where each distinct \code{code} value is
#'' represented by one row with columns for the respective standard
#'' deviations of each independent variable.
#'' @export
regression_peak <- function(x, c_means) {
df <-
dplyr::select(x, .data$id, .data$code, .data$dv) %>%
dplyr::left_join(c_means, by = "code")
id <- unique(df$id)
iv <- names(df)[stringr::str_detect(names(df), "c\\d+")]
grid <- tidyr::crossing(id, iv)
peaks <- purrr::map2_df(grid$id, grid$iv, function(i_id, i_iv) {
x <-
dplyr::filter(df, .data$id == i_id) %>%
dplyr::select_at(dplyr::vars(.data$dv, i_iv)) %>%
dplyr::rename(iv = !!i_iv)
fit <- stats::lm(dv ~ iv + I(iv ^ 2), data = x)
coef_a <- stats::coef(fit)["iv"]
coef_b <- stats::coef(fit)["I(iv^2)"]
extr <-
tibble::tibble(
type = c(2, 1, 1),
iv = c(unname(-coef_a / (2 * coef_b)), min(x$iv), max(x$iv))
) %>%
dplyr::mutate(y = stats::predict(fit, newdata = ., type = "response"))
t_max <- extr$iv[extr$type == 2]
tibble::tibble(
id = i_id,
iv = i_iv,
max = dplyr::case_when(
min(x$iv) < t_max & t_max < max(x$iv) ~ extr$iv[which.max(extr$y)],
TRUE ~ extr$iv[which.max(extr$y[extr$type == 1])]
) # should be x value
)
})
tidyr::spread(peaks, .data$iv, .data$max) %>%
dplyr::select(.data$id, iv) %>%
dplyr::filter_at(dplyr::vars(dplyr::matches("c\\d+")),
dplyr::any_vars(!is.na(.)))
}
OutputDataSet <- InputDataSet
'
, @input_data_1 = N'-- Place SQL query retrieving data for the R stored procedure here
DECLARE @cols AS NVARCHAR(MAX),
@query AS NVARCHAR(MAX),
@StudyID int,
@sStudyID VARCHAR(50)
Select a.StudyId, a.RespID, p.ProductNumber, p.ProductSequence,
CONVERT(varchar(50),a.DateAdded,101) as StudyDate,
CONVERT(VARCHAR(15),CAST((a.DateAdded)AS TIME),100) as
StudyTime,DATENAME(dw,a.DateAdded) as [DayOfWeek],
p.A_Value as A,p.B_Value as B,p.C_Value as C,p.D_Value AS D,p.E_Value AS
E,p.F_Value AS F, q.QuestionNumber
from answers a
inner join Products p on a.ProductID = p.ProductID
inner join Questions q on a.QuestionID = q.QuestionID
where a.StudyID = @sStudyID'
--- Edit this line to handle the output data frame.
WITH RESULT SETS (([StudyID] int, [RespID] int, [ProductNumber] int,
[ProductSequence] int, [StudyDate] date, [StudyTime] time, [DayOfWeek]
VARCHAR(10),[QuestionNumber] int, [A] int, [B] int, [C] int, [D] int, [E]
int, [F] INT));
END;
When I execute the stored procedure in SQL Server Management Studio, the Return Value = 0 and no data is output. I'm not sure that the variables are being appropriately declared as I'm not prompted for them when I execute the stored procedure.
How do I modify the stored procedure to return the intended data? Can I call this from ASP.NET by providing the appropriate study ID?
Upvotes: 0
Views: 869
Reputation: 1698
Your code is very hard to read, but to me, it looks like you are not declaring the parameters you want to get out. Below is an example of how you can do it:
DECLARE @out_val float;
exec sp_execute_external_script
@language = N'R',
@script = N'
iris_dataset <- iris
setosa <- iris[iris$Species == "setosa",]
menSepWidth <- mean(setosa$Sepal.Width)
iris_dataset$Sepal.Length <- iris_dataset$Sepal.Length * multiplier
OutputDataSet <- data.frame(iris_dataset$Sepal.Length)
',
@params = N'@multiplier float, @menSepWidth float OUTPUT',
@multiplier = 5,
@menSepWidth = @out_val OUTPUT
WITH RESULT SETS ((SepalLength float));
SELECT @out_val AS MeanSepWidth
Have a look at this blog post where I talk about how to handle parameters etc. when you use sp_execute_external_script
.
Hope this helps!
Upvotes: 2