Reputation: 5935
I know that normally to create an SQL table from "scratch", you need to first create an empty table, and then insert data into this table. For example:
# create table
CREATE TABLE Customers (
  CustomerName varchar(255),
  ContactName varchar(255),
  Address varchar(255),
  City varchar(255),
  PostalCode varchar(255),
  Country varchar(255)
);
# populate table
INSERT INTO Customers (CustomerName, ContactName, Address, City, PostalCode, Country)
VALUES ('Cardinal', 'Tom B. Erichsen', 'Skagen 21', 'Stavanger', '4006', 'Norway');
Suppose I have the following table in R:
my_table = data.frame(
  id = c(1, 2, 3),
  name = c("sam", "smith", "sean"),
  height = c(156, 175, 191),
  address = c("123 first street", "234 second street", "345 third street")
)
id name height address
1 1 sam 156 123 first street
2 2 smith 175 234 second street
3 3 sean 191 345 third street
I would like to automatically generate SQL code in R that would allow me to create this table on an SQL server (e.g. via DBI/ODBC). I know there is the dbplyr package in R that can convert dplyr code into SQL. But given this table my_table in R, is there some way I could "trick" R into automatically generating the following code?
CREATE TABLE my_table (
  id int,
  name varchar(255),
  height int,
  address varchar(255)
);
INSERT INTO my_table (id, name, height, address)
VALUES ('1', 'sam', '156', '123 first street');
INSERT INTO my_table (id, name, height, address)
VALUES ('2', 'smith', '175', '234 second street');
INSERT INTO my_table (id, name, height, address)
VALUES ('3', 'sean', '191', '345 third street');
This way, I could paste the resulting code from R into the software I am using to write SQL and create the table on the SQL server.
I understand that there are far easier ways to upload a table from R to an SQL server (e.g. dbWriteTable()), but these options are currently not available to me: I cannot make R communicate with the SQL server. The only way I can create tables in SQL is with the CREATE TABLE and INSERT INTO commands. I was hoping there might be some function, analogous to dput(), that takes the "recipe" for a table in R and outputs SQL code which, when pasted into the SQL client, recreates the same table on the server.
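For context, if R could reach the server, the whole task would be a single dbWriteTable() call (con below is a hypothetical DBI/odbc connection handle, which is exactly the step that does not work in my setup):

library(DBI)
library(odbc)
# hypothetical connection - this is the part I cannot do
con <- dbConnect(odbc::odbc(), dsn = "my_sql_server_dsn")
dbWriteTable(con, "my_table", my_table)   # would create the table and insert the rows
dbDisconnect(con)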
Thank you!
Upvotes: 2
Views: 332
Reputation: 20409
When you look into getMethod("dbWriteTable", c(conn = "OdbcConnection", name = "character", value = "data.frame")) (the code responsible for writing tables to an odbc connection), you see the following relevant lines:
### creation
if (!found || overwrite) {
  sql <- sqlCreateTable(conn, name, values, field.types = field.types,
                        row.names = FALSE, temporary = temporary)
  dbExecute(conn, sql, immediate = TRUE)
}

### adding data
values <- sqlData(conn, row.names = row.names, value[, , drop = FALSE])
if (nrow(value) > 0) {
  name <- dbQuoteIdentifier(conn, name)
  fields <- dbQuoteIdentifier(conn, names(values))
  params <- rep("?", length(fields))
  sql <- paste0("INSERT INTO ", name, " (",
                paste0(fields, collapse = ", "), ")\n",
                "VALUES (", paste0(params, collapse = ", "), ")")
  ## ...
}
This poses a problem, because all of the SQL generation depends on the backend (ANSI SQL generates different statements than T-SQL, for instance). Thus, without a connection it will be difficult to piggyback on the existing code, because that code needs a live connection, in which case you would not need your workaround in the first place.
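One way to see the backend dependence without any server is DBI's dummy ANSI connection: sqlCreateTable() happily runs against DBI::ANSI(), but it emits generic ANSI types (e.g. TEXT for character columns), which is not necessarily what SQL Server expects, so it only gets you part of the way:

library(DBI)
# ANSI() is a dummy connection object shipped with DBI; no database involved
sqlCreateTable(ANSI(), "my_table", my_table, row.names = FALSE)
## generates a CREATE TABLE statement with ANSI types (TEXT, DOUBLE, ...)
## rather than the T-SQL types (varchar(255), FLOAT, ...) used further below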
There is dbplyr::simulate_mssql(), which allows for some simulation without a database connection, but it does not help with your problem at hand:
library(dbplyr)
library(DBI)
lazy_frame(x = c("abc", "def", "ghif"), con = simulate_mssql()) %>%
  group_by(x) %>%
  summarize(n = n())
## SELECT `x`, COUNT(*) AS `n`
## FROM `df`
## GROUP BY `x`
sqlCreateTable(simulate_mssql(), "test", mtcars)
# Error in h(simpleError(msg, call)) :
# error in evaluating the argument 'con' in selecting a method for function 'sqlCreateTable': could not find function
Having said that, you will need to write your own wrapper (code gently stolen from the relevant DBI code):
library(DBI)
library(odbc)
library(magrittr)
## pull DBI's default quoting methods so that no live connection is needed
do_sql_quote_id <- function(x)
  getMethod("dbQuoteIdentifier",
            c(conn = "DBIConnection", x = "character"))@.Data(x = x)

do_sql_quote_string <- function(x)
  getMethod("dbQuoteString",
            c(conn = "DBIConnection", x = "character"))@.Data(x = x)
## connection-free version of DBI's default literal quoting
do_sql_quote_literal <- function(x) {
  if (is(x, "SQL"))
    return(x)
  if (is.factor(x))
    return(do_sql_quote_string(as.character(x)))
  if (is.character(x))
    return(do_sql_quote_string(x))
  if (inherits(x, "POSIXt")) {
    return(do_sql_quote_string(strftime(as.POSIXct(x), "%Y%m%d%H%M%S",
                                        tz = "UTC")))
  }
  if (inherits(x, "Date"))
    return(do_sql_quote_string(as.character(x)))
  if (inherits(x, "difftime"))
    return(do_sql_quote_string(format_hms(x)))  # format_hms() comes from DBI's internals; only relevant for difftime columns
  if (is.list(x)) {
    ## list columns are treated as blobs: raw vectors become hex literals
    blob_data <- vapply(x, function(x) {
      if (is.null(x)) {
        "NULL"
      } else if (is.raw(x)) {
        paste0("X'", paste(format(x), collapse = ""), "'")
      } else {
        stop("Lists must contain raw vectors or NULL", call. = FALSE)
      }
    }, character(1))
    return(SQL(blob_data, names = names(x)))
  }
  if (is.logical(x))
    x <- as.numeric(x)
  x <- as.character(x)
  x[is.na(x)] <- "NULL"
  SQL(x, names = names(x))
}
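## quick sanity check of the quoting helpers (results as expected from DBI's
## default methods, not verified against a server):
# do_sql_quote_string("it's")   ## 'it''s'  -- embedded single quotes get doubled
# do_sql_quote_id("my column")  ## "my column"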
## map each column of a data.frame to a SQL Server type via odbc's internal type mapping
get_data_type <- function(obj) {
  res <- character(NCOL(obj))
  nms <- names(obj)
  for (i in seq_along(obj)) {
    res[[i]] <- odbc:::`odbcDataType.Microsoft SQL Server`(obj = obj[[i]])
  }
  names(res) <- nms
  field_names <- do_sql_quote_id(names(res))
  field_types <- unname(res)
  paste0(field_names, " ", field_types)
}
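## e.g. get_data_type(data.frame(id = 1, name = "sam")) should give
## c('"id" FLOAT', '"name" varchar(255)') with odbc's SQL Server mapping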
## coerce awkward column classes (POSIXlt, IDate, other S3 objects) into
## something the literal quoting above can handle
df_to_sql_data <- function(value) {
  is_POSIXlt <- vapply(value, function(x) is.object(x) && is(x, "POSIXlt"),
                       logical(1))
  value[is_POSIXlt] <- lapply(value[is_POSIXlt], as.POSIXct)

  is_IDate <- vapply(value, function(x) is.object(x) && is(x, "IDate"),
                     logical(1))
  value[is_IDate] <- lapply(value[is_IDate], as.Date)

  is_object <- vapply(value, function(x) {
    is.object(x) && !(is(x, "POSIXct") || is(x, "Date") ||
                        odbc:::is_blob(x) || is(x, "difftime"))
  }, logical(1))
  value[is_object] <- lapply(value[is_object], as.character)

  value
}
## build the full CREATE TABLE + INSERT INTO script for a data.frame
get_table_sql <- function(name, my_data) {
  table <- do_sql_quote_id(name)

  ## CREATE TABLE part
  fields <- get_data_type(my_data)
  ct <- paste0("CREATE TABLE ", table, " (\n",
               " ", paste(fields, collapse = ",\n "),
               "\n)\n")

  ## INSERT INTO part (one multi-row VALUES clause)
  values <- df_to_sql_data(my_data)
  fields <- do_sql_quote_id(names(values))
  rows <- split(values, seq_len(nrow(values))) %>%
    lapply(function(row) do.call(paste,
                                 c(sep = ", ", lapply(row, do_sql_quote_literal))))
  it <- paste0("INSERT INTO ", table, " (",
               paste0(fields, collapse = ", "), ")\n",
               "VALUES \n", paste0("(", rows, ")", collapse = ", \n"))

  SQL(paste0(ct, "\n", it))
}
get_table_sql("my_table_name", iris[1:5, ])
## <SQL> CREATE TABLE "my_table_name" (
## "Sepal.Length" FLOAT,
## "Sepal.Width" FLOAT,
## "Petal.Length" FLOAT,
## "Petal.Width" FLOAT,
## "Species" varchar(255)
## )
##
## INSERT INTO "my_table_name" ("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species")
## VALUES
## (5.1, 3.5, 1.4, 0.2, 'setosa'),
## (4.9, 3, 1.4, 0.2, 'setosa'),
## (4.7, 3.2, 1.3, 0.2, 'setosa'),
## (4.6, 3.1, 1.5, 0.2, 'setosa'),
## (5, 3.6, 1.4, 0.2, 'setosa')
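Applied to the data frame from your question, the same helper should produce the script you were after (note that, going by odbc's type mapping, the numeric columns id and height will come out as FLOAT rather than int):

get_table_sql("my_table", my_table)
## expected: a CREATE TABLE "my_table" (...) statement with "id"/"height" as FLOAT
## and "name"/"address" as varchar(255), followed by one multi-row INSERT INTO ... VALUES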
Upvotes: 2