stats_noob
stats_noob

Reputation: 5935

Transferring Tables from R into SQL

I know that normally to create an SQL table from "scratch", you need to first create an empty table, and then insert data into this table. For example:

# create table

CREATE TABLE Customers (
    CustomerName int,
    ContactName varchar(255),
    Address varchar(255),
    City varchar(255),
    PostalCode varchar(255)
Country varchar(255)
);

#populate table
INSERT INTO Customers (CustomerName, ContactName, Address, City, PostalCode, Country)
VALUES ('Cardinal', 'Tom B. Erichsen', 'Skagen 21', 'Stavanger', '4006', 'Norway');

Suppose I have the following table in R:

my_table = data.frame(id = c(1,2,3), name = c("sam", "smith", "sean"), height = c(156, 175, 191), address = c("123 first street", "234 second street", "345 third street"))

  id  name height           address
1  1   sam    156  123 first street
2  2 smith    175 234 second street
3  3  sean    191  345 third street

I would like to automatically generate SQL code in R that would allow me to create this table on an SQL server (e.g. DBI/ODBC). I know that there is a "DBPLYR" library in R that can convert DPLYR code into SQL. But suppose I have this table "my_table" in R - is there some way I could "trick" R into automatically generating the following code?

CREATE TABLE my_table (
    id int,
    name varchar(255),
    height int,
    address varchar(255),
);

INSERT INTO my_table(id, name, height, address)
VALUES ('1', 'sam', '156', '123 first street');

INSERT INTO my_table(id, name, height, address)
VALUES ('2', 'smith', '175', '234 second street');

INSERT INTO my_table(id, name, height, address)
VALUES ('1', 'sean', '191', '345 first street');

This way, I could paste the resulting code from R into the software I am using to write SQL and create the table on the SQL server.

I understand that there are far easier ways to upload a table from R onto an SQL server (e.g. dbWriteTable()) - but these options are currently not available to me : I can not make R communicate with the SQL Server. The only thing I can do to create tables in SQL involves the "CREATE TABLE" and "INSERT INTO" commands. I was hoping that perhaps there might be some function like dput() which takes the "recipe" for a table in R, and outputs some SQL code - and when you paste this output SQL code, it creates the same table on the SQL server.

Thank you!

Upvotes: 2

Views: 332

Answers (1)

thothal
thothal

Reputation: 20409

When you look into getMethod("dbWriteTable", c(conn="OdbcConnection", name="character", value="data.frame")) (the code responsible for writing tables to an odbc connection) you see the following relevant lines:

### creation
if (!found || overwrite) {
   sql <- sqlCreateTable(conn, name, values, field.types = field.types, 
                         row.names = FALSE, temporary = temporary)
   dbExecute(conn, sql, immediate = TRUE)
}

### adding data
values <- sqlData(conn, row.names = row.names, value[, , drop = FALSE])
if (nrow(value) > 0) {
   name <- dbQuoteIdentifier(conn, name)
   fields <- dbQuoteIdentifier(conn, names(values))
   params <- rep("?", length(fields))
   sql <- paste0("INSERT INTO ", name, " (", 
                 paste0(fields, collapse = ", "), ")\n", 
                "VALUES (", paste0(params, collapse = ", "), 
                ")")
## ...
}

This poses a problem, because all the SQL generation depends on the backend (ANSI SQL generates different lines than T-SQL for instance).

Thus, without a connection it will be difficult to piggyback on existing code, b/c existing code would need an existing connection, in which case you would not even need your workaround.

There is dbplyr::simulate_mssql() which allows for some simulation w/o data connection, but not for your problem at hand:

library(dbplyr)
library(DBI)

lazy_frame(x = c("abc", "def", "ghif"), con = simulate_mssql()) %>% 
   group_by(x) %>% 
   summarize(n = n())

## SELECT `x`, COUNT(*) AS `n`
## FROM `df`
## GROUP BY `x`

sqlCreateTable(simulate_mssql(), "test", mtcars)
# Error in h(simpleError(msg, call)) : 
#  error in evaluating the argument 'con' in selecting a method for function 'sqlCreateTable': could not find function

Having said that, you will need to write your own wrapper (code gently stolen from the relevant DBI code):

library(DBI)
library(odbc)
library(magrittr)
do_sql_quote_id <- function(x) getMethod("dbQuoteIdentifier", 
                                   c(conn = "DBIConnection",
                                     x="character"))@.Data(x = x)

do_sql_quote_string <- function(x) getMethod("dbQuoteString", 
          c(conn = "DBIConnection", 
            x = "character"))@.Data(x = x)

do_sql_quote_literal <- function(x) {
    if (is(x, "SQL")) 
        return(x)
    if (is.factor(x)) 
        return(do_sql_quote_string(as.character(x)))
    if (is.character(x)) 
        return(do_sql_quote_string(x))
    if (inherits(x, "POSIXt")) {
        return(do_sql_quote_string(strftime(as.POSIXct(x), "%Y%m%d%H%M%S", 
            tz = "UTC")))
    }
    if (inherits(x, "Date")) 
        return(do_sql_quote_string(as.character(x)))
    if (inherits(x, "difftime")) 
        return(do_sql_quote_string(format_hms(x)))
    if (is.list(x)) {
        blob_data <- vapply(x, function(x) {
            if (is.null(x)) {
                "NULL"
            }
            else if (is.raw(x)) {
                paste0("X'", paste(format(x), collapse = ""), 
                  "'")
            }
            else {
                stop("Lists must contain raw vectors or NULL", 
                  call. = FALSE)
            }
        }, character(1))
        return(SQL(blob_data, names = names(x)))
    }
    if (is.logical(x)) 
        x <- as.numeric(x)
    x <- as.character(x)
    x[is.na(x)] <- "NULL"
    SQL(x, names = names(x))
}

get_data_type <- function(obj) {
   res <- character(NCOL(obj))
   nms <- names(obj)
   for (i in seq_along(obj)) {
       res[[i]] <- odbc:::`odbcDataType.Microsoft SQL Server`(obj = obj[[i]])
   }
   names(res) <- nms
   field_names <- do_sql_quote_id(names(res))
   field_types <- unname(res)
   paste0(field_names, " ", field_types)
}

df_to_sql_data <- function(value) {
    is_POSIXlt <- vapply(value, function(x) is.object(x) && (is(x, 
        "POSIXlt")), logical(1))
    value[is_POSIXlt] <- lapply(value[is_POSIXlt], as.POSIXct)
    is_IDate <- vapply(value, function(x) is.object(x) && (is(x, 
        "IDate")), logical(1))
    value[is_IDate] <- lapply(value[is_IDate], as.Date)
    is_object <- vapply(value, function(x) is.object(x) && !(is(x, 
        "POSIXct") || is(x, "Date") || odbc:::is_blob(x) || 
        is(x, "difftime")), logical(1))
    value[is_object] <- lapply(value[is_object], as.character)
    value
}

get_table_sql <- function(name, my_data) {
  table <- do_sql_quote_id(name)
  fields <- get_data_type(my_data)
  ct <- paste0("CREATE TABLE ", table, " (\n", 
               "  ", paste(fields, collapse = ",\n  "), 
               "\n)\n")
  values <- df_to_sql_data(my_data)
  fields <- do_sql_quote_id(names(values))
  rows <- split(values, seq_len(nrow(values))) %>%
    lapply(function(row) do.call(paste, 
                                 c(sep = ", ", lapply(row, do_sql_quote_literal))))
  it <- paste0("INSERT INTO ", table, " (", 
               paste0(fields, collapse = ", "), ")\n", 
               "VALUES \n", paste0("(", rows, ")", collapse = ", \n"))
  SQL(paste0(ct, "\n", it))
}

get_table_sql("my_table_name", iris[1:5, ])

## <SQL> CREATE TABLE "my_table_name" (
##   "Sepal.Length" FLOAT,
##   "Sepal.Width" FLOAT,
##   "Petal.Length" FLOAT,
##   "Petal.Width" FLOAT,
##   "Species" varchar(255)
## )
## 
## INSERT INTO "my_table_name" ("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species")
## VALUES 
## (5.1, 3.5, 1.4, 0.2, 'setosa'), 
## (4.9, 3, 1.4, 0.2, 'setosa'), 
## (4.7, 3.2, 1.3, 0.2, 'setosa'), 
## (4.6, 3.1, 1.5, 0.2, 'setosa'), 
## (5, 3.6, 1.4, 0.2, 'setosa')

Upvotes: 2

Related Questions