Andre Elrico
Andre Elrico

Reputation: 11480

Automatically - "Convert numbers stored as text to numbers"

Lets consider this small example:

df1<- data.frame(A=c(1,NA,"pvalue",0.0003),B=c(0.5,7,"I destroy","numbers all day"),stringsAsFactors = T)

Write file:

openxlsx::write.xlsx(df1,"Test.xlsx")

In my resulting excel file, 1 and 7 are text cells. Excel has the "intuition" that they are numbers stored as text. I can convert them by hand.

How can I convert those "flagged" values automatically to numbers from inside R?

In the "What I want" I have by hand converted the TEXT into Numbers. It's an option behind the "green triangle" in the "What I get" Part (red arrows).

enter image description here

@Roland's comment: Rearranging as list does not work.

df1<- as.data.frame(cbind(A=list(1,NA_real_,"pvalue",0.0003),B=list(0.5,7,"I destroy","numbers all day")))
openxlsx::write.xlsx(df1,"Test2.xlsx")

Upvotes: 10

Views: 8378

Answers (4)

Laura
Laura

Reputation: 329

Looks like there's an easier way to handle this in openxlsx2. It's explained better in the documentation for openxlsx2 here. But essentially, adding options("openxlsx2.string_nums" = TRUE) will convert it to text instead of resulting in the numbers stored as text error, and then wb_add_numfmt will make it all numbers. This does mean that any cells with characters will have the 'number' cell type and just have the leading apostrophe that tells excel it's not a formula. All together, this would look like

library(openxlsx2)

dat_w_subtitle <- data.frame(
  speed = c("Speed (mph)", 4, 4, 7, 7, 8, 9),
  dis = c("Stopping distance (ft)", 2, 10, 4, 22, 16, 10)
)

wb<-wb_workbook()
options("openxlsx2.string_nums" = TRUE)
wb$add_worksheet("dat_w_subtitle")$add_data(x = dat_w_subtitle)
wb$add_numfmt(1, dims = "A2:B8", numfmt = "0")

wb_save(wb, "outputname.xlsx", overwrite = T)

xl_open("outputname.xlsx")


And the output would look like

excel workbook showing output

Upvotes: 1

vorpal
vorpal

Reputation: 318

Just in case it helps someone else, I imported an excel document, did a bunch of manipulations on the dataframe and then wrote it out as a new excel document. I didn't want to put the conversion from char to numeric in the dataframe, because it would mess with my existing code, so I put it in the writeData bit.

wb <- createWorkbook()
lapply(listOfDFs, function(x) addWorksheet(wb, sheetName = x))   
for (n in 1:length(listOfDFs)) {
  sheet <- allDFs[[n]]
  for (row in 1:nrow(sheet)){
    sheetRow <- data.frame(lapply(sheet[row,], function(x){type.convert(as.character(x))}), check.names = FALSE, stringsAsFactors = FALSE)
    if (row == 1) {
      writeData(wb, sheet = n, x = sheetRow, startRow = row, colNames = TRUE)
    } else {
      writeData(wb, sheet = n, x = sheetRow, startRow = row+1, colNames = FALSE)
    }
  }
}
saveWorkbook(wb, file = "test.xlsx", overwrite = TRUE)

Upvotes: 0

Andre Elrico
Andre Elrico

Reputation: 11480

thanks @mt1022 added the validator to let 000123 stay 000123 in the helpers function part

A solution that can do what openxlsx::write.xlsx() can do + "finding meaningful types".

function: (its 98% openxlsx::write.xlsx)

writeXlsxWithTypes <- function(x, file, asTable = FALSE, ...) {
    library(magrittr);library(openxlsx);

    if(T) {
        setTypes <- function(x) {
            x %<>%
                lapply(function(xX){
                    lapply(xX ,function(u) {
                        if(canConvert(u)) { type.convert(as.character(u), as.is = TRUE) } else { u }
                    })
                }) %>% do.call(cbind, .) %>% as.data.frame
        } #types fun

        validateBorderStyle <- function(borderStyle){


            valid <- c("none", "thin", "medium", "dashed", "dotted", "thick", "double", "hair", "mediumDashed", 
                       "dashDot", "mediumDashDot", "dashDotDot", "mediumDashDotDot", "slantDashDot")

            ind <- match(tolower(borderStyle), tolower(valid))
            if(any(is.na(ind)))
                stop("Invalid borderStyle", call. = FALSE)

            return(valid[ind])

        }

        validateColour <- function(colour, errorMsg = "Invalid colour!"){

            ## check if
            if(is.null(colour))
                colour = "black"

            validColours <- colours()

            if(any(colour %in% validColours))
                colour[colour %in% validColours] <- col2hex(colour[colour %in% validColours])

            if(any(!grepl("^#[A-Fa-f0-9]{6}$", colour)))
                stop(errorMsg, call.=FALSE)

            colour <- gsub("^#", "FF", toupper(colour))

            return(colour)

        }
        #x="0001"
        canConvert <- function(x) {
            return( !grepl("^0+\\.?\\d",x) )
            }
    } # define helper functions

    if(T) {
        params <- list(...)
        if (!is.logical(asTable)) 
            stop("asTable must be a logical.")
        creator <- ifelse("creator" %in% names(params), params$creator, 
                          "")
        title <- params$title
        subject <- params$subject
        category <- params$category
        sheetName <- "Sheet 1"
        if ("sheetName" %in% names(params)) {
            if (any(nchar(params$sheetName) > 31)) 
                stop("sheetName too long! Max length is 31 characters.")
            sheetName <- as.character(params$sheetName)
            if ("list" %in% class(x) & length(sheetName) == length(x)) 
                names(x) <- sheetName
        }
        tabColour <- NULL
        if ("tabColour" %in% names(params)) 
            tabColour <- validateColour(params$tabColour, "Invalid tabColour!")
        zoom <- 100
        if ("zoom" %in% names(params)) {
            if (is.numeric(params$zoom)) {
                zoom <- params$zoom
            }
            else {
                stop("zoom must be numeric")
            }
        }
        gridLines <- TRUE
        if ("gridLines" %in% names(params)) {
            if (all(is.logical(params$gridLines))) {
                gridLines <- params$gridLines
            }
            else {
                stop("Argument gridLines must be TRUE or FALSE")
            }
        }
        overwrite <- TRUE
        if ("overwrite" %in% names(params)) {
            if (is.logical(params$overwrite)) {
                overwrite <- params$overwrite
            }
            else {
                stop("Argument overwrite must be TRUE or FALSE")
            }
        }
        withFilter <- TRUE
        if ("withFilter" %in% names(params)) {
            if (is.logical(params$withFilter)) {
                withFilter <- params$withFilter
            }
            else {
                stop("Argument withFilter must be TRUE or FALSE")
            }
        }
        startRow <- 1
        if ("startRow" %in% names(params)) {
            if (all(startRow > 0)) {
                startRow <- params$startRow
            }
            else {
                stop("startRow must be a positive integer")
            }
        }
        startCol <- 1
        if ("startCol" %in% names(params)) {
            if (all(startCol > 0)) {
                startCol <- params$startCol
            }
            else {
                stop("startCol must be a positive integer")
            }
        }
        colNames <- TRUE
        if ("colNames" %in% names(params)) {
            if (is.logical(params$colNames)) {
                colNames <- params$colNames
            }
            else {
                stop("Argument colNames must be TRUE or FALSE")
            }
        }
        if ("col.names" %in% names(params)) {
            if (is.logical(params$col.names)) {
                colNames <- params$col.names
            }
            else {
                stop("Argument col.names must be TRUE or FALSE")
            }
        }
        rowNames <- FALSE
        if ("rowNames" %in% names(params)) {
            if (is.logical(params$rowNames)) {
                rowNames <- params$rowNames
            }
            else {
                stop("Argument colNames must be TRUE or FALSE")
            }
        }
        if ("row.names" %in% names(params)) {
            if (is.logical(params$row.names)) {
                rowNames <- params$row.names
            }
            else {
                stop("Argument row.names must be TRUE or FALSE")
            }
        }
        xy <- NULL
        if ("xy" %in% names(params)) {
            if (length(params$xy) != 2) 
                stop("xy parameter must have length 2")
            xy <- params$xy
        }
        headerStyle <- NULL
        if ("headerStyle" %in% names(params)) {
            if (length(params$headerStyle) == 1) {
                if ("Style" %in% class(params$headerStyle)) {
                    headerStyle <- params$headerStyle
                }
                else {
                    stop("headerStyle must be a style object.")
                }
            }
            else {
                if (all(sapply(params$headerStyle, function(x) "Style" %in% 
                               class(x)))) {
                    headerStyle <- params$headerStyle
                }
                else {
                    stop("headerStyle must be a style object.")
                }
            }
        }
        borders <- NULL
        if ("borders" %in% names(params)) {
            borders <- tolower(params$borders)
            if (!all(borders %in% c("surrounding", "rows", "columns", 
                                    "all"))) 
                stop("Invalid borders argument")
        }
        borderColour <- getOption("openxlsx.borderColour", "black")
        if ("borderColour" %in% names(params)) 
            borderColour <- params$borderColour
        borderStyle <- getOption("openxlsx.borderStyle", "thin")
        if ("borderStyle" %in% names(params)) {
            borderStyle <- validateBorderStyle(params$borderStyle)
        }
        keepNA <- FALSE
        if ("keepNA" %in% names(params)) {
            if (!"logical" %in% class(keepNA)) {
                stop("keepNA must be a logical.")
            }
            else {
                keepNA <- params$keepNA
            }
        }
        tableStyle <- "TableStyleLight9"
        if ("tableStyle" %in% names(params)) 
            tableStyle <- params$tableStyle
        colWidths <- ""
        if ("colWidths" %in% names(params)) 
            colWidths <- params$colWidths
    } # params check

    if(class(x) == "data.frame") {
        x %<>% setTypes %>% list
    } else {
        lNames <- names(x)
        x %<>% lapply(setTypes)
    }

    if(T) {   
        nms <- names(x)
        nSheets <- length(x)
        if (is.null(nms)) {
            nms <- paste("Sheet", 1:nSheets)
        }
        else if (any("" %in% nms)) {
            nms[nms %in% ""] <- paste("Sheet", (1:nSheets)[nms %in% 
                                                               ""])
        }
        else {
            nms <- make.unique(nms)
        }
        if (any(nchar(nms) > 31)) {
            warning("Truncating list names to 31 characters.")
            nms <- substr(nms, 1, 31)
        }
        if (!is.null(tabColour)) {
            if (length(tabColour) != nSheets) 
                tabColour <- rep_len(tabColour, length.out = nSheets)
        }
        if (length(zoom) != nSheets) 
            zoom <- rep_len(zoom, length.out = nSheets)
        if (length(gridLines) != nSheets) 
            gridLines <- rep_len(gridLines, length.out = nSheets)
        if (length(withFilter) != nSheets) 
            withFilter <- rep_len(withFilter, length.out = nSheets)
        if (length(colNames) != nSheets) 
            colNames <- rep_len(colNames, length.out = nSheets)
        if (length(rowNames) != nSheets) 
            rowNames <- rep_len(rowNames, length.out = nSheets)
        if (length(startRow) != nSheets) 
            startRow <- rep_len(startRow, length.out = nSheets)
        if (length(startCol) != nSheets) 
            startCol <- rep_len(startCol, length.out = nSheets)
        if (!is.null(headerStyle)) 
            headerStyle <- lapply(1:nSheets, function(x) return(headerStyle))
        if (length(borders) != nSheets & !is.null(borders)) 
            borders <- rep_len(borders, length.out = nSheets)
        if (length(borderColour) != nSheets) 
            borderColour <- rep_len(borderColour, length.out = nSheets)
        if (length(borderStyle) != nSheets) 
            borderStyle <- rep_len(borderStyle, length.out = nSheets)
        if (length(keepNA) != nSheets) 
            keepNA <- rep_len(keepNA, length.out = nSheets)
        if (length(asTable) != nSheets) 
            asTable <- rep_len(asTable, length.out = nSheets)
        if (length(tableStyle) != nSheets) 
            tableStyle <- rep_len(tableStyle, length.out = nSheets)
        if (length(colWidths) != nSheets) 
            colWidths <- rep_len(colWidths, length.out = nSheets)
    }  # setup and validation

    wb <- openxlsx::createWorkbook(creator = creator, title = title, subject = subject, 
                         category = category)

    for (i in 1:nSheets) {

        if(T) {

            wb$addWorksheet(nms[[i]], showGridLines = gridLines[i], 
                            tabColour = tabColour[i], zoom = zoom[i])
            if (asTable[i]) {

                for(ii in seq_along(x[[i]])){
                    openxlsx::writeDataTable(wb = wb, sheet = i, x = names(x[[i]])[[ii]],
                                             startCol = ii, startRow = 1, 
                                             xy = xy, colNames = colNames[[i]], rowNames = rowNames[[i]], 
                                             tableStyle = tableStyle[[i]], tableName = NULL, 
                                             headerStyle = headerStyle[[i]], withFilter = withFilter[[i]], 
                                             keepNA = keepNA[[i]]
                                             )
                    icol <- x[[i]][[ii]]

                    for(j in seq_along(icol)){
                        dati <- icol[[j]]

                        openxlsx::writeData(wb = wb, sheet = i,x = dati,
                                            startCol = ii, startRow = j+1, 
                                            xy = xy, colNames = colNames[[i]], rowNames = rowNames[[i]], 
                                            tableStyle = tableStyle[[i]], tableName = NULL, 
                                            headerStyle = headerStyle[[i]], withFilter = withFilter[[i]], 
                                            keepNA = keepNA[[i]]
                                            )
                    }
                }
            }
            else {

                for(ii in seq_along(x[[i]])){

                    openxlsx::writeData(wb = wb, sheet = i, x = names(x[[i]])[[ii]],
                                        startCol = ii, startRow = 1,
                                        xy = xy, colNames = colNames[[i]], rowNames = rowNames[[i]],
                                        headerStyle = headerStyle[[i]],
                                        borders = borders[[i]], borderColour = borderColour[[i]], borderStyle = borderStyle[[i]],
                                        keepNA = keepNA[[i]]
                    )
                    icol <- x[[i]][[ii]]

                    for(j in seq_along(icol)){
                        dati <- icol[[j]]

                        openxlsx::writeData(wb = wb, sheet = i,x = dati,
                                            startCol = ii, startRow = j+1, 
                                            xy = xy, colNames = colNames[[i]], rowNames = rowNames[[i]],
                                            headerStyle = headerStyle[[i]],
                                            borders = borders[[i]], borderColour = borderColour[[i]], borderStyle = borderStyle[[i]],
                                            keepNA = keepNA[[i]]
                        )
                    }
                }
            }
            if (colWidths[i] %in% "auto") 
                setColWidths(wb, sheet = i, cols = 1:ncol(x[[i]]) + 
                                 startCol[[i]] - 1L, widths = "auto")

            } #from list



    }

    if(T) {
        freezePanes <- FALSE
        firstActiveRow <- rep_len(1L, length.out = nSheets)
        if ("firstActiveRow" %in% names(params)) {
            firstActiveRow <- params$firstActiveRow
            freezePanes <- TRUE
            if (length(firstActiveRow) != nSheets) 
                firstActiveRow <- rep_len(firstActiveRow, length.out = nSheets)
        }
        firstActiveCol <- rep_len(1L, length.out = nSheets)
        if ("firstActiveCol" %in% names(params)) {
            firstActiveCol <- params$firstActiveCol
            freezePanes <- TRUE
            if (length(firstActiveCol) != nSheets) 
                firstActiveCol <- rep_len(firstActiveCol, length.out = nSheets)
        }
        firstRow <- rep_len(FALSE, length.out = nSheets)
        if ("firstRow" %in% names(params)) {
            firstRow <- params$firstRow
            freezePanes <- TRUE
            if ("list" %in% class(x) & length(firstRow) != nSheets) 
                firstRow <- rep_len(firstRow, length.out = nSheets)
        }
        firstCol <- rep_len(FALSE, length.out = nSheets)
        if ("firstCol" %in% names(params)) {
            firstCol <- params$firstCol
            freezePanes <- TRUE
            if ("list" %in% class(x) & length(firstCol) != nSheets) 
                firstCol <- rep_len(firstCol, length.out = nSheets)
        }
        if (freezePanes) {
            for (i in 1:nSheets) openxlsx::freezePane(wb = wb, sheet = i, 
                                            firstActiveRow = firstActiveRow[i], firstActiveCol = firstActiveCol[i], 
                                            firstRow = firstRow[i], firstCol = firstCol[i])
        }
    } # additional settings/Options

    openxlsx::saveWorkbook(wb = wb, file = file, overwrite = overwrite)

    return(invisible(NULL))
}

example data:

df1 <- mtcars

df1[1,3]<-"ID =====>"
df1[1,4]<-"00000123"
df1[3,7]<-NA
df1[2,6]<-"stringi"

ldf <- list(NOW=df1, WITH=df1, LISTS=df1)

call:

writeXlsxWithTypes(df1, "test_normal3.xlsx" , rowNames = TRUE, borders = "surrounding")
writeXlsxWithTypes(ldf, "test_list3.xlsx", rowNames = TRUE, borders = "surrounding")

Upvotes: 3

mt1022
mt1022

Reputation: 17289

I wrote a small piece of code following the suggestions of @Roland and @phiver. It starts with a tidy data.frame (to preserve the data type of each cell) and save values one by one:

library(openxlsx)
df1<- as.data.frame(cbind(A=list(1,NA_real_,"pvalue",0.0003),B=list(0.5,7,"I destroy","numbers all day")))

wb <- createWorkbook()
sheet.name <- 'test'
addWorksheet(wb, sheet.name)

for(i in seq_along(df1)){
    writeData(wb, sheet = sheet.name, names(df1)[i], startCol = i, startRow = 1)
    icol <- df1[[i]]
    for(j in seq_along(icol)){
        x <- icol[[j]]
        writeData(wb, sheet = sheet.name, x, startCol = i, startRow = j + 1)
    }
}
saveWorkbook(wb, file = "Test.xlsx")

enter image description here

Hope this works for your data.

Upvotes: 5

Related Questions