Reputation: 65
I have a CSV file with backslashes to escape certain commas, like so:
c389c3d0-1175-465f-b2bb-1070f24d17a6,eccbc87e4b5ce2fe28308fd9f2a7baf3,{"parsed_query":{"qty":"2"\,"unit":null\,"brand":null\,"food":"apples"}\,"accuracy":"yellow"},apples,NULL,NULL,g,189.28,0.62,0.10,0.00,3.64,50.27,8.74,37.82,0.95,389.48,40.04,2015-10-19 21:24:49,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL
5df13606-1f78-491f-a4b2-d104f82d030f,c81e728d9d4c2f636f067f89cc14862c,{"parsed_query":{"qty":"2"\,"unit":null\,"brand":null\,"food":"oranges"}\,"accuracy":"yellow"},oranges,NULL,NULL,g,137.20,0.42,0.05,0.00,2.80,35.11,6.16,23.80,2.55,464.80,64.40,2015-10-19 21:24:49,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL
I'm trying to read the file into R, but I'm receiving the following error:
Error in read.table(file = file, header = header, sep = sep, quote = quote, : more columns than column names
Does anyone know how to get R to read the backslashes as escape characters? Thanks in advance!
Upvotes: 0
Views: 1385
Reputation: 78792
If you don't want to muck with the JSON blob that someone errantly stuck in the middle of a CSV record, you can also do the following. It's a bit more heavyweight, but it will work around gnarly mid-field JSON blobs w/o disturbing them:
library(stringi)
library(tidyverse)
stri_read_lines("~/Desktop/a.csv") %>%
map_df(~{
m1 <- stri_match_first_regex(.x, "^([[:alnum:]-\\:, \\.]+)\\{")[,2]
m2 <- stri_match_last_regex(.x, "\\}([[:alnum:]-\\:, \\.]+)")[,2]
c(
stri_split_fixed(m1, ",", simplify=TRUE)[,-1],
stri_replace_first_fixed(.x, m1, "") %>% stri_replace_last_fixed(m2, ""),
stri_split_fixed(m2, ",", simplify=TRUE)[,-1]
) %>%
as.list() %>%
set_names(~sprintf("X%d", 1:length(.x)))
}) %>%
glimpse()
## Observations: 2
## Variables: 32
## $ X1 <chr> "eccbc87e4b5ce2fe28308fd9f2a7baf3", "c81e728d9d4c2f636f067f89cc14862c"
## $ X2 <chr> "", ""
## $ X3 <chr> "{\"parsed_query\":{\"qty\":\"2\"\\,\"unit\":null\\,\"brand\":null\\,\"food\":\"apples\"}\\,\"accuracy\":\"yellow\"}", "{\"parsed_query\":{\...
## $ X4 <chr> "apples", "oranges"
## $ X5 <chr> "NULL", "NULL"
## $ X6 <chr> "NULL", "NULL"
## $ X7 <chr> "g", "g"
## $ X8 <chr> "189.28", "137.20"
## $ X9 <chr> "0.62", "0.42"
## $ X10 <chr> "0.10", "0.05"
## $ X11 <chr> "0.00", "0.00"
## $ X12 <chr> "3.64", "2.80"
## $ X13 <chr> "50.27", "35.11"
## $ X14 <chr> "8.74", "6.16"
## $ X15 <chr> "37.82", "23.80"
## $ X16 <chr> "0.95", "2.55"
## $ X17 <chr> "389.48", "464.80"
## $ X18 <chr> "40.04", "64.40"
## $ X19 <chr> "2015-10-19 21:24:49", "2015-10-19 21:24:49"
## $ X20 <chr> "NULL", "NULL"
## $ X21 <chr> "NULL", "NULL"
## $ X22 <chr> "NULL", "NULL"
## $ X23 <chr> "NULL", "NULL"
## $ X24 <chr> "NULL", "NULL"
## $ X25 <chr> "NULL", "NULL"
## $ X26 <chr> "NULL", "NULL"
## $ X27 <chr> "NULL", "NULL"
## $ X28 <chr> "0", "0"
## $ X29 <chr> "NULL", "NULL"
## $ X30 <chr> "NULL", "NULL"
## $ X31 <chr> "NULL", "NULL"
## $ X32 <chr> "NULL", "NULL"
Upvotes: 1