Amadou Kone
Amadou Kone

Reputation: 956

Weird output when writing a data.frame to a text file

I keep getting weird output when writing a data frame to a text file using R. I've had the same problem using write.csv(), write.csv2() and write.table().

I have a data.frame called "extract", which I created by using rbind() to bind together multiple data.frames that had previously been subset from a larger data.frame. I am trying to write every column except the first (a large text string) to a txt file.

> dim(extract)
[1] 1001   16

> extract[1:5,2:16]
          latitude    longitude WordCount English Common Tagalog Bikol Cebuano Hiligaynon Ilocano Kapampangan Pangasinense Waray Zam Spanish
20     20.45164941  121.9671742         7       1      0       0     3       0          0       0           0            0     0   0       0
77     20.45164941  121.9671742         7       1      0       0     3       0          0       0           0            0     0   0       0
106    20.45164941  121.9671742         7       1      0       0     3       0          0       0           0            0     0   0       0
48670  20.45164941  121.9671742         7       1      0       0     3       0          0       0           0            0     0   0       0
94831   8.19183664 124.17909375        13       8      2       0     3       0          0       0           0            0     0   0       0

When I do

> write.csv(extract[,2:16], 'extract 6-30.txt', row.names=FALSE)

In the resulting file, only the first two columns (latitude and longitude) are written correctly. Every other field in every row contains the entire column, written out as a concatenated vector, instead of the single value for that row. So, for example:

> write.csv(extract[1:5,2:16], 'extract 6-29.txt', row.names=FALSE)

yields the following text file:

"latitude","longitude","WordCount","English","Common","Tagalog","Bikol","Cebuano","Hiligaynon","Ilocano","Kapampangan","Pangasinense","Waray","Zam","Spanish"
    "20.45164941","121.9671742",c(7, 7, 7, 7, 13),c(1, 1, 1, 1, 8),c(0, 0, 0, 0, 2),c(0, 0, 0, 0, 0),c(3, 3, 3, 3, 3),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0)
    "20.45164941","121.9671742",c(7, 7, 7, 7, 13),c(1, 1, 1, 1, 8),c(0, 0, 0, 0, 2),c(0, 0, 0, 0, 0),c(3, 3, 3, 3, 3),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0)
    "20.45164941","121.9671742",c(7, 7, 7, 7, 13),c(1, 1, 1, 1, 8),c(0, 0, 0, 0, 2),c(0, 0, 0, 0, 0),c(3, 3, 3, 3, 3),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0)
    "20.45164941","121.9671742",c(7, 7, 7, 7, 13),c(1, 1, 1, 1, 8),c(0, 0, 0, 0, 2),c(0, 0, 0, 0, 0),c(3, 3, 3, 3, 3),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0)
    "8.19183664","124.17909375",c(7, 7, 7, 7, 13),c(1, 1, 1, 1, 8),c(0, 0, 0, 0, 2),c(0, 0, 0, 0, 0),c(3, 3, 3, 3, 3),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0),c(0, 0, 0, 0, 0)

Is there some argument I am missing? I have tried with and without row.names, as well as using write.table with a variety of different separators. I appreciate any help, let me know if you have more questions. I believe it might have something to do with how the dataframe was created. Sorry if this has been asked already, but this specific problem I am having is pretty difficult to search for.

Edit: So using dput yields:

> dput(extract[1:5,2:16])
structure(list(latitude = c("20.45164941", "20.45164941", "20.45164941", 
"20.45164941", "8.19183664"), longitude = c("121.9671742", "121.9671742", 
"121.9671742", "121.9671742", "124.17909375"), WordCount = structure(list(
    WordCount = c(7L, 7L, 7L, 7L, 13L)), .Names = "WordCount", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), English = structure(list(
    English = c(1L, 1L, 1L, 1L, 8L)), .Names = "English", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Common = structure(list(
    Common = c(0L, 0L, 0L, 0L, 2L)), .Names = "Common", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Tagalog = structure(list(
    Tagalog = c(0L, 0L, 0L, 0L, 0L)), .Names = "Tagalog", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Bikol = structure(list(
    Bikol = c(3L, 3L, 3L, 3L, 3L)), .Names = "Bikol", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Cebuano = structure(list(
    Cebuano = c(0L, 0L, 0L, 0L, 0L)), .Names = "Cebuano", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Hiligaynon = structure(list(
    Hiligaynon = c(0L, 0L, 0L, 0L, 0L)), .Names = "Hiligaynon", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Ilocano = structure(list(
    Ilocano = c(0L, 0L, 0L, 0L, 0L)), .Names = "Ilocano", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Kapampangan = structure(list(
    Kapampangan = c(0L, 0L, 0L, 0L, 0L)), .Names = "Kapampangan", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Pangasinense = structure(list(
   Pangasinense = c(0L, 0L, 0L, 0L, 0L)), .Names = "Pangasinense", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Waray = structure(list(
    Waray = c(0L, 0L, 0L, 0L, 0L)), .Names = "Waray", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Zam = structure(list(
    Zam = c(0L, 0L, 0L, 0L, 0L)), .Names = "Zam", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame"), Spanish = structure(list(
    Spanish = c(0L, 0L, 0L, 0L, 0L)), .Names = "Spanish", row.names = c("20", 
"77", "106", "48670", "94831"), class = "data.frame")), .Names = c("latitude", 
"longitude", "WordCount", "English", "Common", "Tagalog", "Bikol", 
"Cebuano", "Hiligaynon", "Ilocano", "Kapampangan", "Pangasinense", 
"Waray", "Zam", "Spanish"), row.names = c(20L, 77L, 106L, 48670L, 
94831L), class = "data.frame")

And a summary:

> summary(extract)
    alltweets.text                                                                                                                                                              latitude        
 Grabe. Kasanok masungbatan nu ana nakaturog nak idin. 1AM kano ngamin idin. Haha. \xed\xa0\xbd\xed\xb1\x8a\xed\xa0\xbd\xed\xb1\x8a                                 :  9   Length:1001       
 @MitchyyWong mejo mejo lang pud hahaha ikaw ang hinfi talaga ba haha labyu                                                                           :  8                 Class :character  
 #EvictLoisa nakaka inis panoorin sa tv screen di na maganda tingnan palayasin nayan ngaung saturday! #PBBStopAndStare                                :  8                 Mode  :character  
 Lionel Messi for breakfast! \xed\xa0\xbd\xed\xb8\x9c Happy Monday.. #ARG vs #BIH #WorldCup2014                                                              :  8                            
 ngano lami man kaayo ang kan.on oy? ilabi nag parisan ug bulad ug ginamos. ginoo ko tabang!                                                          :  8                                   
 "@IndayBanat: Ang taong pinakamahirap tanggalin sa isipan mo, ay yong taong kahit hindi kayo pero pinapasaya ka at binubuo ang araw mo." \xed\xa0\xbd\xed\xb8\xad:  7                       
 (Other)                                                                                                                                              :953                                   
  longitude         WordCount.WordCount   English.English      Common.Common     Tagalog.Tagalog      Bikol.Bikol     Cebuano.Cebuano   Hiligaynon.Hiligaynon  Ilocano.Ilocano  
 Length:1001        Min.   : 3.000000   Min.   : 0.000000   Min.   : 0.000000   Min.   :0.000000   Min.   :0.00000   Min.   :0.000000   Min.   :0.000000      Min.   :0.000000  
 Class :character   1st Qu.: 9.000000   1st Qu.: 1.000000   1st Qu.: 2.000000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000      1st Qu.:0.000000  
 Mode  :character   Median :12.000000   Median : 2.000000   Median : 4.000000   Median :1.000000   Median :0.00000   Median :0.000000   Median :0.000000      Median :0.000000  
                    Mean   :12.384615   Mean   : 2.298701   Mean   : 4.074925   Mean   :1.599401   Mean   :0.07992   Mean   :0.418581   Mean   :0.140859      Mean   :0.305694  
                    3rd Qu.:15.000000   3rd Qu.: 3.000000   3rd Qu.: 6.000000   3rd Qu.:3.000000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.000000      3rd Qu.:0.000000  
                    Max.   :22.000000   Max.   :15.000000   Max.   :13.000000   Max.   :7.000000   Max.   :4.00000   Max.   :5.000000   Max.   :4.000000      Max.   :4.000000  

 Kapampangan.Kapampangan Pangasinense.Pangasinense     Waray.Waray       Zam.Zam    Spanish.Spanish  
 Min.   :0.00000         Min.   :0.000000          Min.   : 0.000000   Min.   :0   Min.   :0.000000  
 1st Qu.:0.00000         1st Qu.:0.000000          1st Qu.: 0.000000   1st Qu.:0   1st Qu.:0.000000  
 Median :0.00000         Median :0.000000          Median : 0.000000   Median :0   Median :0.000000  
 Mean   :0.26973         Mean   :0.076923          Mean   : 0.818182   Mean   :0   Mean   :0.367632  
 3rd Qu.:0.00000         3rd Qu.:0.000000          3rd Qu.: 1.000000   3rd Qu.:0   3rd Qu.:1.000000  
 Max.   :8.00000         Max.   :5.000000          Max.   :10.000000   Max.   :0   Max.   :5.000000  

And a sample of the string column:

> extract$alltweets.text[1:10]
 [1] Helo #Payi!! #lobster!! #happyfiesta diptan!! \xed\xa0\xbc\xed\xbe\x89\xed\xa0\xbc\xed\xbe\x8a\xed\xa0\xbd\xed\xb8\x8d\xed\xa0\xbd\xed\xb1\x8c @ Diptan, Basco,Batanes http://t.co/rZy4QlIwxW
 [2] Helo #Payi!! #lobster!! #happyfiesta diptan!! \xed\xa0\xbc\xed\xbe\x89\xed\xa0\xbc\xed\xbe\x8a\xed\xa0\xbd\xed\xb8\x8d\xed\xa0\xbd\xed\xb1\x8c @ Diptan, Basco,Batanes http://t.co/rZy4QlIwxW
 [3] Helo #Payi!! #lobster!! #happyfiesta diptan!! \xed\xa0\xbc\xed\xbe\x89\xed\xa0\xbc\xed\xbe\x8a\xed\xa0\xbd\xed\xb8\x8d\xed\xa0\xbd\xed\xb1\x8c @ Diptan, Basco,Batanes http://t.co/rZy4QlIwxW
 [4] Helo #Payi!! #lobster!! #happyfiesta diptan!! \xed\xa0\xbc\xed\xbe\x89\xed\xa0\xbc\xed\xbe\x8a\xed\xa0\xbd\xed\xb8\x8d\xed\xa0\xbd\xed\xb1\x8c @ Diptan, Basco,Batanes http://t.co/rZy4QlIwxW
 [5] Here is what's cooking at Gloria's Lechon in Iligan City! Who wants lechon manok? @ Gloria's Lechon http://t.co/9gxv4GxgIW                                       
 [6] Here is what's cooking at Gloria's Lechon in Iligan City! Who wants lechon manok? @ Gloria's Lechon http://t.co/9gxv4GxgIW                                       
 [7] "@PBBabscbn: Pindot, pindot lang! Pindot, pindot lang! #PBBStopAndStare"                                                                                       
 [8] "@PBBabscbn: Pindot, pindot lang! Pindot, pindot lang! #PBBStopAndStare"                                                                                      
 [9] @eyyyorange dae pwede duman. Crowded na duman ta dakulon na boarders. Dae ngani ko duman makaadal tultol ta maribok. Inda matxt na lang ako                      
[10] Pindot pindot lang..pindot pindot pindot lang xD hahahahahaha                                                                                                    
92162 Levels: ⃣ phones are the best \xed��\xed�\u008a _beown eyes tells his soul\nxo xo http://t.co/vzbMmzgdk7 ... 

Upvotes: 2

Views: 371

Answers (1)

A5C1D2H2I1M1N2O1R2T1
A5C1D2H2I1M1N2O1R2T1

Reputation: 193497

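As indicated in the comments, your problem is due to the structure of your data.frame: the dput() output shows that every column from WordCount onward is itself a one-column data.frame nested inside extract, rather than a plain vector. If you can't fix it at the source of the problem, you should be able to flatten it with: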

do.call(data.frame, extract) ## extract is the name of your original data frame

Example:

str(do.call(data.frame, extract))
# 'data.frame':  5 obs. of  15 variables:
# $ latitude    : Factor w/ 2 levels "20.45164941",..: 1 1 1 1 2
# $ longitude   : Factor w/ 2 levels "121.9671742",..: 1 1 1 1 2
# $ WordCount   : int  7 7 7 7 13
# $ English     : int  1 1 1 1 8
# $ Common      : int  0 0 0 0 2
# $ Tagalog     : int  0 0 0 0 0
# $ Bikol       : int  3 3 3 3 3
# $ Cebuano     : int  0 0 0 0 0
# $ Hiligaynon  : int  0 0 0 0 0
# $ Ilocano     : int  0 0 0 0 0
# $ Kapampangan : int  0 0 0 0 0
# $ Pangasinense: int  0 0 0 0 0
# $ Waray       : int  0 0 0 0 0
# $ Zam         : int  0 0 0 0 0
# $ Spanish     : int  0 0 0 0 0

Here's what the written file should look like once the data.frame has been flattened:

A <- tempfile()
write.csv(do.call(data.frame, extract), A)
readLines(A)
# [1] "\"\",\"latitude\",\"longitude\",\"WordCount\",\"English\",\"Common\",\"Tagalog\",\"Bikol\",\"Cebuano\",\"Hiligaynon\",\"Ilocano\",\"Kapampangan\",\"Pangasinense\",\"Waray\",\"Zam\",\"Spanish\""
# [2] "\"20\",\"20.45164941\",\"121.9671742\",7,1,0,0,3,0,0,0,0,0,0,0,0"                                                                                                                                
# [3] "\"77\",\"20.45164941\",\"121.9671742\",7,1,0,0,3,0,0,0,0,0,0,0,0"                                                                                                                                
# [4] "\"106\",\"20.45164941\",\"121.9671742\",7,1,0,0,3,0,0,0,0,0,0,0,0"                                                                                                                               
# [5] "\"48670\",\"20.45164941\",\"121.9671742\",7,1,0,0,3,0,0,0,0,0,0,0,0"                                                                                                                             
# [6] "\"94831\",\"8.19183664\",\"124.17909375\",13,8,2,0,3,0,0,0,0,0,0,0,0" 

Upvotes: 1

Related Questions