Christian Bongiorno
Christian Bongiorno

Reputation: 5648

data frame partitioning, how to

Here is my data

      Date    male  female test
2013-10-06    7.21   0.651  1
2013-10-12      NA      NA  1
2013-10-18    4.68   1.040  1
2013-10-24    3.47   0.363  2
2013-10-30    2.42   0.507  2

I basically need a count of the number of valid (complete) cases by test:

test    nobs
   1       2
   2       2

I am a total rookie in R. My current code keeps producing 0 for nobs:

# partition(directory, id): meant to report, for each value in `id`, how many
# complete (NA-free) rows have that value in the `test` column.
# NOTE(review): as written it always reports 0 -- see the inline notes.
#   directory: folder whose files are listed and read as CSV.
#   id:        values of `test` to report on (default 1:200).
# Returns `results`, which only ever holds the "test"/"nobs" pair written by
# the last loop iteration.
partition <- function(directory, id = 1:200) {
  # Bare file names only; the directory part is not included.
  files = list.files(directory)
  results = NULL

  for(file in files) {
    # The bare name is resolved against the working directory, not `directory`.
    data = read.csv(file)
    # One logical per row: TRUE when the row contains no NA.
    comp = complete.cases(data)

    for(i in id) {
      # Overwrites the same element on every iteration instead of adding a row.
      results["test"] = i
      # "test" == i compares the literal string "test" with i, which is a single
      # FALSE; comp[FALSE] selects nothing, so r is always empty. The `test`
      # column of `data` is never consulted.
      r = comp["test" == i]
      # Length of the empty selection, hence the reported 0.
      results["nobs"] = length(r)
    }

  }     
  results
}

and here is my (definitely) wrong output:

partition(".", c(1))
  test nobs 
     1    0 

Upvotes: 1

Views: 205

Answers (5)

Nick Kennedy
Nick Kennedy

Reputation: 12640

Using plyr:

library("plyr")

# Count complete (NA-free) rows per `test` value for every file in `directory`.
#
# directory: path of the folder holding the CSV files.
# id:        `test` values to report; values with no complete rows get nobs = 0.
# Returns a data frame with one row per file/test combination and the columns
# `file`, `test`, and `nobs`.
partition <- function(directory, id = 1:200) {
  # full.names = TRUE keeps the directory in each path, so the files can be
  # read even when `directory` is not the current working directory.
  files <- list.files(directory, full.names = TRUE)
  ldply(files, function(file) {
    data <- read.csv(file)
    data <- data[complete.cases(data), ]
    # Fixing the factor levels to `id` makes table() report zero counts too.
    counts <- table(factor(data$test, levels = id))
    # basename() keeps the `file` column showing the bare file name.
    setNames(
      data.frame(basename(file), counts),
      c("file", "test", "nobs")
    )
  })
}

Upvotes: 0

Carlos Cinelli
Carlos Cinelli

Reputation: 11617

Using dplyr:

library(dplyr)

# Drop incomplete rows, then count what is left within each `test` group.
data %>%
  na.omit() %>%
  group_by(test) %>%
  summarise(nobs = n())
Source: local data frame [2 x 2]

  test nobs
1    1    2
2    2    2

Upvotes: 1

thelatemail
thelatemail

Reputation: 93938

Use aggregate in base R:

# Sum the per-row completeness flags (TRUE counts as 1) within each `test` group.
aggregate(
  x = list(nobs = complete.cases(data)),
  by = data["test"],
  FUN = sum
)
#  test nobs
#1    1    2
#2    2    2

Upvotes: 2

Jilber Urbina
Jilber Urbina

Reputation: 61214

Using base R functions, where df is your data:

# Split the rows by `test`, then count the complete (NA-free) rows per piece.
# vapply() pins the result to one integer per group and keeps the group names,
# which the next line uses to fill the `test` column.
res <- vapply(
  split(df, df$test),
  function(x) sum(complete.cases(x)),
  integer(1)
)
res <- data.frame(test = names(res), nobs = res)
res
#   test nobs
# 1    1    2
# 2    2    2

Upvotes: 0

user227710
user227710

Reputation: 3194

    library(data.table)

    # `data` (defined below) is a plain data.frame, so rows must be selected
    # with the trailing comma; a lone logical index is read as a column selector.
    df1 <- data[complete.cases(data), ]
    setDT(df1)[, list(nobs = .N), by = test]
    #    test nobs
    # 1:    1    2
    # 2:    2    2

    # Or, as per @Arun, convert by reference and filter in the same call:
    setDT(data)[complete.cases(data), list(nobs = .N), by = test]


# Example data from the question: five dated measurements, one of which
# (2013-10-12) has NA in both `male` and `female`.
data <- data.frame(
  Date = factor(c(
    "2013-10-06", "2013-10-12", "2013-10-18", "2013-10-24", "2013-10-30"
  )),
  male = c(7.21, NA, 4.68, 3.47, 2.42),
  female = c(0.651, NA, 1.04, 0.363, 0.507),
  test = c(1L, 1L, 1L, 2L, 2L)
)

Upvotes: 4

Related Questions