Reputation: 5648
Here is my data
Date male female test
2013-10-06 7.21 0.651 1
2013-10-12 NA NA 1
2013-10-18 4.68 1.040 1
2013-10-24 3.47 0.363 2
2013-10-30 2.42 0.507 2
And I basically need a count of the number of valid (complete) cases by test:
test nobs
1 2
2 2
I am a total rookie at R. My current code keeps producing 0 for nobs:
#' Count complete cases per test id across the files in a directory
#'
#' Reads every file in `directory` as CSV and, for each requested test id,
#' counts the rows that have no missing values in any column. Counts are
#' summed over all files.
#'
#' @param directory Path to the directory holding the CSV files. Every file
#'   in it is read, so it should contain only CSV files with a `test` column.
#' @param id Vector of test ids to report on.
#' @return A data.frame with one row per element of `id` and two columns:
#'   `test` (the id) and `nobs` (number of complete rows with that id).
partition <- function(directory, id = 1:200) {
  # full.names = TRUE so the files can be read even when `directory` is not
  # the current working directory.
  files <- list.files(directory, full.names = TRUE)
  nobs <- integer(length(id))
  for (file in files) {
    data <- read.csv(file)
    # Test ids of the rows that contain no NA in any column.
    valid_tests <- data$test[complete.cases(data)]
    # Tabulating against fixed levels yields one count per requested id
    # (0 when an id does not occur), aligned with `id`.
    nobs <- nobs + as.integer(table(factor(valid_tests, levels = id)))
  }
  data.frame(test = id, nobs = nobs)
}
and here is my (definitely) wrong output:
partition(".",c(1)
test nobs
1 0
Upvotes: 1
Views: 205
Reputation: 12640
Using plyr:
library("plyr")
# Count complete cases per test id, separately for each file in `directory`.
#
# directory: path to a directory whose files are all read as CSV; each is
#            expected to have a `test` column.
# id:        test ids to report; ids that do not occur get a count of 0.
#
# Returns a data frame with columns `file` (bare file name), `test` and
# `nobs`, with one row per file/id combination.
partition <- function(directory, id = 1:200) {
  files <- list.files(directory)
  ldply(files, function(file) {
    # list.files() returns names relative to `directory`, so build the full
    # path for reading; the bare name is kept for the output column.
    data <- read.csv(file.path(directory, file))
    data <- data[complete.cases(data), ]
    # Fixed factor levels give one count per requested id, including zeros.
    counts <- table(factor(data$test, levels = id))
    setNames(data.frame(file, counts), c("file", "test", "nobs"))
  })
}
Upvotes: 0
Reputation: 11617
Using dplyr:
library(dplyr)
# Drop rows containing any NA, then count the remaining rows per test value.
data %>%
  na.omit() %>%
  group_by(test) %>%
  summarise(nobs = n())
Source: local data frame [2 x 2]
test nobs
1 1 2
2 2 2
Upvotes: 1
Reputation: 93938
Use `aggregate` in base R:
# Sum the per-row completeness flags (TRUE counts as 1) within each test group.
aggregate(
  x = list(nobs = complete.cases(data)),
  by = data["test"],
  FUN = sum
)
# test nobs
#1 1 2
#2 2 2
Upvotes: 2
Reputation: 61214
Using base R functions, where `df` is your data:
> res <- sapply(split(df, df$test), function(x) sum(complete.cases(x)), USE.NAMES=FALSE)
> res <- data.frame(test=names(res), nobs=res)
> res
test nobs
1 1 2
2 2 2
Upvotes: 0
Reputation: 3194
library(data.table)
# Keep only the rows with no missing values. The trailing comma matters: on a
# plain data.frame, `data[i]` with a single index selects columns, not rows.
df1 <- data[complete.cases(data), ]
# Convert to a data.table by reference and count the rows (.N) per test value.
setDT(df1)[, list(nobs = .N), by = test]
test nobs
1: 1 2
2: 2 2
#Or, as per @Arun,
setDT(data)[complete.cases(data),list(nobs=.N),by=test]
data<-structure(list(Date = structure(1:5, .Label = c("2013-10-06",
"2013-10-12", "2013-10-18", "2013-10-24", "2013-10-30"), class = "factor"),
male = c(7.21, NA, 4.68, 3.47, 2.42), female = c(0.651, NA,
1.04, 0.363, 0.507), test = c(1L, 1L, 1L, 2L, 2L)), .Names = c("Date",
"male", "female", "test"), class = "data.frame", row.names = c(NA,
-5L))
Upvotes: 4