Reputation: 367
I have a data.frame that has 4 columns. Column 2 has each individual's unique ID ("Cofecha"), and column 4 has the plot that a given individual belongs to ("Plot"). There are multiple occurrences of each individual in the data.frame. I'm trying to do two things: (1) get the number of unique individuals, then (2) find out how many unique individuals occur in each plot. I can find the number of total unique individuals (4 in the excerpt), but I can't figure out how to then count the number of individuals per plot. Any help would be appreciated!
Excerpt from original data.frame:
dx1 <- structure(list(Year = c(1920L, 1921L, 1921L, 1922L, 1922L, 1923L, 1923L, 1924L, 1924L, 1924L, 1925L, 1925L, 1925L, 1926L, 1926L, 1926L, 1927L, 1927L, 1927L, 1927L, 1928L, 1928L, 1928L, 1928L, 1929L), Cofecha = structure(c(69L, 166L, 69L, 166L, 69L, 166L, 69L, 166L, 69L, 50L, 166L, 69L, 50L, 166L, 69L, 50L, 166L, 232L, 69L, 50L, 166L, 232L, 69L, 50L, 166L), .Label = c("LB1A002", "LB1A003", "LB1A101", "LB1A102", "LB1A103", "LB1A212", "LB1A228", "LB1A231", "LB1A233", "LB1B001", "LB1B002", "LB1B003", "LB1B210", "LB1B216", "LB2A001", "LB2A002", "LB2A003", "LB2A004", "LB2A008", "LB2A009", "LB2A011", "LB2B001", "LB2B005", "LB2B008", "LB2B101", "LB2B102", "LB2B103", "LB2C003", "LB2C004", "LB2C008", "LB2C009", "LB2C010", "LB2C001", "LB2D005", "LB2D006", "LB2D007", "LB2D008", "LB2D009", "LB2D010", "LB2D101", "SM1A005", "SM1A101", "SM1A301", "SM1A302", "SM1B003", "SM1C005", "SM1C302", "SM1D006", "SM2A004", "SM2A005", "SM2A007", "SM2A210", "SM2A301", "SM2B001", "SM2B005", "SM2B006", "SM2B101", "SM2C005", "SM2C101", "SM2C301", "SM2D006", "SM2D101", "SM2D221", "IR1A004", "IR1A009", "IR1A206", "IR1B001", "IR1B004", "IR1B005", "IR1B301", "IR1B302", "IR1C005", "IR1C006", "IR1C007", "IR1C008", "IR1C204", "IR1C205", "IR1D002", "IR1D101", "IR2A003", "IR2A101", "IR2A211", "IR2A234", "IR2B002", "IR2B005", "IR2B101", "IR2B201", "IR2B210", "IR2B229", "IR2C230", "IR2C256", "IR2C301", "IR2C302", "IR2C002", "IR2C009", "IR2C101", "IR2C204", "IR2C215", "IR2D227", "IR2D228", "IR2D237", "IR2D254", "IR2D301", "IR2D302", "IR2D003", "IR2D006", "IR2D009", "IR2D011", "IR2D207", "IR2D216", "JA1A101", "JA1A224", "JA1A301", "JA1B004", "JA1B101", "JA1B102", "JA1B219", "JA1B233", "JA1C002", "JA1C232", "JA1D001", "JA1D101", "JA2A101", "JA2A102", "JA2A206", "JA2A209", "JA2A210", "JA2A004", "JA2A005", "JA2A006", "JA2A007", "JA2A008", "JA2B005", "JA2B206", "JA2C001", "JA2C002", "JA2C007", "JA2C101", "JA2C202", "JA3N007", "JA3N008", "JA3N009", "JA3N010", "JA3N011", "JA3N012", "JA3N001", "JA3N002", 
"JA3N003", "JA3N004", "JA3N005", "JA3N006", "SF5A007", "SF5B223", "SF5B227", "SF5B228", "SF5B301", "SF5B302", "SF5C201", "SF5C214", "SF5C216", "SF5C301", "SF5C303", "SF5D004", "SF5D101", "SF5D207", "AP1A001", "AP1A004", "AP1A005", "AP1A006", "AP1A008", "AP1A009", "AP1A010", "AP1A101", "AP1B005", "AP1B007", "AP1B011", "AP1B101", "AP1B102", "AP1C006", "AP1C007", "AP1C010", "AP1C011", "AP1C001", "AP1C002", "AP1D001", "AP1D005", "AP1D007", "AP1D008", "AP1D009", "AP1D010", "AP1D011", "AP1D012", "AP1D013", "AP1D101", "AP1D102", "AP1D103", "AP1D104", "AP1C004", "AP1C005", "AP2A001", "AP2A002", "AP2A003", "AP2B001", "AP2B003", "AP2B004", "AP2B101", "AP2B102", "AP2C001", "AP2C002", "AP2C003", "AP2C004", "AP2C005", "AP2C007", "AP2C008", "AP2C102", "AP2C103", "AP2C104", "AP2D001", "AP2D002", "AP2D005", "AP2D006", "AP2D009", "AP2D101", "AP2D102", "AP2D103", "AP3A003", "AP3A005", "AP3A008", "AP3A014", "AP3A015", "AP3A101", "AP3A102", "AP3B101", "AP3B102", "AP3B103", "AP3B104", "AP3B003", "AP3B007", "AP3B010", "AP3B012", "AP3C003", "AP3C004", "AP3C006", "AP3C007", "AP3C009", "AP3C011", "AP3C101", "AP3C102", "AP3C103", "AP3C104", "AP3C105", "AP3D006", "AP3D011", "AP3D101", "AP3D102", "BF1A101", "BF1A102", "BF1A103", "BF1A104", "BF1B003", "BF1B005", "BF1B006", "BF1B007", "BF1B101", "BF1C007", "BF1C101", "BF1C102", "BF1D003", "BF1D007", "BF1D010", "BF1D101", "BF1D102", "BF1D103", "BF1D210", "BF2A001", "BF2A002", "BF2B001", "BF2B214", "BF2B219", "BF2C001", "BF2C004", "BF2C008", "BF2C101", "BF2C102", "BF2C201", "BF2C205", "BF2C213", "BF2C219", "BF2C301", "BF2D004", "BF2D013", "BF2D014", "BF2D015", "BF3A001", "BF3A002", "BF3A004", "BF3A005", "BF3A007", "BF3A008", "BF3A009", "BF3A101", "BF3B003", "BF3B101", "BF3C002", "BF3C003", "BF3C007", "BF3C009", "BF3C010", "BF3D002", "BF3D003", "BF3D004", "BF3D009", "BF3D010" ), class = "factor"), AvgBaiTenyr = c(3.1292, 2.3011, 3.07395, 2.374, 3.4236, 2.34095, 3.50005, 2.3903, 3.68825, 2.2265, 2.35475, 3.69255, 2.3487, 2.417, 3.57705, 2.32715, 
2.39665, 2.6338, 3.433, 2.2573, 2.37235, 2.6384, 3.49635, 2.28685, 2.26055), Plot = c("IR1", "AP1", "IR1", "AP1", "IR1", "AP1", "IR1", "AP1", "IR1", "SM2", "AP1", "IR1", "SM2", "AP1", "IR1", "SM2", "AP1", "AP3", "IR1", "SM2", "AP1", "AP3", "IR1", "SM2", "AP1")), .Names = c("Year", "Cofecha", "AvgBaiTenyr", "Plot"), row.names = c(323L, 326L, 331L, 335L, 341L, 345L, 351L, 355L, 361L, 365L, 366L, 372L, 376L, 377L, 383L, 387L, 388L, 391L, 396L, 400L, 401L, 404L, 409L, 413L, 414L), class = "data.frame")
Code used to find unique individuals:
# Distinct individual IDs, one entry per individual regardless of repeats.
dx2 <- unique(dx1[["Cofecha"]])
I also tried "table" but it only gave me the counts for how many times each individual appeared, and not how many unique individuals occurred per plot:
# Tabulate how many rows each individual ID appears in (not per-plot counts).
table(dx1$Cofecha)
Is there a way to then count each individual based on the first 3 characters of the name? The first 3 characters are equivalent to the plot ID.
Upvotes: 1
Views: 770
Reputation: 3239
Use a substring:
# Tabulate rows by the first three characters of the ID (the plot code).
table(substr(as.character(dx1$Cofecha), start = 1, stop = 3))
gives me
AP1 AP3 IR1 SM2
9 2 9 5
Counting each one once with unique:
# Drop repeated IDs first so each individual is counted once per plot code.
table(substr(as.character(unique(dx1$Cofecha)), start = 1, stop = 3))
# AP1 AP3 IR1 SM2
# 1 1 1 1
Upvotes: 1
Reputation: 70246
Here's another approach:
library(dplyr)
# Count the distinct individual IDs within each plot.
dx1 %>%
  group_by(Plot) %>%
  summarize(distinct_IDs = n_distinct(Cofecha))
#Source: local data frame [4 x 2]
#
# Plot distinct_IDs
#1 AP1 1
#2 AP3 1
#3 IR1 1
#4 SM2 1
And one more possibility using base R:
# Split the IDs by plot and count the distinct IDs in each group. vapply()
# guarantees a named integer vector, even when there are no groups at all.
vapply(
  split(dx1$Cofecha, dx1$Plot),
  function(ids) length(unique(ids)),
  integer(1)
)
#AP1 AP3 IR1 SM2
# 1 1 1 1
Upvotes: 1
Reputation: 886938
You could try
# For each plot, count the distinct individual IDs.
tapply(
  as.character(dx1$Cofecha),
  dx1$Plot,
  function(ids) length(unique(ids))
)
# AP1 AP3 IR1 SM2
# 1 1 1 1
Or
library(data.table)
# Pass dx1 through setDT(), then count the distinct IDs within each Plot group.
setDT(dx1)[, .(UniqueIDs = length(unique(Cofecha))), by = Plot]
# Plot UniqueIDs
# 1: IR1 1
# 2: AP1 1
# 3: SM2 1
# 4: AP3 1
Upvotes: 2