Reputation: 25
I'm looking to find the unique values of one variable — species (here a species code called "SPID", made of the first four letters of the genus name followed by the first four letters of the species name) — within different groups of my data frame: habitats (there are 3 habitats, stored in the variable "hab" and named TA, TB and TC).
Here is a sample of my data:
library(dplyr)
# Draw 20 random rows of the two relevant columns and print them as
# reproducible R code. (Console continuation prompts removed so the
# pipeline can be copied and run as-is.)
brk %>%
  dplyr::select(spid, hab) %>%
  dplyr::sample_n(20) %>%
  dput()
structure(list(spid = structure(c(127L, 78L, 33L, 162L, 81L,
72L, 72L, 196L, 196L, 241L, 240L, 238L, 190L, 181L, 188L, 162L,
214L, 13L, 178L, 78L), .Label = c("ACROEMER", "ACROMEGA", "AEROSUBPM",
"AMAZDIPL", "ANASAURI", "ANASPILI", "ANDRABER", "ANDRBILO", "ANEULATI",
"BAZZDECR", "BAZZDECRM", "BAZZMASC", "BAZZNITI", "BAZZPRAE",
"BAZZROCA", "BRACEURY", "BUCKMEMB", "CALYARGU", "CALYFISS", "CALYMASC",
"CALYPALI", "CALYPERU", "CAMPARCTM", "CAMPAURE", "CAMPCRAT",
"CAMPFLEX", "CAMPJAME", "CAMPROBI", "CAMPTHWA", "CEPHVAGI", "CERABELA",
"CERACORN", "CERAZENK", "CHEICAME", "CHEICORDI", "CHEIDECU",
"CHEIMONT", "CHEISERP", "CHEISURR", "CHEITRIF", "CHEIUSAM", "CHEIXANT",
"COLOCEAT", "COLOHASK", "COLOHILD", "COLOOBLI", "COLOPEPO", "COLOTANZ",
"COLOZENK", "COLUBENO", "COLUCALY", "COLUDIGI", "COLUHUMB", "COLUOBES",
"COLUTENU", "CONOTRAP", "CRYPMART", "CUSPCONT", "CYCLBORB", "CYCLBREV",
"CYLIKIAE", "DALTANGU", "DALTLATI", "DENDBORB", "DICRBILLB",
"DIPLCAVI", "DIPLCOGO", "DIPLCORN", "DREPCULT", "DREPHELE", "DREPMADA",
"DREPPHYS", "ECTRREGU", "ECTRVALE", "FISSASPL", "FISSMEGAH",
"FISSSCIO", "FRULAPIC", "FRULAPICU", "FRULBORB", "FRULCAPE",
"FRULGROS", "FRULHUMB", "FRULLIND", "FRULREPA", "FRULSCHI", "FRULSERR",
"FRULUSAMR", "FRULVARI", "FUSCCONN", "GOTTNEES", "GOTTSCHI",
"GOTTSPHA", "GROULAXO", "HAPLSTIC", "HERBDICR", "HERBJUNI", "HERBMAUR",
"HETEDUBI", "HETESPLE", "HETESPN", "HOLOBORB", "HOLOCYLI", "HYPNCUPR",
"ISOPCHRY", "ISOPCITR", "ISOPINTO", "ISOTAUBE", "JAEGSOLI", "JAEGSOLIR",
"KURZCAPI", "KURZCAPIS", "LEJEALAT", "LEJEANIS", "LEJECONF",
"LEJEECKL", "LEJEFLAV", "LEJELOMA", "LEJEOBTU", "LEJERAMO", "LEJETABU",
"LEJETUBE", "LEJEVILL", "LEPIAFRI", "LEPICESP", "LEPIDELE", "LEPIHIRS",
"LEPISTUH", "LEPISTUHP", "LEPTFLEX", "LEPTINFU", "LEPTMACU",
"LEUCANGU", "LEUCBIFI", "LEUCBORY", "LEUCCANDI", "LEUCCAPI",
"LEUCCINC", "LEUCDELI", "LEUCGRAN", "LEUCHILD", "LEUCISLE", "LEUCLEPE",
"LEUCMAYO", "LEUCSEYC", "LOPHBORB", "LOPHCOAD", "LOPHCONC", "LOPHDIFF",
"LOPHEULO", "LOPHMULT", "LOPHMURI", "LOPHNIGR", "LOPHSUBF", "MACRACID",
"MACRMAUR", "MACRMICR", "MACRPALL", "MACRSERP", "MACRSULC", "MACRTENU",
"MASTDICL", "METZCONS", "METZFURC", "METZLEPT", "METZMADA", "MICRAFRI",
"MICRANKA", "MICRDISP", "MICRINFL", "MICRKAME", "MICROBLO", "MICRSTRA",
"MITTLIMO", "MNIOFUSC", "PAPICOMP", "PLAGANGU", "PLAGDREP", "PLAGPECT",
"PLAGRENA", "PLAGREPA", "PLAGRODR", "PLAGTERE", "PLEUGIGA", "PLICHIRT",
"POLYCOMM", "POROELON", "POROMADA", "POROUSAG", "PRIOGRAT", "PSEUDECI",
"PTYCSTRI", "PYRRSPIN", "RACOAFRI", "RADUANKE", "RADUAPPR", "RADUBORB",
"RADUBORY", "RADUCOMO", "RADUEVEL", "RADUFULV", "RADUMADA", "RADUSTEN",
"RADUTABU", "RADUVOLU", "RHAPCRIS", "RHAPGRAC", "RHAPRUBR", "RICCAMAZ",
"RICCEROS", "RICCFAST", "RICCLIMB", "RICCLONG", "SCHLBADI", "SCHLMICRO",
"SCHLOANGU", "SCHLSQUA", "SEMACRAS", "SEMASCHI", "SEMASUBP",
"SERPCYRT", "SOLEBORG", "SOLEONRA", "SOLESPHA", "SPHATUMI", "SPHEMINU",
"SYRRAFRI", "SYRRAPER", "SYRRDIMO", "SYRRGAUD", "SYRRHISP", "SYRRPOTT",
"SYRRPROL", "SYRRPROLA", "SYZYPURP", "TAXICONFO", "TELACOAC",
"TELADIAC", "TELANEMA", "TRICADHA", "TRICDEBE", "TRICPERV", "ULOTFULV",
"WARBLEPT", "ZYGOINTE", "ZYGOREIN"), class = "factor"), hab = structure(c(1L,
3L, 3L, 2L, 3L, 2L, 1L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 1L, 3L,
2L, 3L, 2L), .Label = c("TA", "TB", "TC"), class = "factor")), row.names = c(NA,
-20L), class = "data.frame")
I tried with :
# Count the number of distinct species codes within each habitat.
# This yields one count per habitat, not the species names themselves.
brk %>%
  dplyr::select(spid, hab) %>%
  dplyr::group_by(hab) %>%
  dplyr::summarise(n_distinct(spid))
Obviously this won't show me what I want: it only returns the number of distinct species per habitat. How can I get the names of the unique species in each habitat?
Thank you for your help,
Upvotes: 0
Views: 241
Reputation: 3923
EDITED to match the clarification. A very low-key solution, but sometimes tables are a nice way of displaying this...
# Cross-tabulate species (rows) against habitats (columns); built once and
# reused for both the row filter and the result.
spid_by_hab <- table(your_data$spid, your_data$hab)
# Keep the species recorded in exactly one habitat. Counting the non-zero
# cells per row (rather than summing the counts) also keeps a species that
# was recorded several times within its single habitat. `drop = FALSE`
# keeps the result a table even when only one species qualifies.
spid_by_hab[rowSums(spid_by_hab > 0) == 1, , drop = FALSE]
#>
#> TA TB TC
#> BAZZNITI 0 1 0
#> CERAZENK 0 0 1
#> FRULCAPE 0 0 1
#> LEPIHIRS 1 0 0
#> PLAGDREP 0 0 1
#> PLAGREPA 1 0 0
#> POROMADA 0 1 0
#> PRIOGRAT 0 0 1
#> SCHLBADI 0 0 1
#> TELADIAC 0 1 0
#> TRICADHA 0 1 0
#> TRICDEBE 1 0 0
Upvotes: 0
Reputation: 174586
You could try:
# Keep only the species code and habitat columns.
df <- dplyr::select(brk, spid, hab)
# Split the rows by habitat, then drop duplicated rows within each piece,
# giving a named list with one data frame per habitat.
lapply(
  X = split(x = df, f = df$hab),
  FUN = unique
)
This will give you a list containing one data frame for each habitat, with the unique species in the `spid` column:
$TA
spid hab
1 LEPIHIRS TA
7 DREPPHYS TA
10 TRICDEBE TA
14 PLAGREPA TA
16 MASTDICL TA
$TB
spid hab
4 MASTDICL TB
6 DREPPHYS TB
8 RADUAPPR TB
11 TRICADHA TB
12 TELADIAC TB
15 POROMADA TB
18 BAZZNITI TB
20 FRULAPIC TB
$TC
spid hab
2 FRULAPIC TC
3 CERAZENK TC
5 FRULCAPE TC
9 RADUAPPR TC
13 PRIOGRAT TC
17 SCHLBADI TC
19 PLAGDREP TC
Edit based on clarification:
# Species x habitat contingency table, built once and reused.
spid_by_hab <- table(df$spid, df$hab)
# Keep species present in exactly one habitat: count the habitats with a
# non-zero cell instead of summing the counts, so repeated records within a
# single habitat do not exclude the species. `drop = FALSE` keeps the table
# shape (and therefore the Var1/Var2/Freq layout) when one species matches.
df2 <- as.data.frame(spid_by_hab[rowSums(spid_by_hab > 0) == 1, , drop = FALSE])
# Discard the empty species/habitat combinations, leaving one row per species.
df2[df2$Freq != 0, ]
#> Var1 Var2 Freq
#> 4 LEPIHIRS TA 1
#> 6 PLAGREPA TA 1
#> 12 TRICDEBE TA 1
#> 13 BAZZNITI TB 1
#> 19 POROMADA TB 1
#> 22 TELADIAC TB 1
#> 23 TRICADHA TB 1
#> 26 CERAZENK TC 1
#> 27 FRULCAPE TC 1
#> 29 PLAGDREP TC 1
#> 32 PRIOGRAT TC 1
#> 33 SCHLBADI TC 1
Upvotes: 3
Reputation: 799
Here's a revised solution to show the `spid`s associated with only one `hab`.
# For each species, count the distinct habitats it occurs in and keep the
# species found in exactly one. (Console prompt removed so the code can be
# copied and run as-is.)
brk %>%
  group_by(spid) %>%
  summarize(nn = n_distinct(hab)) %>%
  filter(nn == 1) %>%
  ungroup()
# A tibble: 12 x 2
spid nn
<fct> <int>
1 BAZZNITI 1
2 CERAZENK 1
3 FRULCAPE 1
4 LEPIHIRS 1
5 PLAGDREP 1
6 PLAGREPA 1
7 POROMADA 1
8 PRIOGRAT 1
9 SCHLBADI 1
10 TELADIAC 1
11 TRICADHA 1
12 TRICDEBE 1
Upvotes: 1
Reputation: 3876
A `dplyr` solution:
# One row per species/habitat combination, ordered by habitat.
brk %>%
  dplyr::distinct(spid, hab) %>%
  dplyr::arrange(hab)
spid hab
1 LEPIHIRS TA
2 DREPPHYS TA
3 TRICDEBE TA
4 PLAGREPA TA
5 MASTDICL TA
6 MASTDICL TB
7 DREPPHYS TB
8 RADUAPPR TB
9 TRICADHA TB
10 TELADIAC TB
11 POROMADA TB
12 BAZZNITI TB
13 FRULAPIC TB
14 FRULAPIC TC
15 CERAZENK TC
16 FRULCAPE TC
17 RADUAPPR TC
18 PRIOGRAT TC
19 SCHLBADI TC
20 PLAGDREP TC
Edit Based on clarification
# Keep the species that occur in exactly one habitat.
# De-duplicating the species/habitat pairs first means a species recorded
# several times within the same habitat still counts as one habitat, instead
# of being dropped because it has more than one row.
# The result stays grouped by `spid`; append `ungroup()` before further work.
brk %>%
  distinct(spid, hab) %>%
  group_by(spid) %>%
  filter(n() == 1)
# A tibble: 12 x 2
# Groups: spid [12]
spid hab
<fct> <fct>
1 LEPIHIRS TA
2 CERAZENK TC
3 FRULCAPE TC
4 TRICDEBE TA
5 TRICADHA TB
6 TELADIAC TB
7 PRIOGRAT TC
8 PLAGREPA TA
9 POROMADA TB
10 SCHLBADI TC
11 BAZZNITI TB
12 PLAGDREP TC
Upvotes: 2
Reputation: 25
Thank you for your answers. Yes, it's kind of what I want, but do you know why a species (spid) can be unique in 2 habitats? What I want is to show all species that are present in only 1 habitat. Maybe I didn't explain it well; sorry if that's the case.
Upvotes: 0