Reputation: 501
I have a data frame with many repeating values in certain columns. I would like to create a new column containing a distinct numeric value for each unique entry in the column of interest. I have looked through aggregation-related questions on Stack Overflow and haven't quite found what I am looking for.
dput(head(example)) output is below.
structure(list(avecor = c(-0.929199786400515, -0.729228501795928,
-0.431983639087243, -0.55088842103792, -0.978422379116014, -0.627856061946295
), miR = structure(c(9L, 5L, 6L, 2L, 8L, 4L), .Label = c("hsa-miR-107",
"hsa-miR-193a-3p", "hsa-miR-28-5p", "hsa-miR-331-3p", "hsa-miR-362-3p",
"hsa-miR-362-5p", "hsa-miR-429", "hsa-miR-590-5p", "hsa-miR-630"
), class = "factor"), mRNA = structure(c(1L, 2L, 2L, 3L, 3L,
4L), .Label = c("IGF1R", "PRKCA", "TESK2", "THBS1", "TLN2", "VAV3"
), class = "factor")), row.names = c("hsa-miR-630:IGF1R", "hsa-miR-362-3p:PRKCA",
"hsa-miR-362-5p:PRKCA", "hsa-miR-193a-3p:TESK2", "hsa-miR-590-5p:TESK2",
"hsa-miR-331-3p:THBS1"), class = "data.frame")
avecor miR mRNA
hsa-miR-630:IGF1R -0.9291998 hsa-miR-630 IGF1R
hsa-miR-362-3p:PRKCA -0.7292285 hsa-miR-362-3p PRKCA
hsa-miR-362-5p:PRKCA -0.4319836 hsa-miR-362-5p PRKCA
hsa-miR-193a-3p:TESK2 -0.5508884 hsa-miR-193a-3p TESK2
hsa-miR-590-5p:TESK2 -0.9784224 hsa-miR-590-5p TESK2
hsa-miR-331-3p:THBS1 -0.6278561 hsa-miR-331-3p THBS1
hsa-miR-28-5p:TLN2 -0.9988643 hsa-miR-28-5p TLN2
hsa-miR-331-3p:TLN2 -0.8773624 hsa-miR-331-3p TLN2
hsa-miR-429:TLN2 -0.9901250 hsa-miR-429 TLN2
hsa-miR-107:VAV3 -0.7713383 hsa-miR-107 VAV3
If applied to the mRNA column, the ideal output would be:
avecor miR mRNA UniquemRNA
hsa-miR-630:IGF1R -0.9291998 hsa-miR-630 IGF1R 1
hsa-miR-362-3p:PRKCA -0.7292285 hsa-miR-362-3p PRKCA 2
hsa-miR-362-5p:PRKCA -0.4319836 hsa-miR-362-5p PRKCA 2
hsa-miR-193a-3p:TESK2 -0.5508884 hsa-miR-193a-3p TESK2 3
hsa-miR-590-5p:TESK2 -0.9784224 hsa-miR-590-5p TESK2 3
hsa-miR-331-3p:THBS1 -0.6278561 hsa-miR-331-3p THBS1 4
hsa-miR-28-5p:TLN2 -0.9988643 hsa-miR-28-5p TLN2 5
hsa-miR-331-3p:TLN2 -0.8773624 hsa-miR-331-3p TLN2 5
hsa-miR-429:TLN2 -0.9901250 hsa-miR-429 TLN2 5
hsa-miR-107:VAV3 -0.7713383 hsa-miR-107 VAV3 6
Any help would be most appreciated.
Upvotes: 0
Views: 44
Reputation: 376
This solution uses only base R.
# Rebuild the example data frame from the question's dput() output.
# `mRNA` is a factor with six levels, of which only the first four occur
# in these six rows.
df <- structure(list(avecor = c(-0.929199786400515, -0.729228501795928,
-0.431983639087243, -0.55088842103792, -0.978422379116014, -0.627856061946295
), miR = structure(c(9L, 5L, 6L, 2L, 8L, 4L), .Label = c("hsa-miR-107",
"hsa-miR-193a-3p", "hsa-miR-28-5p", "hsa-miR-331-3p", "hsa-miR-362-3p",
"hsa-miR-362-5p", "hsa-miR-429", "hsa-miR-590-5p", "hsa-miR-630"
), class = "factor"), mRNA = structure(c(1L, 2L, 2L, 3L, 3L,
4L), .Label = c("IGF1R", "PRKCA", "TESK2", "THBS1", "TLN2", "VAV3"
), class = "factor")), row.names = c("hsa-miR-630:IGF1R", "hsa-miR-362-3p:PRKCA",
"hsa-miR-362-5p:PRKCA", "hsa-miR-193a-3p:TESK2", "hsa-miR-590-5p:TESK2",
"hsa-miR-331-3p:THBS1"), class = "data.frame")

# One integer ID per distinct mRNA: the factor's own level code.
# This is computed row by row, so it stays correct when the rows are not
# sorted by mRNA. Building the IDs from table() counts with rep() only works
# when equal values are already contiguous and in level order, and it breaks
# on NA because table() drops NA and the result comes out too short.
# The ID is the level index, so levels that never occur leave gaps.
UniquemRNA <- as.integer(df$mRNA)
UniquemRNA

# Attach the IDs as a new column and show the result.
df$UniquemRNA <- UniquemRNA
df
Upvotes: 1
Reputation: 1800
If I understand you correctly, you already created that information when you stored `mRNA` as a `factor`.
If that is really what you want, you could simply recode the `factor` into `numeric` values. Note, however, that this only replicates information that is already there.
This is how you could go about doing that:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Add `UniquemRNA` to the example data frame (rebuilt inline from the
# question's dput() output) and print the result.
mutate(
  structure(list(avecor = c(-0.929199786400515, -0.729228501795928,
  -0.431983639087243, -0.55088842103792, -0.978422379116014, -0.627856061946295
  ), miR = structure(c(9L, 5L, 6L, 2L, 8L, 4L), .Label = c("hsa-miR-107",
  "hsa-miR-193a-3p", "hsa-miR-28-5p", "hsa-miR-331-3p", "hsa-miR-362-3p",
  "hsa-miR-362-5p", "hsa-miR-429", "hsa-miR-590-5p", "hsa-miR-630"
  ), class = "factor"), mRNA = structure(c(1L, 2L, 2L, 3L, 3L,
  4L), .Label = c("IGF1R", "PRKCA", "TESK2", "THBS1", "TLN2", "VAV3"
  ), class = "factor")), row.names = c("hsa-miR-630:IGF1R", "hsa-miR-362-3p:PRKCA",
  "hsa-miR-362-5p:PRKCA", "hsa-miR-193a-3p:TESK2", "hsa-miR-590-5p:TESK2",
  "hsa-miR-331-3p:THBS1"), class = "data.frame"),
  # as.numeric() on a factor returns its internal level codes, so every
  # distinct mRNA gets one number.
  UniquemRNA = as.numeric(mRNA)
)
#> avecor miR mRNA UniquemRNA
#> 1 -0.9291998 hsa-miR-630 IGF1R 1
#> 2 -0.7292285 hsa-miR-362-3p PRKCA 2
#> 3 -0.4319836 hsa-miR-362-5p PRKCA 2
#> 4 -0.5508884 hsa-miR-193a-3p TESK2 3
#> 5 -0.9784224 hsa-miR-590-5p TESK2 3
#> 6 -0.6278561 hsa-miR-331-3p THBS1 4
Upvotes: 1