Reputation: 5169
I have the following code:
library(tidyverse)
# Function ----------------------------------------------------------------
#' Convert a three-letter amino-acid sequence to its one-letter string
#'
#' @param three_aa_seq A string of three-letter residue codes joined by "-",
#'   e.g. "His-Ser-Leu".
#' @return A single string of one-letter codes, e.g. "HSL". Codes that are
#'   not in the lookup table are dropped.
#' @note Only the FIRST element of `three_aa_seq` is converted (see `[[1]]`
#'   below); the function is not vectorised over its input, so passing a
#'   whole column yields one string built from the first row only.
convert <- function(three_aa_seq = NULL) {
  # Lookup vector: names are three-letter codes, values are one-letter codes.
  one_letter <- c(
    Ala = "A", # Alanine
    Arg = "R", # Arginine
    Asn = "N", # Asparagine
    Asp = "D", # Aspartate
    Cys = "C", # Cysteine
    Gln = "Q", # Glutamine
    Glu = "E", # Glutamate
    Gly = "G", # Glycine
    His = "H", # Histidine
    Ile = "I", # Isoleucine
    Leu = "L", # Leucine
    Lys = "K", # Lysine
    Met = "M", # Methionine
    Phe = "F", # Phenylalanine
    Pro = "P", # Proline
    Ser = "S", # Serine
    Thr = "T", # Threonine
    Trp = "W", # Tryptophan
    Tyr = "Y", # Tyrosine
    Val = "V"  # Valine
  )

  # strsplit() returns a list with one element per input string; `[[1]]`
  # keeps the residue codes of the first string only.
  residues <- strsplit(as.character(three_aa_seq), "-", fixed = TRUE)[[1]]

  # match() preserves the order of `residues`; unmatched codes give NA and
  # are removed, mirroring the inner join this lookup replaces.
  idx <- match(residues, names(one_letter))
  paste(one_letter[idx[!is.na(idx)]], collapse = "")
}
This converts a hyphen-separated sequence of three-letter amino acid codes into a string of one-letter codes, e.g. "His-Ser-Leu"
becomes HSL.
But when I tried with this code:
# Demonstration: apply convert() to a two-row table of peptides and keep the
# peptide name next to the converted sequence.
tribble(
  ~pep_name, ~three_seq,
  "PA_19", "His-Ser-Leu-Gly-Lys-Trp-Leu-Gly-His-Pro-Asp-Lys-Phe",
  "PA_20", "Thr-Ala-Pro-Arg-Ser-Leu-Arg-Arg-Ser-Ser-Cys-Phe-Gly-Gly-Arg-Met-Asp-Arg-Ile-Gly-Ala-Gln-Ser-Gly-Leu-Gly-Cys-Asn-Ser-Phe-Arg-Tyr"
) %>%
  mutate(pep = convert(three_aa_seq = three_seq)) %>%
  # The column is called `pep_name`; `pepname` does not exist and would error.
  select(pep_name, pep)
It returns:
# A tibble: 2 x 2
pep_name pep
<chr> <chr>
1 PA_19 HSLGKWLGHPDKF
2 PA_20 HSLGKWLGHPDKF
The resulting pep column
for PA_20
is not updated to TAPRSLRRSSCFGGRMDRIGAQSGLGCNSFRY; it repeats the PA_19 result instead.
What's the right way to do it?
Upvotes: 3
Views: 191
Reputation: 2724
Please be aware that creating a tibble
for each replacement will get pretty slow when used with more rows. The most advisable approach is to use the vectorized string replacement available in stringr
right away:
library(tidyverse)
library(stringr)
# Named lookup vector: names are three-letter codes, values are the matching
# one-letter codes. It is passed below to str_replace_all() as a set of
# pattern = replacement pairs.
aa_map <- c(
  Ala = "A", Arg = "R", Asn = "N", Asp = "D", Cys = "C",
  Gln = "Q", Glu = "E", Gly = "G", His = "H", Ile = "I",
  Leu = "L", Lys = "K", Met = "M", Phe = "F", Pro = "P",
  Ser = "S", Thr = "T", Trp = "W", Tyr = "Y", Val = "V"
)
# Translate every sequence in one vectorised pass: first swap each
# three-letter code for its one-letter code, then strip the hyphens.
tibble(
  pep_name = c("PA_19", "PA_20"),
  three_seq = c(
    "His-Ser-Leu-Gly-Lys-Trp-Leu-Gly-His-Pro-Asp-Lys-Phe",
    "Thr-Ala-Pro-Arg-Ser-Leu-Arg-Arg-Ser-Ser-Cys-Phe-Gly-Gly-Arg-Met-Asp-Arg-Ile-Gly-Ala-Gln-Ser-Gly-Leu-Gly-Cys-Asn-Ser-Phe-Arg-Tyr"
  )
) %>%
  mutate(
    one_seq = str_replace_all(str_replace_all(three_seq, aa_map), "-", "")
  )
Upvotes: 1
Reputation: 887048
The output of str_split is a list with one element per input string, but the OP's function extracts only the first element of that list with [[1]]. Instead, each list element should be processed in turn. If the initial dataset is 'tbl':
# Split every sequence on "-", then turn each vector of residue codes into
# its one-letter string by joining against the lookup table `pep_dat`.
tbl %>%
  mutate(
    pep = map_chr(
      str_split(three_seq, pattern = "-"),
      function(residues) {
        matched <- inner_join(tibble(three = residues), pep_dat, by = "three")
        paste(pull(matched, one), collapse = "")
      }
    )
  ) %>%
  select(-three_seq)
# A tibble: 2 x 2
# pep_name pep
# <chr> <chr>
#1 PA_19 HSLGKWLGHPDKF
#2 PA_20 TAPRSLRRSSCFGGRMDRIGAQSGLGCNSFRY
Converting the above into a function
#' Convert hyphen-separated three-letter sequences to one-letter strings
#'
#' @param three_aa_seq Character vector; each element is a sequence of
#'   three-letter codes joined by "-".
#' @param keydat Data frame with a `three` column (join key) and a `one`
#'   column (the one-letter codes to emit).
#' @return Character vector with one converted string per input element.
#'   Codes without a match in `keydat` are dropped by the inner join.
convertfn <- function(three_aa_seq, keydat) {
  # Converts the residue codes of a single sequence.
  to_one_letter <- function(residues) {
    matched <- inner_join(tibble(three = residues), keydat, by = "three")
    paste(pull(matched, one), collapse = "")
  }

  # str_split() yields one character vector per input string.
  map_chr(str_split(three_aa_seq, pattern = "-"), to_one_letter)
}
# Add the one-letter sequence for every row, then drop the raw
# three-letter column.
tbl %>%
  mutate(
    pep = convertfn(three_aa_seq = three_seq, keydat = pep_dat)
  ) %>%
  select(-three_seq)
Upvotes: 3