Reputation: 119
I have a dataframe with some column names like: "Sample_ID", "Time00", "X7236Nr1", "Y844Nr1856", "X9834Nr21", "S844Nr567"
I want to add leading zeros to the digits after "Nr", so that they all become 4-digit numbers: "Sample_ID", "Time00", "X7236Nr0001", "Y844Nr1856", "X9834Nr0021", "S844Nr0567"
I tried to use rename_at to select the columns and apply the appropriate function such as sprintf,
df %>% rename_at(vars(starts_with("[A-B][0-9]")), ~ FUNCTION)
but I could not build the correct function. Can you please advise a way to deal with this kind of mixed string?
Thanks in advance
Upvotes: 3
Views: 136
Reputation: 101335
You can try
# Flag the names that end in "Nr" followed by digits; only those are touched.
idx <- grepl("Nr\\d+$", s)
x <- s[idx]
# Separate each hit into the stem (everything up to and including "Nr") and
# the trailing number, then reassemble with the number padded to four digits.
stem <- sub("\\d+$", "", x)
number <- as.integer(sub(".*Nr", "", x))
s[idx] <- paste0(stem, sprintf("%.4i", number))
# Split each name at the boundary between "Nr" and the first digit after it,
# then left-pad the digit part with zeros up to four characters.
# vapply() keeps the result a character vector even when `s` is empty or a
# name does not split into exactly two pieces (sapply() would return a list).
vapply(
  strsplit(s, "(?<=Nr)(?=\\d)", perl = TRUE),
  \(x) {
    if (length(x) == 2) {
      # x[1] is the stem ending in "Nr", x[2] the digits (plus anything after).
      paste0(x[1], strrep("0", max(0, 4 - nchar(x[2]))), x[2])
    } else if (length(x) == 1) {
      # No "Nr<digit>" boundary: keep the name exactly as it is.
      x
    } else {
      # Zero or several pieces: glue them back into one unchanged string.
      paste(x, collapse = "")
    }
  },
  character(1)
)
library(gsubfn)
# Capture everything up to and including "Nr" plus the trailing digits, then
# rebuild the name with the digits zero-padded to four places.
# NOTE(review): relies on gsubfn binding the formula's free variables (x, y)
# to the two capture groups in order — confirm against the gsubfn docs.
gsubfn("(.*Nr)(\\d+)$", ~ paste0(x, sprintf("%.4i", as.integer(y))), s)
or
# Same idea with a single capture group: match "Nr" plus the trailing digits
# and re-emit "Nr" followed by the number zero-padded to four digits.
# The pattern and the replacement formula are two separate arguments.
gsubfn("Nr(\\d+)$", ~ sprintf("Nr%.4i", as.integer(x)), s)
which gives
> s
[1] "Sample_ID" "Time00" "X7236Nr0001" "Y844Nr1856" "X9834Nr0021"
[6] "S844Nr0567"
# Example column names: two without an "Nr" suffix, four with one.
s <- c(
  "Sample_ID",
  "Time00",
  "X7236Nr1",
  "Y844Nr1856",
  "X9834Nr21",
  "S844Nr567"
)
Upvotes: 4
Reputation: 2911
# your data.frame
df <- data.frame(Sample_ID = 1, Time00 = 1, X7236Nr1 = 1, Y844Nr1856 = 1, X9834Nr21 = 1, S844Nr567 = 1)
# Base R: zero-pad the digits that follow "Nr" to four places.
# Only the names are rewritten, so the columns are never rebuilt or coerced.
nms <- names(df)
# Position of the digit run directly after "Nr" in each name (-1 where absent).
nr_pos <- regexpr("(?<=Nr)\\d+", nms, perl = TRUE)
# Replace just those digit runs; names without a match are left untouched.
regmatches(nms, nr_pos) <- sprintf("%04d", as.integer(regmatches(nms, nr_pos)))
names(df) <- nms
I have a dataframe with some column names like; "Sample_ID", "Time00", "X7236Nr1", "Y844Nr1856", "X9834Nr21", "S844Nr567"
You can do it by using str_replace_all with a str_match that finds the "Nr" + number, after which str_pad() pads the number to 4 digits with zeroes.
library(dplyr)
library(stringr)
# your data.frame
df <- data.frame(Sample_ID = 1, Time00 = 1, X7236Nr1 = 1, Y844Nr1856 = 1, X9834Nr21 = 1, S844Nr567 = 1)
# Zero-pad the number after "Nr" in every column name.
# The replacement function works on a whole vector of matches, so it behaves
# the same whether stringr calls it once per match or once with all matches.
df <- df %>%
  rename_with(~ str_replace_all(., "Nr(\\d+)", function(x) {
    # Column 2 of the match matrix holds the captured digits (NA if no match).
    digits <- str_match(x, "Nr(\\d+)")[, 2]
    # only do if "Nr" is found; anything else is returned unchanged
    ifelse(is.na(digits), x, paste0("Nr", str_pad(digits, 4, pad = "0")))
  }))
### Result
> colnames(df)
"Sample_ID" "Time00" "X7236Nr0001" "Y844Nr1856" "X9834Nr0021" "S844Nr0567"
# Explanations
> str_match("Y844Nr0856", "Nr(\\d+)")
[,1] [,2]
[1,] "Nr0856" "0856"
> str_match("Time00", "Nr(\\d+)") # has NA as match[,2], therefore we will not replace anything
[,1] [,2]
[1,] NA NA
> str_pad("856", 4, pad = "0") # could also use sprintf()
[1] "0856"
Upvotes: 1
Reputation: 72803
Here is a way using sprintf:
# Zero-pad whatever follows "Nr" in each element of `x`, using the width of
# the longest such suffix found in `x`.
#
# x: character vector of names.
# Returns a character vector the same length as `x`; elements with nothing
# after "Nr" (or no "Nr" at all) come back unchanged.
f <- \(x) {
  # Split right after "Nr": c(stem, digits) when something follows "Nr",
  # otherwise the single untouched string.
  sts <- strsplit(x, '(?<=Nr)', perl=TRUE)
  u <- lengths(sts) > 1
  # Guard: with no "Nr" names, max() over nothing would warn and give -Inf.
  if (any(u)) {
    nbs <- vapply(sts[u], `[[`, character(1), 2)
    fmt <- paste0('%0', max(nchar(nbs)), 'd')
    sts[u] <- Map(c, lapply(sts[u], `[[`, 1), sprintf(fmt, as.integer(nbs)))
  }
  # vapply() keeps the result a character vector, even for empty input.
  vapply(sts, paste, character(1), collapse='')
}
> f(x)
[1] "Sample_ID" "Time00" "X7236Nr0001" "Y844Nr1856" "X9834Nr0021" "S844Nr0567"
Upvotes: 0
Reputation: 79218
Use str_replace_all as shown below:
# Replace only the digit run directly preceded by "Nr" with its zero-padded,
# four-digit form; names without such a run are returned unchanged.
str_replace_all(
  names(df),
  "(?<=Nr)\\d+",
  \(digits) sprintf("%04d", as.numeric(digits))
)
[1] "Sample_ID" "Time00" "X7236Nr0001" "Y844Nr1856" "X9834Nr0021"
[6] "S844Nr0567"
The regex is even simpler if you can use rename_with:
# Restrict the rename to columns ending in "Nr<digits>" and pad only that
# trailing digit run. A bare "\\d+" would also pad the digits before "Nr"
# (e.g. Y844Nr1856 -> Y0844Nr1856).
df %>%
  rename_with(
    ~ str_replace_all(., "\\d+$", ~ sprintf("%04d", as.numeric(.))),
    matches("Nr\\d+$")
  )
Sample_ID Time00 X7236Nr0001 Y844Nr1856 X9834Nr0021 S844Nr0567
1 1 A 1 4 7 10
2 2 B 2 5 8 11
3 3 C 3 6 9 12
in base R, this will directly change the names of the df:
# Locate the digit run that directly follows "Nr" in each column name
# (names without one get no match and are left alone).
nr_digits <- regexpr("(?<=Nr)\\d+", names(df), perl = TRUE)
# Build the four-digit, zero-padded replacements for the matched runs ...
padded <- sprintf("%04d", as.numeric(regmatches(names(df), nr_digits)))
# ... and write them back into the names of df in place.
regmatches(names(df), nr_digits) <- padded
df
Sample_ID Time00 X7236Nr0001 Y844Nr1856 X9834Nr0021 S844Nr0567
1 1 A 1 4 7 10
2 2 B 2 5 8 11
3 3 C 3 6 9 12
Upvotes: 3