Reputation: 11
I am new to R. I have the table shown below. First I tried to open the file with
df <- read.csv(
  file = "geral_modelo_inadimplencia_2.csv",
  sep = ";",
  stringsAsFactors = FALSE
)
That reads every column as chr. I then tried to convert the columns to numeric with as.numeric, but it returns
[1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA.
How can I convert these chr columns to numeric?
Saldo.Rotativo A.Vista Parcelado Saque
1 dez/16 2.919.869.506,96 6.364.901.107,55 463.677.827,46 729.185,18
2 dez/17 2.007.351.784,18 6.831.919.805,09 780.093.428,86 2.817.814,72
3 dez/18 1.944.752.969,68 7.458.695.484,62 542.074.530,21 1.885.633,29
4 jan/19 1.991.796.619,57 7.371.837.099,11 540.893.516,33 2.058.371,60
5 fev/19 2.121.650.035,30 6.967.735.692,25 562.211.487,01 2.132.249,61
6 mar/19 2.062.475.653,11 6.900.028.117,67 575.861.976,61 2.100.849,74
7 abr/19 2.089.793.039,24 6.945.593.710,28 583.181.387,89 2.090.951,69
8 mai/19 2.078.700.800,99 7.146.597.703,16 612.184.578,96 2.132.951,04
9 jun/19 2.239.390.093,82 6.851.118.033,80 618.844.690,37 1.764.866,10
10 jul/19 2.121.263.409,38 7.196.087.606,84 629.995.715,52 3.945.650,40
11 ago/19 2.173.187.280,54 7.089.675.942,22 624.808.459,45 6.341.527,95
12 set/19 2.285.571.063,90 7.111.228.186,19 617.840.220,61 6.143.505,16
13 out/19 2.193.401.889,85 7.263.912.266,04 622.821.392,86 7.253.169,67
14 nov/19 2.281.061.211,60 7.240.713.335,11 611.161.428,40 7.484.398,11
15 dez/19 2.212.531.321,45 7.892.016.606,72 597.916.084,63 6.464.980,78
Upvotes: 1
Views: 230
Reputation: 160892
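Unfortunately, base R doesn't appear to have a function that automatically handles numbers written in a different locale's format, so we need a gsub/sub pair: gsub removes the "." thousands separators, and sub then turns the "," decimal mark into ".".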
# Convert every column except the first from text such as "2.919.869.506,96"
# to numeric. gsub() drops all "." thousands separators (fixed = TRUE so that
# "." is a literal dot, not a regex wildcard); sub() then swaps the single ","
# decimal mark for ".", which as.numeric() can parse.
# dat[-1] (list-style indexing) is used instead of dat[, -1] so the selection
# stays a data frame even if only one column is left after dropping the first.
dat[-1] <- lapply(dat[-1], function(a) {
  as.numeric(sub(",", ".", gsub(".", "", a, fixed = TRUE), fixed = TRUE))
})
head(dat)
# rowname Saldo.Rotativo A.Vista Parcelado Saque
# 1 dez/16 2919869507 6364901108 463677827 729185.2
# 2 dez/17 2007351784 6831919805 780093429 2817814.7
# 3 dez/18 1944752970 7458695485 542074530 1885633.3
# 4 jan/19 1991796620 7371837099 540893516 2058371.6
# 5 fev/19 2121650035 6967735692 562211487 2132249.6
# 6 mar/19 2062475653 6900028118 575861977 2100849.7
str(dat)
# 'data.frame': 15 obs. of 5 variables:
# $ rowname : chr "dez/16" "dez/17" "dez/18" "jan/19" ...
# $ Saldo.Rotativo: num 2.92e+09 2.01e+09 1.94e+09 1.99e+09 2.12e+09 ...
# $ A.Vista : num 6.36e+09 6.83e+09 7.46e+09 7.37e+09 6.97e+09 ...
# $ Parcelado : num 4.64e+08 7.80e+08 5.42e+08 5.41e+08 5.62e+08 ...
# $ Saque : num 729185 2817815 1885633 2058372 2132250 ...
While not base R, readr::parse_number can be made to do it "properly":
# Print up to 9 significant digits so the parsed decimals are visible.
options(digits = 9)
# Declare the number format once: "," is the decimal mark and "." groups
# the thousands.
readr::parse_number(
  "6.143.505,16",
  locale = readr::locale(decimal_mark = ",", grouping_mark = ".")
)
# [1] 6143505.16
So while it's not necessarily faster (I have not benchmarked it), one might consider this cleaner, more declarative code:
# Parse every column except the first with a locale that declares "." as the
# thousands separator and "," as the decimal mark.
# dat[-1] (list-style indexing) is used instead of dat[, -1] so the selection
# stays a data frame even if only one column is left after dropping the first.
str(lapply(
  dat[-1],
  readr::parse_number,
  locale = readr::locale(grouping_mark = ".", decimal_mark = ",")
))
# List of 4
# $ Saldo.Rotativo: num [1:15] 2.92e+09 2.01e+09 1.94e+09 1.99e+09 2.12e+09 ...
# $ A.Vista : num [1:15] 6.36e+09 6.83e+09 7.46e+09 7.37e+09 6.97e+09 ...
# $ Parcelado : num [1:15] 4.64e+08 7.80e+08 5.42e+08 5.41e+08 5.62e+08 ...
# $ Saque : num [1:15] 729185 2817815 1885633 2058372 2132250 ...
(Output passed through str and truncated for brevity.)
Data:
# Sample data: one label column plus four amount columns, all stored as
# character strings that use "." for thousands and "," for decimals.
dat <- data.frame(
  rowname = c(
    "dez/16", "dez/17", "dez/18", "jan/19", "fev/19",
    "mar/19", "abr/19", "mai/19", "jun/19", "jul/19",
    "ago/19", "set/19", "out/19", "nov/19", "dez/19"
  ),
  Saldo.Rotativo = c(
    "2.919.869.506,96", "2.007.351.784,18", "1.944.752.969,68",
    "1.991.796.619,57", "2.121.650.035,30", "2.062.475.653,11",
    "2.089.793.039,24", "2.078.700.800,99", "2.239.390.093,82",
    "2.121.263.409,38", "2.173.187.280,54", "2.285.571.063,90",
    "2.193.401.889,85", "2.281.061.211,60", "2.212.531.321,45"
  ),
  A.Vista = c(
    "6.364.901.107,55", "6.831.919.805,09", "7.458.695.484,62",
    "7.371.837.099,11", "6.967.735.692,25", "6.900.028.117,67",
    "6.945.593.710,28", "7.146.597.703,16", "6.851.118.033,80",
    "7.196.087.606,84", "7.089.675.942,22", "7.111.228.186,19",
    "7.263.912.266,04", "7.240.713.335,11", "7.892.016.606,72"
  ),
  Parcelado = c(
    "463.677.827,46", "780.093.428,86", "542.074.530,21",
    "540.893.516,33", "562.211.487,01", "575.861.976,61",
    "583.181.387,89", "612.184.578,96", "618.844.690,37",
    "629.995.715,52", "624.808.459,45", "617.840.220,61",
    "622.821.392,86", "611.161.428,40", "597.916.084,63"
  ),
  Saque = c(
    "729.185,18", "2.817.814,72", "1.885.633,29",
    "2.058.371,60", "2.132.249,61", "2.100.849,74",
    "2.090.951,69", "2.132.951,04", "1.764.866,10",
    "3.945.650,40", "6.341.527,95", "6.143.505,16",
    "7.253.169,67", "7.484.398,11", "6.464.980,78"
  ),
  # Keep the columns as character on R versions before 4.0 as well.
  stringsAsFactors = FALSE
)
(I inferred the column name rowname, since it was not obvious in your question. When there's any ambiguity, it helps to share the data with dput.)
Upvotes: 2
Reputation: 887851
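We can use str_remove_all from stringr to drop the "." thousands separators, then str_replace to turn the "," decimal mark into "." before converting to numeric: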
library(dplyr)
library(stringr)
# Convert every column except the first from text such as "2.919.869.506,96"
# to numeric: remove all "." thousands separators, turn the "," decimal mark
# into ".", then call as.numeric().
# mutate(across(...)) replaces the superseded mutate_at(); fixed() matches the
# separators literally, so "." needs no regex escaping.
df <- df %>%
  mutate(across(-1, ~ as.numeric(
    str_replace(str_remove_all(.x, fixed(".")), fixed(","), ".")
  )))
df
# date Saldo.Rotativo A.Vista Parcelado Saque
#1 dez/16 2919869507 6364901108 463677827 729185.2
#2 dez/17 2007351784 6831919805 780093429 2817814.7
#3 dez/18 1944752970 7458695485 542074530 1885633.3
#4 jan/19 1991796620 7371837099 540893516 2058371.6
#5 fev/19 2121650035 6967735692 562211487 2132249.6
#6 mar/19 2062475653 6900028118 575861977 2100849.7
#7 abr/19 2089793039 6945593710 583181388 2090951.7
#8 mai/19 2078700801 7146597703 612184579 2132951.0
#9 jun/19 2239390094 6851118034 618844690 1764866.1
#10 jul/19 2121263409 7196087607 629995716 3945650.4
#11 ago/19 2173187281 7089675942 624808459 6341528.0
#12 set/19 2285571064 7111228186 617840221 6143505.2
#13 out/19 2193401890 7263912266 622821393 7253169.7
#14 nov/19 2281061212 7240713335 611161428 7484398.1
#15 dez/19 2212531321 7892016607 597916085 6464980.8
str(df)
#'data.frame': 15 obs. of 5 variables:
# $ date : chr "dez/16" "dez/17" "dez/18" "jan/19" ...
# $ Saldo.Rotativo: num 2.92e+09 2.01e+09 1.94e+09 1.99e+09 2.12e+09 ...
# $ A.Vista : num 6.36e+09 6.83e+09 7.46e+09 7.37e+09 6.97e+09 ...
# $ Parcelado : num 4.64e+08 7.80e+08 5.42e+08 5.41e+08 5.62e+08 ...
# $ Saque : num 729185 2817815 1885633 2058372 2132250 ...
Or, in base R, we can also use type.convert after replacing the separators:
# Rewrite the text of every column except the first: drop the "." thousands
# separators, then turn the "," decimal mark into ".". The columns are still
# character after this step.
df[-1] <- lapply(df[-1], function(column) {
  without_grouping <- gsub(".", "", column, fixed = TRUE)
  sub(",", ".", without_grouping)
})
# Let type.convert() choose each column's type; as.is = TRUE keeps any
# non-numeric column as character instead of converting it to a factor.
df <- type.convert(df, as.is = TRUE)
# Example data used by the snippets above: one label column plus four amount
# columns, all stored as character strings that use "." for thousands and ","
# for decimals. Row names are the character labels "1" to "15".
df <- data.frame(
  date = c(
    "dez/16", "dez/17", "dez/18", "jan/19", "fev/19",
    "mar/19", "abr/19", "mai/19", "jun/19", "jul/19",
    "ago/19", "set/19", "out/19", "nov/19", "dez/19"
  ),
  Saldo.Rotativo = c(
    "2.919.869.506,96", "2.007.351.784,18", "1.944.752.969,68",
    "1.991.796.619,57", "2.121.650.035,30", "2.062.475.653,11",
    "2.089.793.039,24", "2.078.700.800,99", "2.239.390.093,82",
    "2.121.263.409,38", "2.173.187.280,54", "2.285.571.063,90",
    "2.193.401.889,85", "2.281.061.211,60", "2.212.531.321,45"
  ),
  A.Vista = c(
    "6.364.901.107,55", "6.831.919.805,09", "7.458.695.484,62",
    "7.371.837.099,11", "6.967.735.692,25", "6.900.028.117,67",
    "6.945.593.710,28", "7.146.597.703,16", "6.851.118.033,80",
    "7.196.087.606,84", "7.089.675.942,22", "7.111.228.186,19",
    "7.263.912.266,04", "7.240.713.335,11", "7.892.016.606,72"
  ),
  Parcelado = c(
    "463.677.827,46", "780.093.428,86", "542.074.530,21",
    "540.893.516,33", "562.211.487,01", "575.861.976,61",
    "583.181.387,89", "612.184.578,96", "618.844.690,37",
    "629.995.715,52", "624.808.459,45", "617.840.220,61",
    "622.821.392,86", "611.161.428,40", "597.916.084,63"
  ),
  Saque = c(
    "729.185,18", "2.817.814,72", "1.885.633,29",
    "2.058.371,60", "2.132.249,61", "2.100.849,74",
    "2.090.951,69", "2.132.951,04", "1.764.866,10",
    "3.945.650,40", "6.341.527,95", "6.143.505,16",
    "7.253.169,67", "7.484.398,11", "6.464.980,78"
  ),
  row.names = as.character(1:15),
  # Keep the columns as character on R versions before 4.0 as well.
  stringsAsFactors = FALSE
)
Upvotes: 0