Johnny
Johnny

Reputation: 91

Value labels in R?

Thanks for all of the support that I've received on here so far. It is much appreciated!

I'm trying to add value labels to an R variable.

For example, I currently have this in SPSS:

VARIABLE LABELS band_age_3 'Three-way banded age group'.

VALUE LABELS band_age_3
               1 '1-29'
               2 '30-49'
               3 '50-59'

FREQUENCIES band_age_3.

Would this be the best method:

# Convert the numeric codes 1/2/3 into a factor carrying the SPSS value labels.
# The data frame name starts with a digit, so it is not a syntactic R name and
# must be wrapped in backticks; unquoted, `2019data$...` is a parse error.
`2019data`$band_age_3 <- factor(
  `2019data`$band_age_3,
  levels = c(1, 2, 3),
  labels = c("1-29", "30-49", "50-59")
)

As with all things R-related, there tend to be multiple ways of doing things, and I was wondering whether this would be the most suitable.

Cheers.

Upvotes: 3

Views: 5660

Answers (2)

Carlos Hernández
Carlos Hernández

Reputation: 155

To manage variable metadata SPSS-style in R, you can use the expss package.

Using a reproducible example:

df <- data.frame(band_age_3  = c(1, 3, 1, 2, 3), 
                 sex = c(1, 2, 2, 1, 1),
                 weight = c(50, 20, 30, 40, 5))

library(expss)
library(labelled)

=======================================================================
# Setting variable metadata
=======================================================================

# Set variable labels
var_lab(df$band_age_3) <- "Three-way banded age group" 
var_lab(df$sex) <- "Gender" 


# Get variable labels
var_lab(df$band_age_3)
[1] "Three-way banded age group"
var_lab(df$sex)
[1] "Gender"


# Set value labels
val_lab(df$band_age_3) <- make_labels("1 1-29
                                       2 30-49
                                       3 50-59")

val_lab(df$sex) <- make_labels("1 Men
                                2 Women")

# Get value labels
val_lab(df$band_age_3)
 1-29 30-49 50-59 
    1     2     3 

val_lab(df$sex)
  Men Women 
    1     2 

=======================================================================
# Frequencies and crosstabulations
=======================================================================


# Frequencies
=================================  

  # Unweighted
  fre(df$band_age_3)
 
 
 | Three-way banded age group | Count | Valid percent | Percent | Responses, % | Cumulative responses, % |
 | -------------------------- | ----- | ------------- | ------- | ------------ | ----------------------- |
 |                       1-29 |     2 |            40 |      40 |           40 |                      40 |
 |                      30-49 |     1 |            20 |      20 |           20 |                      60 |
 |                      50-59 |     2 |            40 |      40 |           40 |                     100 |
 |                     #Total |     5 |           100 |     100 |          100 |                         |
 |                       <NA> |     0 |               |       0 |              |                         |

  
  # Weighted 
  fre(df$band_age_3, weight = df$weight)

 | Three-way banded age group | Count | Valid percent | Percent | Responses, % | Cumulative responses, % |
 | -------------------------- | ----- | ------------- | ------- | ------------ | ----------------------- |
 |                       1-29 |    80 |          55.2 |    55.2 |         55.2 |                    55.2 |
 |                      30-49 |    40 |          27.6 |    27.6 |         27.6 |                    82.8 |
 |                      50-59 |    25 |          17.2 |    17.2 |         17.2 |                   100.0 |
 |                     #Total |   145 |         100.0 |   100.0 |        100.0 |                         |
 |                       <NA> |     0 |               |     0.0 |              |                         |


# Crosstabs
=================================    
  
  # Weighted 
  
      # Count
      cro(df$band_age_3, list(total(),df$sex), weight = df$weight, total_statistic = "w_cases")

 |                            |                   | #Total | Gender |       |
 |                            |                   |        |    Men | Women |
 | -------------------------- | ----------------- | ------ | ------ | ----- |
 | Three-way banded age group |              1-29 |     80 |     50 |    30 |
 |                            |             30-49 |     40 |     40 |       |
 |                            |             50-59 |     25 |      5 |    20 |
 |                            | #Total wtd. cases |    145 |     95 |    50 |
      
      # Row percentages
      cro_rpct(df$band_age_3, list(total(),df$sex), weight = df$weight, total_statistic = "w_rpct")

 |                            |                  | #Total | Gender |       |
 |                            |                  |        |    Men | Women |
 | -------------------------- | ---------------- | ------ | ------ | ----- |
 | Three-way banded age group |             1-29 |    100 |   62.5 |  37.5 |
 |                            |            30-49 |    100 |  100.0 |       |
 |                            |            50-59 |    100 |   20.0 |  80.0 |
 |                            | #Total wtd. rpct |    100 |   65.5 |  34.5 |
      
      # Column percentages
      cro_cpct(df$band_age_3, list(total(),df$sex), weight = df$weight, total_statistic = "w_cpct")  
      
 |                            |                  | #Total | Gender |       |
 |                            |                  |        |    Men | Women |
 | -------------------------- | ---------------- | ------ | ------ | ----- |
 | Three-way banded age group |             1-29 |   55.2 |   52.6 |    60 |
 |                            |            30-49 |   27.6 |   42.1 |       |
 |                            |            50-59 |   17.2 |    5.3 |    40 |
 |                            | #Total wtd. cpct |  100.0 |  100.0 |   100 |

      # Total percentages 
      cro_tpct(df$band_age_3, list(total(),df$sex), weight = df$weight, total_statistic = "w_tpct")
    
 |                            |                  | #Total | Gender |       |
 |                            |                  |        |    Men | Women |
 | -------------------------- | ---------------- | ------ | ------ | ----- |
 | Three-way banded age group |             1-29 |   55.2 |   34.5 |  20.7 |
 |                            |            30-49 |   27.6 |   27.6 |       |
 |                            |            50-59 |   17.2 |    3.4 |  13.8 |
 |                            | #Total wtd. tpct |  100.0 |   65.5 |  34.5 |
    

# Unweighted

    # Count
    cro(df$band_age_3, list(total(),df$sex), total_statistic = "u_cases")

 |                            |              | #Total | Gender |       |
 |                            |              |        |    Men | Women |
 | -------------------------- | ------------ | ------ | ------ | ----- |
 | Three-way banded age group |         1-29 |      2 |      1 |     1 |
 |                            |        30-49 |      1 |      1 |       |
 |                            |        50-59 |      2 |      1 |     1 |
 |                            | #Total cases |      5 |      3 |     2 |
    
    # Row percentages
    cro_rpct(df$band_age_3, list(total(),df$sex), total_statistic = "u_cases")

 |                            |              | #Total | Gender |       |
 |                            |              |        |    Men | Women |
 | -------------------------- | ------------ | ------ | ------ | ----- |
 | Three-way banded age group |         1-29 |    100 |     50 |    50 |
 |                            |        30-49 |    100 |    100 |       |
 |                            |        50-59 |    100 |     50 |    50 |
 |                            | #Total cases |      5 |      3 |     2 |
    
    # Column percentages
    cro_cpct(df$band_age_3, list(total(),df$sex), total_statistic = "u_cases") 

 |                            |              | #Total | Gender |       |
 |                            |              |        |    Men | Women |
 | -------------------------- | ------------ | ------ | ------ | ----- |
 | Three-way banded age group |         1-29 |     40 |   33.3 |    50 |
 |                            |        30-49 |     20 |   33.3 |       |
 |                            |        50-59 |     40 |   33.3 |    50 |
 |                            | #Total cases |      5 |    3.0 |     2 | 
    
    # Total percentages 
    cro_tpct(df$band_age_3, list(total(),df$sex), total_statistic = "u_cases") 

 |                            |              | #Total | Gender |       |
 |                            |              |        |    Men | Women |
 | -------------------------- | ------------ | ------ | ------ | ----- |
 | Three-way banded age group |         1-29 |     40 |     20 |    20 |
 |                            |        30-49 |     20 |     20 |       |
 |                            |        50-59 |     40 |     20 |    20 |
 |                            | #Total cases |      5 |      3 |     2 |


Upvotes: 3

akrun
akrun

Reputation: 887991

We can use a named vector for replacement

setNames(c("1-29", "30-49", "50-59"), 1:3)[as.character(`2019data`$band_age_3)]

Using a reproducible example

unname(setNames(c("1-29", "30-49", "50-59"), 1:3)[as.character(c(1, 3, 2, 1, 3))])
#[1] "1-29"  "50-59" "30-49" "1-29"  "50-59"

Upvotes: 1

Related Questions