UseR10085
UseR10085

Reputation: 8156

Group wise xtabs in R

I want to prepare xtabs for different stations. But it is giving me overall table across stations. I have used the following code

library(tidyverse)

df %>% group_by(Station) %>% 
  xtabs( ~ Observed + Forecasted, data = .) 

This is giving me

#>        Forecasted
#>Observed   1   3   4   5   9
#>       1 132   5  31  31   3
#>       3  16   0   6  13   7
#>       4   6   0  13  23   8
#>       5   4   0  16  33  15
#>       9   0   0   0   2   0

But I want to have the output stationwise like

Aizawl
#>        Forecasted
#>Observed   1   3   4   5   9
#>       1 132   5  31  31   3
#>       3  16   0   6  13   7
#>       4   6   0  13  23   8
#>       5   4   0  16  33  15
#>       9   0   0   0   2   0

Serchhip
#>        Forecasted
#>Observed   1   3   4   5   9
#>       1 132   5  31  31   3
#>       3  16   0   6  13   7
#>       4   6   0  13  23   8
#>       5   4   0  16  33  15
#>       9   0   0   0   2   0

Then I want to export the output in .csv file.

Data

df = structure(list(Station = c("Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", "Aizawl", 
"Aizawl", "Aizawl", "Aizawl", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip", 
"Serchhip", "Serchhip", "Serchhip", "Serchhip", "Serchhip"), 
    Observed = c(1, 1, 1, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 
    1, 1, 1, 1, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 1, 1, 4, 1, 3, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 3, 4, 1, 1, 1, 1, 1, 3, 5, 
    5, 3, 5, 3, 1, 1, 3, 1, 1, 1, 1, 1, 5, 3, 4, 1, 1, 1, 1, 
    1, 3, 1, 4, 1, 1, 1, 1, 1, 4, 4, 5, 1, 5, 4, 5, 5, 5, 5, 
    1, 5, 1, 4, 5, 4, 4, 5, 4, 5, 5, 3, 1, 5, 3, 4, 3, 4, 5, 
    5, 5, 5, 4, 4, 4, 5, 5, 5, 5, 5, 5, 4, 5, 3, 4, 4, 5, 3, 
    5, 4, 4, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 3, 5, 5, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 
    3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 4, 4, 1, 3, 4, 1, 1, 1, 1, 
    1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 4, 3, 9, 5, 5, 4, 1, 5, 1, 1, 1, 1, 4, 5, 5, 5, 
    5, 5, 5, 1, 1, 4, 1, 4, 4, 4, 5, 1, 1, 4, 3, 5, 1, 1, 4, 
    3, 5, 3, 4, 5, 3, 4, 4, 5, 5, 3, 4, 5, 5, 5, 5, 5, 4, 4, 
    4, 4, 5, 1, 9, 5, 5), Forecasted = c(1, 1, 1, 5, 5, 1, 1, 
    1, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 
    5, 9, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 4, 1, 4, 4, 1, 1, 5, 3, 1, 1, 1, 4, 5, 5, 5, 5, 
    1, 1, 1, 5, 5, 1, 5, 5, 5, 9, 4, 5, 4, 4, 4, 3, 4, 4, 1, 
    1, 5, 5, 4, 4, 4, 1, 1, 1, 4, 4, 4, 4, 4, 4, 1, 1, 5, 4, 
    4, 5, 4, 4, 4, 4, 5, 4, 5, 5, 5, 5, 5, 4, 5, 5, 4, 1, 1, 
    4, 4, 5, 5, 5, 5, 1, 4, 5, 5, 1, 4, 4, 9, 9, 9, 9, 9, 9, 
    9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
    5, 5, 5, 9, 1, 1, 1, 5, 4, 1, 1, 1, 5, 4, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 9, 5, 5, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 5, 5, 4, 1, 1, 1, 1, 1, 4, 1, 1, 1, 
    4, 4, 4, 4, 1, 4, 1, 3, 1, 1, 1, 4, 4, 4, 4, 4, 4, 1, 1, 
    1, 4, 4, 3, 5, 5, 5, 4, 3, 5, 5, 5, 5, 5, 4, 5, 5, 5, 4, 
    5, 4, 4, 5, 5, 4, 4, 5, 4, 1, 4, 4, 5, 5, 4, 5, 4, 5, 4, 
    5, 5, 5, 1, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
    5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9)), row.names = c(NA, 
364L), class = "data.frame")

Upvotes: 2

Views: 1014

Answers (3)

Darren Tsai
Darren Tsai

Reputation: 35554

If you want to operate on a xtabs(3-dimensional array) instead of list or data.frame, then put the stratified variable, i.e. Station, to the last position of the formula in xtabs().

res <- xtabs( ~ Observed + Forecasted + Station, df)
res

# , , Station = Aizawl
# 
#         Forecasted
# Observed  1  3  4  5  9
#        1 56  2 13 13  1
#        3 12  0  4  8  3
#        4  4  0  9 11  4
#        5  3  0  7 23  9
#        9  0  0  0  0  0
# 
# , , Station = Serchhip
# 
#         Forecasted
# Observed  1  3  4  5  9
#        1 76  3 18 18  2
#        3  4  0  2  5  4
#        4  2  0  4 12  4
#        5  1  0  9 10  6
#        9  0  0  0  2  0

class(res)
# [1] "xtabs" "table"

To print the array to 2 individual csv files:

lapply(dimnames(res)$Station, function(x) write.csv(res[,,x], paste0("table_", x, ".csv")))

To transform the array to a data.frame format:

library(tidyr)

res %>%
  as.data.frame() %>%
  pivot_wider(names_from = Forecasted, values_from = Freq)

# # A tibble: 10 x 7
#    Observed Station    `1`   `3`   `4`   `5`   `9`
#    <fct>    <fct>    <int> <int> <int> <int> <int>
#  1 1        Aizawl      56     2    13    13     1
#  2 3        Aizawl      12     0     4     8     3
#  3 4        Aizawl       4     0     9    11     4
#  4 5        Aizawl       3     0     7    23     9
#  5 9        Aizawl       0     0     0     0     0
#  6 1        Serchhip    76     3    18    18     2
#  7 3        Serchhip     4     0     2     5     4
#  8 4        Serchhip     2     0     4    12     4
#  9 5        Serchhip     1     0     9    10     6
# 10 9        Serchhip     0     0     0     2     0

Upvotes: 3

jay.sf
jay.sf

Reputation: 72778

Using by.

res <- by(df, df$Station, xtabs, f=~Observed + Forecasted)
res
# df$Station: Aizawl
#          Forecasted
# Observed  1  3  4  5  9
#           1 56  2 13 13  1
#           3 12  0  4  8  3
#           4  4  0  9 11  4
#           5  3  0  7 23  9
# --------------------------------------------------------- 
#   df$Station: Serchhip
#         Forecasted
# Observed  1  3  4  5  9
#           1 76  3 18 18  2
#           3  4  0  2  5  4
#           4  2  0  4 12  4
#           5  1  0  9 10  6
#           9  0  0  0  2  0

## Output .csv
lapply(names(res), function(x) write.csv(res[[x]], paste0("myfile_", x,".csv")))

Upvotes: 2

dshkol
dshkol

Reputation: 1228

If you want to write it to CSV, best to keep your data in a CSV-friendly format. This output

Aizawl
#>        Forecasted
#>Observed   1   3   4   5   9
#>       1 132   5  31  31   3
#>       3  16   0   6  13   7
#>       4   6   0  13  23   8
#>       5   4   0  16  33  15
#>       9   0   0   0   2   0

Serchhip
#>        Forecasted
#>Observed   1   3   4   5   9
#>       1 132   5  31  31   3
#>       3  16   0   6  13   7
#>       4   6   0  13  23   8
#>       5   4   0  16  33  15
#>       9   0   0   0   2   0

Might cause issues, and a CSV-format-friendly output is easy to prepare with dplyr and tidyr.

library(dplyr)
library(tidyr)

df %>% 
  group_by(Station, Observed, Forecasted) %>% 
  tally() %>% 
  pivot_wider(names_from = Forecasted, values_from = n) %>% 
  replace(is.na(.), 0) 
# A tibble: 9 x 7
# Groups:   Station, Observed [9]
  Station  Observed   `1`   `3`   `4`   `5`   `9`
  <chr>       <dbl> <int> <int> <int> <int> <int>
1 Aizawl          1    56     2    13    13     1
2 Aizawl          3    12     0     4     8     3
3 Aizawl          4     4     0     9    11     4
4 Aizawl          5     3     0     7    23     9
5 Serchhip        1    76     3    18    18     2
6 Serchhip        3     4     0     2     5     4
7 Serchhip        4     2     0     4    12     4
8 Serchhip        5     1     0     9    10     6
9 Serchhip        9     0     0     0     2     0

Which you can export easily to CSV at this point or directly by chaining your pipes.

df %>% 
  group_by(Station, Observed, Forecasted) %>% 
  tally() %>% 
  pivot_wider(names_from = Forecasted, values_from = n) %>% 
  replace(is.na(.), 0) %>%
  readr::write_csv("my_output.csv")

Upvotes: 2

Related Questions