Devin Mendez
Devin Mendez

Reputation: 101

Summarizing Dataframe containing numbers and strings by latitude

I would like to be able to calculate the average "IntakeEnergy" by location (latitude) for the following dataset. The problem that I think I am having is that the dataset contains both number and characters which results in the function not knowing how to calculate the mean of a string. I have tried grouping the data by latitude and then calculating the means of the other columns with this:

blah<-profit.fall.all %>% group_by(Lat) %>% summarise_all(funs(mean))

However, this is only successful if I drop all of the string columns. I am a lot more familiar with excel and I believe that a potential solution for this would be to create a pivot table, but i'm not sure if that is the best way to solve this problem.

Data<-dput(head(profit.fall.all,15))
structure(list(Id = structure(c(35L, 70L, 20L, 5L, 15L, 5L, 35L, 
34L, 36L, 47L, 33L, 50L, 69L, 66L, 20L), .Label = c("Barren Island Mud 1", 
"BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3", 
"BH SAV 2", "BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)", 
"BHM 1", "BHM 1 C", "BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3", 
"BI Marsh B", "BI Mud", "BIHI High B", "BIL1 (low) E", "BIL1 E", 
"BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2", "BW High 1", 
"BW High 2", "BW High 5", "BW Low 3", "BW Money Stump", "BW Mud 1", 
"BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3", "BWH 5", 
"BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "BWS 1", "EN High 2", 
"EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2", 
"ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud", 
"ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "James Marsh", 
"MWP 27 High 1", "MWP 28 High 2", "MWP 29 Low 1", "MWP 30 Mud 1", 
"MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3", "MWP 34 Low 4", 
"MWP 35 Mud 3", "PWRC Fresh", "PWRC Fresh 1", "PWRC Fresh 1-4", 
"WP 27 HM-MARC", "WP 28 HM-MARC", "WP 30 IT MARE", "WP29 LM-MARC", 
"WP30 IT MARE"), class = "factor"), Season = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", 
"Fall", "Spring", "Spring?"), class = "factor"), Refuge = structure(c(5L, 
7L, 2L, 3L, 2L, 3L, 5L, 5L, 5L, 6L, 5L, 6L, 7L, 7L, 2L), .Label = c("", 
"Barren Island", "Bishop's Head", "Bishops Head", "Blackwater", 
"Eastern Neck", "Martin", "PWRC"), class = "factor"), HType = structure(c(6L, 
4L, 5L, 4L, 3L, 4L, 6L, 5L, 6L, 3L, 4L, 4L, 4L, 5L, 5L), .Label = c("", 
"Fresh", "High", "Low", "Mud", "SAV"), class = "factor"), Long = c(-76.109109, 
-75.99733, -76.261634, -76.038959, -76.256617, -76.038959, -76.109109, 
-76.146408, -76.103627, -76.225188, -76.23491, -76.239864, -75.99354, 
-76.01407, -76.261634), Lat = c(38.441089, 37.99369, 38.336058, 
38.224469, 38.326234, 38.224469, 38.441089, 38.417947, 38.403511, 
39.04065, 38.43141, 39.026771, 37.98833, 38.01108, 38.336058), 
    Prey.Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("Melampus", "Ruppia", 
    "Scirpus", "Zannichellia"), class = "factor"), IntakeEnergy = c(112.577988769079, 
    29.6957686910562, 22.825852053573, 64.2669620925843, 1182.80971128049, 
    454.559045812661, 893.487901876808, 483.341619235618, 16853.8824849192, 
    38.6168499963705, 30.540123199744, 557.798944392019, 6326.03316402962, 
    57.7990761383585, 11.4733747438)), row.names = c(1L, 5L, 
6L, 7L, 8L, 15L, 22L, 23L, 24L, 25L, 33L, 35L, 42L, 43L, 45L), class = "data.frame")
> View(Data)

Upvotes: 1

Views: 35

Answers (2)

Chuck P
Chuck P

Reputation: 3923

Final two options. Hopefully these are close enough that you can manage to change to your taste. I feel like I've given you four valid answers and you can't quite describe what you want


Data %>% 
  group_by(Lat) %>% 
  mutate(avgIntakeEnergybyLat = mean(IntakeEnergy, na.rm = TRUE)) %>% 
  arrange(Id) %>%
  select(-Prey.Group) 
#> # A tibble: 15 x 8
#> # Groups:   Lat [12]
#>    Id         Season Refuge    HType  Long   Lat IntakeEnergy avgIntakeEnergyby…
#>    <fct>      <fct>  <fct>     <fct> <dbl> <dbl>        <dbl>              <dbl>
#>  1 BH Low 2   Fall   Bishop's… Low   -76.0  38.2         64.3              259. 
#>  2 BH Low 2   Fall   Bishop's… Low   -76.0  38.2        455.               259. 
#>  3 BI High 1  Fall   Barren I… High  -76.3  38.3       1183.              1183. 
#>  4 BI Mud     Fall   Barren I… Mud   -76.3  38.3         22.8               17.1
#>  5 BI Mud     Fall   Barren I… Mud   -76.3  38.3         11.5               17.1
#>  6 BW Money … Fall   Blackwat… Low   -76.2  38.4         30.5               30.5
#>  7 BW Mud 1   Fall   Blackwat… Mud   -76.1  38.4        483.               483. 
#>  8 BW SAV 1   Fall   Blackwat… SAV   -76.1  38.4        113.               503. 
#>  9 BW SAV 1   Fall   Blackwat… SAV   -76.1  38.4        893.               503. 
#> 10 BW SAV 2   Fall   Blackwat… SAV   -76.1  38.4      16854.             16854. 
#> 11 EN High 2  Fall   Eastern … High  -76.2  39.0         38.6               38.6
#> 12 EN Low 1   Fall   Eastern … Low   -76.2  39.0        558.               558. 
#> 13 MWP 30 Mu… Fall   Martin    Mud   -76.0  38.0         57.8               57.8
#> 14 MWP 33 Lo… Fall   Martin    Low   -76.0  38.0       6326.              6326. 
#> 15 MWP 34 Lo… Fall   Martin    Low   -76.0  38.0         29.7               29.7

Data %>% 
  group_by(Lat) %>% 
  mutate(avgIntakeEnergybyLat = mean(IntakeEnergy, na.rm = TRUE)) %>% 
  arrange(Id) %>%
  select(-Prey.Group, -IntakeEnergy) %>%
  distinct(Lat, .keep_all = TRUE)
#> # A tibble: 12 x 7
#> # Groups:   Lat [12]
#>    Id             Season Refuge        HType  Long   Lat avgIntakeEnergybyLat
#>    <fct>          <fct>  <fct>         <fct> <dbl> <dbl>                <dbl>
#>  1 BH Low 2       Fall   Bishop's Head Low   -76.0  38.2                259. 
#>  2 BI High 1      Fall   Barren Island High  -76.3  38.3               1183. 
#>  3 BI Mud         Fall   Barren Island Mud   -76.3  38.3                 17.1
#>  4 BW Money Stump Fall   Blackwater    Low   -76.2  38.4                 30.5
#>  5 BW Mud 1       Fall   Blackwater    Mud   -76.1  38.4                483. 
#>  6 BW SAV 1       Fall   Blackwater    SAV   -76.1  38.4                503. 
#>  7 BW SAV 2       Fall   Blackwater    SAV   -76.1  38.4              16854. 
#>  8 EN High 2      Fall   Eastern Neck  High  -76.2  39.0                 38.6
#>  9 EN Low 1       Fall   Eastern Neck  Low   -76.2  39.0                558. 
#> 10 MWP 30 Mud 1   Fall   Martin        Mud   -76.0  38.0                 57.8
#> 11 MWP 33 Low 3   Fall   Martin        Low   -76.0  38.0               6326. 
#> 12 MWP 34 Low 4   Fall   Martin        Low   -76.0  38.0                 29.7

Turns out OP wants to add a column and keep other columns and rows ...

library(dplyr)

Data %>% 
  group_by(Lat) %>% 
  mutate(avgbyLat = mean(IntakeEnergy, na.rm = TRUE)) %>% 
  arrange(Id)
#> # A tibble: 15 x 9
#> # Groups:   Lat [12]
#>    Id        Season Refuge    HType  Long   Lat Prey.Group IntakeEnergy avgbyLat
#>    <fct>     <fct>  <fct>     <fct> <dbl> <dbl> <fct>             <dbl>    <dbl>
#>  1 BH Low 2  Fall   Bishop's… Low   -76.0  38.2 Melampus           64.3    259. 
#>  2 BH Low 2  Fall   Bishop's… Low   -76.0  38.2 Melampus          455.     259. 
#>  3 BI High 1 Fall   Barren I… High  -76.3  38.3 Melampus         1183.    1183. 
#>  4 BI Mud    Fall   Barren I… Mud   -76.3  38.3 Melampus           22.8     17.1
#>  5 BI Mud    Fall   Barren I… Mud   -76.3  38.3 Scirpus            11.5     17.1
#>  6 BW Money… Fall   Blackwat… Low   -76.2  38.4 Ruppia             30.5     30.5
#>  7 BW Mud 1  Fall   Blackwat… Mud   -76.1  38.4 Ruppia            483.     483. 
#>  8 BW SAV 1  Fall   Blackwat… SAV   -76.1  38.4 Melampus          113.     503. 
#>  9 BW SAV 1  Fall   Blackwat… SAV   -76.1  38.4 Ruppia            893.     503. 
#> 10 BW SAV 2  Fall   Blackwat… SAV   -76.1  38.4 Ruppia          16854.   16854. 
#> 11 EN High 2 Fall   Eastern … High  -76.2  39.0 Ruppia             38.6     38.6
#> 12 EN Low 1  Fall   Eastern … Low   -76.2  39.0 Ruppia            558.     558. 
#> 13 MWP 30 M… Fall   Martin    Mud   -76.0  38.0 Ruppia             57.8     57.8
#> 14 MWP 33 L… Fall   Martin    Low   -76.0  38.0 Ruppia           6326.    6326. 
#> 15 MWP 34 L… Fall   Martin    Low   -76.0  38.0 Melampus           29.7     29.7

as opposed to earlier answer.

Try summarise_if(is.numeric, mean, na.rm = TRUE) [sure you only want Lat]

library(dplyr)

Data %>% group_by(Lat) %>% summarise_if(is.numeric, mean, na.rm = TRUE)
#> # A tibble: 12 x 3
#>      Lat  Long IntakeEnergy
#>    <dbl> <dbl>        <dbl>
#>  1  38.0 -76.0       6326. 
#>  2  38.0 -76.0         29.7
#>  3  38.0 -76.0         57.8
#>  4  38.2 -76.0        259. 
#>  5  38.3 -76.3       1183. 
#>  6  38.3 -76.3         17.1
#>  7  38.4 -76.1      16854. 
#>  8  38.4 -76.1        483. 
#>  9  38.4 -76.2         30.5
#> 10  38.4 -76.1        503. 
#> 11  39.0 -76.2        558. 
#> 12  39.0 -76.2         38.6

where the data is...

Data <- structure(list(Id = structure(c(35L, 70L, 20L, 5L, 15L, 5L, 35L,
34L, 36L, 47L, 33L, 50L, 69L, 66L, 20L), .Label = c("Barren Island Mud 1",
"BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3",
"BH SAV 2", "BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)",
"BHM 1", "BHM 1 C", "BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3",
"BI Marsh B", "BI Mud", "BIHI High B", "BIL1 (low) E", "BIL1 E",
"BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2", "BW High 1",
"BW High 2", "BW High 5", "BW Low 3", "BW Money Stump", "BW Mud 1",
"BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3", "BWH 5",
"BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "BWS 1", "EN High 2",
"EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2",
"ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud",
"ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "James Marsh",
"MWP 27 High 1", "MWP 28 High 2", "MWP 29 Low 1", "MWP 30 Mud 1",
"MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3", "MWP 34 Low 4",
"MWP 35 Mud 3", "PWRC Fresh", "PWRC Fresh 1", "PWRC Fresh 1-4",
"WP 27 HM-MARC", "WP 28 HM-MARC", "WP 30 IT MARE", "WP29 LM-MARC",
"WP30 IT MARE"), class = "factor"), Season = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Fall", "Spring", "Spring?"), class = "factor"), Refuge = structure(c(5L,
7L, 2L, 3L, 2L, 3L, 5L, 5L, 5L, 6L, 5L, 6L, 7L, 7L, 2L), .Label = c("",
"Barren Island", "Bishop's Head", "Bishops Head", "Blackwater",
"Eastern Neck", "Martin", "PWRC"), class = "factor"), HType = structure(c(6L,
4L, 5L, 4L, 3L, 4L, 6L, 5L, 6L, 3L, 4L, 4L, 4L, 5L, 5L), .Label = c("",
"Fresh", "High", "Low", "Mud", "SAV"), class = "factor"), Long = c(-76.109109,
-75.99733, -76.261634, -76.038959, -76.256617, -76.038959, -76.109109,
-76.146408, -76.103627, -76.225188, -76.23491, -76.239864, -75.99354,
-76.01407, -76.261634), Lat = c(38.441089, 37.99369, 38.336058,
38.224469, 38.326234, 38.224469, 38.441089, 38.417947, 38.403511,
39.04065, 38.43141, 39.026771, 37.98833, 38.01108, 38.336058),
Prey.Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("Melampus", "Ruppia",
"Scirpus", "Zannichellia"), class = "factor"), IntakeEnergy = c(112.577988769079,
29.6957686910562, 22.825852053573, 64.2669620925843, 1182.80971128049,
454.559045812661, 893.487901876808, 483.341619235618, 16853.8824849192,
38.6168499963705, 30.540123199744, 557.798944392019, 6326.03316402962,
57.7990761383585, 11.4733747438)), row.names = c(1L, 5L,
6L, 7L, 8L, 15L, 22L, 23L, 24L, 25L, 33L, 35L, 42L, 43L, 45L), class = "data.frame")

glimpse(Data)
#> Rows: 15
#> Columns: 8
#> $ Id           <fct> BW SAV 1, MWP 34 Low 4, BI Mud, BH Low 2, BI High 1, BH …
#> $ Season       <fct> Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fa…
#> $ Refuge       <fct> Blackwater, Martin, Barren Island, Bishop's Head, Barren…
#> $ HType        <fct> SAV, Low, Mud, Low, High, Low, SAV, Mud, SAV, High, Low,…
#> $ Long         <dbl> -76.10911, -75.99733, -76.26163, -76.03896, -76.25662, -…
#> $ Lat          <dbl> 38.44109, 37.99369, 38.33606, 38.22447, 38.32623, 38.224…
#> $ Prey.Group   <fct> Melampus, Melampus, Melampus, Melampus, Melampus, Melamp…
#> $ IntakeEnergy <dbl> 112.57799, 29.69577, 22.82585, 64.26696, 1182.80971, 454…

Created on 2020-05-07 by the reprex package (v0.3.0)

Upvotes: 1

akrun
akrun

Reputation: 887851

We can also use summarise_at

library(dplyr)
profit.fall.all %>%
    group_by(Lat) %>%
    summarise_at(vars(Long,IntakeEnergy), mean, na.rm = TRUE)

Upvotes: 1

Related Questions