Reputation: 101
I would like to be able to calculate the average "IntakeEnergy" by location (latitude) for the following dataset. The problem that I think I am having is that the dataset contains both number and characters which results in the function not knowing how to calculate the mean of a string. I have tried grouping the data by latitude and then calculating the means of the other columns with this:
blah<-profit.fall.all %>% group_by(Lat) %>% summarise_all(funs(mean))
However, this is only successful if I drop all of the string columns. I am a lot more familiar with excel and I believe that a potential solution for this would be to create a pivot table, but i'm not sure if that is the best way to solve this problem.
Data<-dput(head(profit.fall.all,15))
structure(list(Id = structure(c(35L, 70L, 20L, 5L, 15L, 5L, 35L,
34L, 36L, 47L, 33L, 50L, 69L, 66L, 20L), .Label = c("Barren Island Mud 1",
"BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3",
"BH SAV 2", "BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)",
"BHM 1", "BHM 1 C", "BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3",
"BI Marsh B", "BI Mud", "BIHI High B", "BIL1 (low) E", "BIL1 E",
"BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2", "BW High 1",
"BW High 2", "BW High 5", "BW Low 3", "BW Money Stump", "BW Mud 1",
"BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3", "BWH 5",
"BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "BWS 1", "EN High 2",
"EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2",
"ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud",
"ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "James Marsh",
"MWP 27 High 1", "MWP 28 High 2", "MWP 29 Low 1", "MWP 30 Mud 1",
"MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3", "MWP 34 Low 4",
"MWP 35 Mud 3", "PWRC Fresh", "PWRC Fresh 1", "PWRC Fresh 1-4",
"WP 27 HM-MARC", "WP 28 HM-MARC", "WP 30 IT MARE", "WP29 LM-MARC",
"WP30 IT MARE"), class = "factor"), Season = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Fall", "Spring", "Spring?"), class = "factor"), Refuge = structure(c(5L,
7L, 2L, 3L, 2L, 3L, 5L, 5L, 5L, 6L, 5L, 6L, 7L, 7L, 2L), .Label = c("",
"Barren Island", "Bishop's Head", "Bishops Head", "Blackwater",
"Eastern Neck", "Martin", "PWRC"), class = "factor"), HType = structure(c(6L,
4L, 5L, 4L, 3L, 4L, 6L, 5L, 6L, 3L, 4L, 4L, 4L, 5L, 5L), .Label = c("",
"Fresh", "High", "Low", "Mud", "SAV"), class = "factor"), Long = c(-76.109109,
-75.99733, -76.261634, -76.038959, -76.256617, -76.038959, -76.109109,
-76.146408, -76.103627, -76.225188, -76.23491, -76.239864, -75.99354,
-76.01407, -76.261634), Lat = c(38.441089, 37.99369, 38.336058,
38.224469, 38.326234, 38.224469, 38.441089, 38.417947, 38.403511,
39.04065, 38.43141, 39.026771, 37.98833, 38.01108, 38.336058),
Prey.Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("Melampus", "Ruppia",
"Scirpus", "Zannichellia"), class = "factor"), IntakeEnergy = c(112.577988769079,
29.6957686910562, 22.825852053573, 64.2669620925843, 1182.80971128049,
454.559045812661, 893.487901876808, 483.341619235618, 16853.8824849192,
38.6168499963705, 30.540123199744, 557.798944392019, 6326.03316402962,
57.7990761383585, 11.4733747438)), row.names = c(1L, 5L,
6L, 7L, 8L, 15L, 22L, 23L, 24L, 25L, 33L, 35L, 42L, 43L, 45L), class = "data.frame")
> View(Data)
Upvotes: 1
Views: 35
Reputation: 3923
Final two options. Hopefully these are close enough that you can manage to change to your taste. I feel like I've given you four valid answers and you can't quite describe what you want
Data %>%
group_by(Lat) %>%
mutate(avgIntakeEnergybyLat = mean(IntakeEnergy, na.rm = TRUE)) %>%
arrange(Id) %>%
select(-Prey.Group)
#> # A tibble: 15 x 8
#> # Groups: Lat [12]
#> Id Season Refuge HType Long Lat IntakeEnergy avgIntakeEnergyby…
#> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
#> 1 BH Low 2 Fall Bishop's… Low -76.0 38.2 64.3 259.
#> 2 BH Low 2 Fall Bishop's… Low -76.0 38.2 455. 259.
#> 3 BI High 1 Fall Barren I… High -76.3 38.3 1183. 1183.
#> 4 BI Mud Fall Barren I… Mud -76.3 38.3 22.8 17.1
#> 5 BI Mud Fall Barren I… Mud -76.3 38.3 11.5 17.1
#> 6 BW Money … Fall Blackwat… Low -76.2 38.4 30.5 30.5
#> 7 BW Mud 1 Fall Blackwat… Mud -76.1 38.4 483. 483.
#> 8 BW SAV 1 Fall Blackwat… SAV -76.1 38.4 113. 503.
#> 9 BW SAV 1 Fall Blackwat… SAV -76.1 38.4 893. 503.
#> 10 BW SAV 2 Fall Blackwat… SAV -76.1 38.4 16854. 16854.
#> 11 EN High 2 Fall Eastern … High -76.2 39.0 38.6 38.6
#> 12 EN Low 1 Fall Eastern … Low -76.2 39.0 558. 558.
#> 13 MWP 30 Mu… Fall Martin Mud -76.0 38.0 57.8 57.8
#> 14 MWP 33 Lo… Fall Martin Low -76.0 38.0 6326. 6326.
#> 15 MWP 34 Lo… Fall Martin Low -76.0 38.0 29.7 29.7
Data %>%
group_by(Lat) %>%
mutate(avgIntakeEnergybyLat = mean(IntakeEnergy, na.rm = TRUE)) %>%
arrange(Id) %>%
select(-Prey.Group, -IntakeEnergy) %>%
distinct(Lat, .keep_all = TRUE)
#> # A tibble: 12 x 7
#> # Groups: Lat [12]
#> Id Season Refuge HType Long Lat avgIntakeEnergybyLat
#> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl>
#> 1 BH Low 2 Fall Bishop's Head Low -76.0 38.2 259.
#> 2 BI High 1 Fall Barren Island High -76.3 38.3 1183.
#> 3 BI Mud Fall Barren Island Mud -76.3 38.3 17.1
#> 4 BW Money Stump Fall Blackwater Low -76.2 38.4 30.5
#> 5 BW Mud 1 Fall Blackwater Mud -76.1 38.4 483.
#> 6 BW SAV 1 Fall Blackwater SAV -76.1 38.4 503.
#> 7 BW SAV 2 Fall Blackwater SAV -76.1 38.4 16854.
#> 8 EN High 2 Fall Eastern Neck High -76.2 39.0 38.6
#> 9 EN Low 1 Fall Eastern Neck Low -76.2 39.0 558.
#> 10 MWP 30 Mud 1 Fall Martin Mud -76.0 38.0 57.8
#> 11 MWP 33 Low 3 Fall Martin Low -76.0 38.0 6326.
#> 12 MWP 34 Low 4 Fall Martin Low -76.0 38.0 29.7
Turns out OP wants to add a column and keep other columns and rows ...
library(dplyr)
Data %>%
group_by(Lat) %>%
mutate(avgbyLat = mean(IntakeEnergy, na.rm = TRUE)) %>%
arrange(Id)
#> # A tibble: 15 x 9
#> # Groups: Lat [12]
#> Id Season Refuge HType Long Lat Prey.Group IntakeEnergy avgbyLat
#> <fct> <fct> <fct> <fct> <dbl> <dbl> <fct> <dbl> <dbl>
#> 1 BH Low 2 Fall Bishop's… Low -76.0 38.2 Melampus 64.3 259.
#> 2 BH Low 2 Fall Bishop's… Low -76.0 38.2 Melampus 455. 259.
#> 3 BI High 1 Fall Barren I… High -76.3 38.3 Melampus 1183. 1183.
#> 4 BI Mud Fall Barren I… Mud -76.3 38.3 Melampus 22.8 17.1
#> 5 BI Mud Fall Barren I… Mud -76.3 38.3 Scirpus 11.5 17.1
#> 6 BW Money… Fall Blackwat… Low -76.2 38.4 Ruppia 30.5 30.5
#> 7 BW Mud 1 Fall Blackwat… Mud -76.1 38.4 Ruppia 483. 483.
#> 8 BW SAV 1 Fall Blackwat… SAV -76.1 38.4 Melampus 113. 503.
#> 9 BW SAV 1 Fall Blackwat… SAV -76.1 38.4 Ruppia 893. 503.
#> 10 BW SAV 2 Fall Blackwat… SAV -76.1 38.4 Ruppia 16854. 16854.
#> 11 EN High 2 Fall Eastern … High -76.2 39.0 Ruppia 38.6 38.6
#> 12 EN Low 1 Fall Eastern … Low -76.2 39.0 Ruppia 558. 558.
#> 13 MWP 30 M… Fall Martin Mud -76.0 38.0 Ruppia 57.8 57.8
#> 14 MWP 33 L… Fall Martin Low -76.0 38.0 Ruppia 6326. 6326.
#> 15 MWP 34 L… Fall Martin Low -76.0 38.0 Melampus 29.7 29.7
as opposed to earlier answer.
Try summarise_if(is.numeric, mean, na.rm = TRUE)
[sure you only want Lat
]
library(dplyr)
Data %>% group_by(Lat) %>% summarise_if(is.numeric, mean, na.rm = TRUE)
#> # A tibble: 12 x 3
#> Lat Long IntakeEnergy
#> <dbl> <dbl> <dbl>
#> 1 38.0 -76.0 6326.
#> 2 38.0 -76.0 29.7
#> 3 38.0 -76.0 57.8
#> 4 38.2 -76.0 259.
#> 5 38.3 -76.3 1183.
#> 6 38.3 -76.3 17.1
#> 7 38.4 -76.1 16854.
#> 8 38.4 -76.1 483.
#> 9 38.4 -76.2 30.5
#> 10 38.4 -76.1 503.
#> 11 39.0 -76.2 558.
#> 12 39.0 -76.2 38.6
where the data is...
Data <- structure(list(Id = structure(c(35L, 70L, 20L, 5L, 15L, 5L, 35L,
34L, 36L, 47L, 33L, 50L, 69L, 66L, 20L), .Label = c("Barren Island Mud 1",
"BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3",
"BH SAV 2", "BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)",
"BHM 1", "BHM 1 C", "BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3",
"BI Marsh B", "BI Mud", "BIHI High B", "BIL1 (low) E", "BIL1 E",
"BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2", "BW High 1",
"BW High 2", "BW High 5", "BW Low 3", "BW Money Stump", "BW Mud 1",
"BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3", "BWH 5",
"BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "BWS 1", "EN High 2",
"EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2",
"ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud",
"ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "James Marsh",
"MWP 27 High 1", "MWP 28 High 2", "MWP 29 Low 1", "MWP 30 Mud 1",
"MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3", "MWP 34 Low 4",
"MWP 35 Mud 3", "PWRC Fresh", "PWRC Fresh 1", "PWRC Fresh 1-4",
"WP 27 HM-MARC", "WP 28 HM-MARC", "WP 30 IT MARE", "WP29 LM-MARC",
"WP30 IT MARE"), class = "factor"), Season = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Fall", "Spring", "Spring?"), class = "factor"), Refuge = structure(c(5L,
7L, 2L, 3L, 2L, 3L, 5L, 5L, 5L, 6L, 5L, 6L, 7L, 7L, 2L), .Label = c("",
"Barren Island", "Bishop's Head", "Bishops Head", "Blackwater",
"Eastern Neck", "Martin", "PWRC"), class = "factor"), HType = structure(c(6L,
4L, 5L, 4L, 3L, 4L, 6L, 5L, 6L, 3L, 4L, 4L, 4L, 5L, 5L), .Label = c("",
"Fresh", "High", "Low", "Mud", "SAV"), class = "factor"), Long = c(-76.109109,
-75.99733, -76.261634, -76.038959, -76.256617, -76.038959, -76.109109,
-76.146408, -76.103627, -76.225188, -76.23491, -76.239864, -75.99354,
-76.01407, -76.261634), Lat = c(38.441089, 37.99369, 38.336058,
38.224469, 38.326234, 38.224469, 38.441089, 38.417947, 38.403511,
39.04065, 38.43141, 39.026771, 37.98833, 38.01108, 38.336058),
Prey.Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("Melampus", "Ruppia",
"Scirpus", "Zannichellia"), class = "factor"), IntakeEnergy = c(112.577988769079,
29.6957686910562, 22.825852053573, 64.2669620925843, 1182.80971128049,
454.559045812661, 893.487901876808, 483.341619235618, 16853.8824849192,
38.6168499963705, 30.540123199744, 557.798944392019, 6326.03316402962,
57.7990761383585, 11.4733747438)), row.names = c(1L, 5L,
6L, 7L, 8L, 15L, 22L, 23L, 24L, 25L, 33L, 35L, 42L, 43L, 45L), class = "data.frame")
glimpse(Data)
#> Rows: 15
#> Columns: 8
#> $ Id <fct> BW SAV 1, MWP 34 Low 4, BI Mud, BH Low 2, BI High 1, BH …
#> $ Season <fct> Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fa…
#> $ Refuge <fct> Blackwater, Martin, Barren Island, Bishop's Head, Barren…
#> $ HType <fct> SAV, Low, Mud, Low, High, Low, SAV, Mud, SAV, High, Low,…
#> $ Long <dbl> -76.10911, -75.99733, -76.26163, -76.03896, -76.25662, -…
#> $ Lat <dbl> 38.44109, 37.99369, 38.33606, 38.22447, 38.32623, 38.224…
#> $ Prey.Group <fct> Melampus, Melampus, Melampus, Melampus, Melampus, Melamp…
#> $ IntakeEnergy <dbl> 112.57799, 29.69577, 22.82585, 64.26696, 1182.80971, 454…
Created on 2020-05-07 by the reprex package (v0.3.0)
Upvotes: 1
Reputation: 887851
We can also use summarise_at
library(dplyr)
profit.fall.all %>%
group_by(Lat) %>%
summarise_at(vars(Long,IntakeEnergy), mean, na.rm = TRUE)
Upvotes: 1