Reputation: 651
I'm trying to create a correlation plot based on two parameters in my data frame. However, I'm confused about how to manipulate the data frame to get what I want.
This is the structure of my data frame:
# Sample of the data frame as a structure() dump: 20 rows, 10 columns
# (orgid, locid, stdate, sttime, charnam, val, valunit, swqs, WMA, year).
# The data are in long format: each row is one measurement, `charnam` names
# the parameter and `val` holds its value in the unit given by `valunit`.
# `stdate` is stored as a Date and `sttime` as an hms/difftime in seconds.
structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ"), locid = c("USGS-01367785",
"USGS-01367785", "USGS-01455099", "USGS-01455099", "USGS-01440000",
"USGS-01440000", "USGS-01380100", "USGS-01380100", "USGS-01380100",
"USGS-01387700", "USGS-01387700", "USGS-01398000", "USGS-01398000",
"USGS-0140940950", "USGS-01466500", "USGS-01461880", "USGS-01461880",
"USGS-01445600", "USGS-01446400", "USGS-0140940950"), stdate = structure(c(16134,
16134, 16133, 16133, 16135, 16135, 16133, 16133, 16133, 16127,
16127, 16105, 16105, 16112, 15770, 15749, 15749, 15749, 15762,
16112), class = "Date"), sttime = structure(c(45000, 45000, 39600,
39600, 35040, 35040, 48000, 48000, 48000, 39600, 39600, 38700,
38700, 39600, 37200, 32400, 32400, 40500, 36000, 39600), class = c("hms",
"difftime"), units = "secs"), charnam = c("Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Specific conductance", "Total dissolved solids",
"Specific conductance", "Specific conductance", "Specific conductance",
"Total dissolved solids"), val = c(0.21, 154, 0.43, 333, 0.16,
109, 12.1, 0.2, 143, 32, 0.05, 1.03, 711, 1.62, 31, 218, 391,
384, 478, 104), valunit = c("tons/ac ft", "mg/l", "tons/ac ft",
"mg/l", "tons/ac ft", "mg/l", "tons/day", "tons/ac ft", "mg/l",
"mg/l", "tons/ac ft", "tons/ac ft", "mg/l", "tons/day", "uS/cm @25C",
"mg/l", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "mg/l"), swqs = c("FW2-NT",
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-TM", "FW2-TM", "FW2-NT", "FW2-NT",
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-NT", "FW2-NT", "PL", "FW1",
"FW2-TM", "FW2-TM", "FW2-NT", "FW2-TM", "PL"), WMA = c(2L, 2L,
1L, 1L, 1L, 1L, 6L, 6L, 6L, 3L, 3L, 8L, 8L, 14L, 19L, 11L, 11L,
1L, 1L, 14L), year = c(2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2013L,
2013L, 2013L, 2013L, 2013L, 2014L)), .Names = c("orgid", "locid",
"stdate", "sttime", "charnam", "val", "valunit", "swqs", "WMA",
"year"), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame"))
I want to plot Total dissolved solids vs. Specific conductance. However, the values of both of these parameters are all in one column named val. Would I have to manipulate the data frame to have Total dissolved solids in its own column with its values, and the same for Specific conductance? If so, how would I be able to do that, given that the charnam column has the names of the parameters and the val column has all the values of the parameters? I tried subsetting the data frame but it's not working.
Code I have:
# Attempted scatter plot of TDS (y) against SC (x), built from two separately
# filtered data frames, one per geom_point() layer.
# NOTE(review): TDS_correlation_df, SC_correlation_df and correlation_theme are
# not defined in this snippet; presumably they exist in the asker's session.
correlation_plot1<-ggplot() +
# x = "" is a constant empty string, not a data column, so this layer supplies
# values for the y axis only.
geom_point(data=TDS_correlation_df,aes(x="",y=val))+
# Likewise y = "" is a constant, so this layer supplies values for the x axis
# only; no single point ever receives both a TDS and an SC coordinate.
geom_point(data=SC_correlation_df,aes(x=val,y=""))+
ggtitle("Statewide Total Dissolved Solids vs. Specific Conductance Correlation\n;1997-2018") +
xlab("SC(µS/cm)") + ylab("TDS(mg/L)")+
# No layer above maps a colour aesthetic, so this manual colour scale has
# nothing to act on.
scale_color_manual("",
values = c("red"),
labels=c("Freshwater Aquatic Life Criteria for TDS = 500 mg/L"))+
correlation_theme+
theme(legend.position ="bottom")
TDS_correlation_df and SC_correlation_df are subsets of the original data set, each filtered to contain only that one parameter.
Upvotes: 0
Views: 248
Reputation: 4338
OK, this is very clunky, but I believe it gets to what you're looking for. As the comments discuss, the issue isn't with your ggplot
code, but with your data:
# Sample data copied from the question: 20 rows, 10 columns
# (orgid, locid, stdate, sttime, charnam, val, valunit, swqs, WMA, year).
# Long format: each row is one measurement, `charnam` names the parameter and
# `val` holds its value in the unit given by `valunit`.
# `stdate` is stored as a Date and `sttime` as an hms/difftime in seconds.
data <- structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ"), locid = c("USGS-01367785",
"USGS-01367785", "USGS-01455099", "USGS-01455099", "USGS-01440000",
"USGS-01440000", "USGS-01380100", "USGS-01380100", "USGS-01380100",
"USGS-01387700", "USGS-01387700", "USGS-01398000", "USGS-01398000",
"USGS-0140940950", "USGS-01466500", "USGS-01461880", "USGS-01461880",
"USGS-01445600", "USGS-01446400", "USGS-0140940950"), stdate = structure(c(16134,
16134, 16133, 16133, 16135, 16135, 16133, 16133, 16133, 16127,
16127, 16105, 16105, 16112, 15770, 15749, 15749, 15749, 15762,
16112), class = "Date"), sttime = structure(c(45000, 45000, 39600,
39600, 35040, 35040, 48000, 48000, 48000, 39600, 39600, 38700,
38700, 39600, 37200, 32400, 32400, 40500, 36000, 39600), class = c("hms",
"difftime"), units = "secs"), charnam = c("Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Specific conductance", "Total dissolved solids",
"Specific conductance", "Specific conductance", "Specific conductance",
"Total dissolved solids"), val = c(0.21, 154, 0.43, 333, 0.16,
109, 12.1, 0.2, 143, 32, 0.05, 1.03, 711, 1.62, 31, 218, 391,
384, 478, 104), valunit = c("tons/ac ft", "mg/l", "tons/ac ft",
"mg/l", "tons/ac ft", "mg/l", "tons/day", "tons/ac ft", "mg/l",
"mg/l", "tons/ac ft", "tons/ac ft", "mg/l", "tons/day", "uS/cm @25C",
"mg/l", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "mg/l"), swqs = c("FW2-NT",
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-TM", "FW2-TM", "FW2-NT", "FW2-NT",
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-NT", "FW2-NT", "PL", "FW1",
"FW2-TM", "FW2-TM", "FW2-NT", "FW2-TM", "PL"), WMA = c(2L, 2L,
1L, 1L, 1L, 1L, 6L, 6L, 6L, 3L, 3L, 8L, 8L, 14L, 19L, 11L, 11L,
1L, 1L, 14L), year = c(2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2013L,
2013L, 2013L, 2013L, 2013L, 2014L)), .Names = c("orgid", "locid",
"stdate", "sttime", "charnam", "val", "valunit", "swqs", "WMA",
"year"), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame"))
library(tidyverse)
# Plot Total dissolved solids (TDS) against Specific conductance (SC).
#
# A correlation plot is only meaningful when each point pairs a TDS reading
# with the SC reading from the SAME sample, so the two parameters are matched
# on site, date and time rather than on row position.

# Columns that together identify one sampling event.
sample_keys <- c("locid", "stdate", "sttime")

# Wide layout: one column per parameter named in `charnam`, filled from `val`.
# All remaining columns (including `valunit`) act as row identifiers, so a TDS
# value and an SC value never share a row here; they are paired by the join
# further down.
data_tidy <- data %>%
  pivot_wider(names_from = charnam, values_from = val)

# SC readings, one row per sample.
specific_conductance <- data_tidy %>%
  filter(!is.na(`Specific conductance`)) %>%
  select(all_of(sample_keys), `Specific conductance`)

# TDS readings, one row per sample. TDS is reported in several units
# (mg/l, tons/ac ft, tons/day); keep mg/l only so the axis has a single unit.
total_dissolved_solids <- data_tidy %>%
  filter(valunit == "mg/l", !is.na(`Total dissolved solids`)) %>%
  select(all_of(sample_keys), `Total dissolved solids`)

# Keep only samples where both parameters were measured. In the 20-row sample
# above that is a single sample (TDS 218 mg/l, SC 391 uS/cm).
combined <- total_dissolved_solids %>%
  inner_join(specific_conductance, by = sample_keys)

ggplot(
  combined,
  aes(x = `Specific conductance`, y = `Total dissolved solids`)
) +
  geom_point()
Upvotes: 0