Lyndz
Lyndz

Reputation: 423

Plot two time series from two data sets with different y-intervals

I have the following data:

# First data set: one row per measurement, 102 rows in total.
#   Height - height in meters (integer)
#   Temp   - temperature in degrees C
dat1<-structure(list(Height = c(614L, 672L, 789L, 1086L, 1252L, 1342L, 
1523L, 2141L, 2316L, 2426L, 2549L, 2788L, 2858L, 3080L, 3175L, 
3223L, 3880L, 3983L, 4166L, 4285L, 4339L, 4365L, 4488L, 4625L, 
4723L, 4765L, 5525L, 5570L, 5910L, 5941L, 5957L, 6196L, 6359L, 
6425L, 6475L, 6745L, 7079L, 7480L, 7630L, 7782L, 7898L, 7996L, 
8314L, 8520L, 8688L, 8709L, 8794L, 8880L, 9055L, 9099L, 9210L, 
9323L, 9740L, 9883L, 9956L, 10227L, 10302L, 10429L, 11010L, 11370L, 
11996L, 12087L, 12500L, 12565L, 12825L, 13167L, 13418L, 13525L, 
13859L, 14053L, 14210L, 14290L, 14660L, 14789L, 15002L, 15314L, 
16044L, 16420L, 16584L, 16640L, 17049L, 17491L, 17829L, 17991L, 
18117L, 18131L, 18506L, 18670L, 18841L, 19582L, 19885L, 20086L, 
20319L, 20670L, 21044L, 21109L, 21449L, 21519L, 21739L, 21906L, 
22046L, 22125L), Temp = c(25.6, 24.4, 23.4, 20.2, 21, 20.5, 19.4, 
17.2, 16.8, 16.1, 15.4, 13.4, 13.4, 11.6, 11.6, 11.4, 6.8, 6.4, 
5.4, 5.2, 4.8, 4.8, 4.8, 4.4, 3.8, 4, -2.1, -2.5, -5.9, -6.3, 
-6.4, -8.1, -8.2, -8.3, -7.9, -9.9, -12.3, -13.3, -14.7, -15.7, 
-16.4, -16.9, -19.6, -21.3, -22, -22.1, -22.5, -23.3, -24.1, 
-24.5, -25.5, -26.3, -29.7, -30.8, -31.3, -33.9, -34.4, -35.1, 
-39.9, -42.9, -48.1, -48.9, -52.7, -53.3, -55.3, -58, -59.9, 
-60.9, -63.8, -65.5, -66.6, -67.1, -70.5, -71.7, -73.1, -75, 
-79.7, -82.1, -81.9, -81.9, -81.3, -80.6, -80.1, -79.9, -76.7, 
-76.3, -74.8, -74.1, -73.2, -69.5, -67.9, -66.9, -67, -67.1, 
-66.4, -66.3, -60.7, -59.5, -58.6, -57.9, -58.9, -59.5)), class = "data.frame", row.names =     
c(NA,-102L))

# Second data set: same two columns as dat1, 94 rows in total.
#   Height - height in meters (integer)
#   Temp   - temperature in degrees C
dat2<-structure(list(Height = c(614L, 651L, 776L, 957L, 1093L, 1152L, 
1362L, 1514L, 1575L, 1875L, 2186L, 2531L, 2632L, 3168L, 3618L, 
3834L, 4170L, 4396L, 4451L, 4573L, 4893L, 5372L, 5461L, 5704L, 
5890L, 5906L, 5953L, 5985L, 6033L, 6081L, 6178L, 6210L, 6309L, 
6801L, 6924L, 7120L, 7174L, 7506L, 7563L, 7620L, 7832L, 7929L, 
8309L, 8774L, 9035L, 9329L, 9726L, 9750L, 10312L, 11020L, 11645L, 
11851L, 12510L, 12573L, 12897L, 13239L, 13564L, 13906L, 14310L, 
14632L, 14973L, 15671L, 15923L, 15973L, 16338L, 16613L, 16670L, 
17016L, 17385L, 17462L, 17539L, 17712L, 18062L, 18670L, 18753L, 
19189L, 19308L, 19570L, 19721L, 19877L, 20317L, 20432L, 20670L, 
21049L, 21412L, 21453L, 21739L, 22147L, 22194L, 22241L, 22982L, 
23037L, 23373L, 23393L), Temp = c(24.2, 24, 23.6, 22.3, 21.4, 
22.8, 22.8, 21.4, 21, 18.8, 16.5, 14, 14.4, 11.2, 8.9, 7.8, 5.6, 
3.8, 3.4, 2.6, 0.8, -2.5, -2.9, -4.7, -5.5, -5.5, -5.7, -5.9, 
-6.1, -5.3, -5.9, -5.5, -5.5, -9.3, -9.3, -10.4, -10.7, -12.6, 
-12.9, -13.5, -15.1, -15.9, -18.8, -22.5, -23.9, -25.5, -28.7, 
-28.9, -33.7, -39.3, -43.7, -45.6, -51.5, -52, -54.8, -57.7, 
-60.5, -63.5, -66.9, -69.4, -72, -77.4, -79.3, -79.3, -79.2, 
-79.1, -79.5, -81.1, -82.8, -83.1, -83.1, -82.2, -80.5, -77.5, 
-77, -74.6, -73.9, -69.8, -67.5, -67.5, -67.3, -67.3, -65.7, 
-63.8, -61.9, -62.1, -63.3, -65.1, -63.9, -62.7, -63.9, -63.3, 
-59.7, -59.5)), class = "data.frame", row.names = c(NA, -94L))

The two data sets have two columns height (in meters) and temperature (in deg C).

I would like to plot these two data sets (height on the y-axis, temperature on the x-axis) using R.

My problem is they don't have equal intervals of height.

  1. Is it possible to create a common set of height intervals for both data sets? For example, from 0 to 80 km in 10 km steps.

  2. From the interval in (1), get the average temperature per interval.

  3. Plot a single height (y-axis) vs. temperature (x-axis) plot for the two data sets.

Currently, I manually do this in Excel including the averaging.

Upvotes: 0

Views: 275

Answers (2)

Ray
Ray

Reputation: 2268

You basically "coded" your approach already with your 1, 2, and 3.

I combine both data sets into one and add a series id to it. You could keep them separate, depending on your task/sample size.
cut() allows you to define the breaks for your intervals. As the sample heights all lie below 30000 m, I define the breaks as a sequence, seq(), from 0 to 30000 m spaced by 5000 m. Adapt as needed to meet your requirements.

library(dplyr)

# Tag every row with the id of the data set it came from. The id is stored as
# a character string so that ggplot treats it as a discrete variable.
dat1 <- mutate(dat1, set = "1")
dat2 <- mutate(dat2, set = "2")

# Stack both data sets into one long table.
dat <- bind_rows(dat1, dat2)

# Group by data set and by 5000 m height bin (the bin column is created
# directly inside group_by()), then average the temperature within each group.
dat_bin <- dat %>%
  group_by(
    set,
    Height_bin = cut(Height, breaks = seq(from = 0, to = 30000, by = 5000))
  ) %>%
  summarise(Mean_temp = mean(Temp), .groups = "drop")

This gives you (only the first five of the ten rows are shown):

# A tibble: 10 x 3
   set   Height_bin      Mean_temp
   <chr> <fct>               <dbl>
 1 1     (0,5e+03]            12.8
 2 1     (5e+03,1e+04]       -16.5
 3 1     (1e+04,1.5e+04]     -54.1
 4 1     (1.5e+04,2e+04]     -76.9
 5 1     (2e+04,2.5e+04]     -62.6

You can now plot the bins against the average temperature values and use the colour to differentiate the two sets.

# ggplot2 must be attached for ggplot(), geom_point() and theme_minimal().
library(ggplot2)

# Plot the binned averages. The summary table `dat_bin` is used here because
# the columns Mean_temp and Height_bin only exist there, not in the raw `dat`.
# One point is drawn per (set, height bin), coloured by data set.
ggplot(dat_bin) +
  geom_point(aes(x = Mean_temp, y = Height_bin, colour = set)) +
  theme_minimal()

Obviously, you can "beautify" the plot to your liking. For "nicer" break labels, you can use the labels argument of the cut() call.

Upvotes: 1

M--
M--

Reputation: 28930

You can bind the two datasets and plot them. For making intervals we can use Hmisc::cut2. For a more statistically robust outcome, I defined the minimum number of observations in each group (m) instead of hard-coding the cut points. On the y-axis I plot the mean height within each interval rather than the interval itself, and I show the actual intervals as text labels.

If you want to go with hard-coded intervals, you can refer to the other answer and improve upon both answers by comparing them.

library(ggplot2)
library(dplyr)
library(Hmisc)
library(ggrepel)


# Stack both data sets (bind_rows() adds a character id column "Dataset" with
# values "1" and "2"), bin the heights with Hmisc::cut2() using a target of at
# least 20 observations per bin (m = 20), and average the temperature per
# data set and bin.
#   `Height Interval Average` - mean height inside the bin (levels.mean = TRUE)
#   `Height Intervals`        - the bin limits as text, used as plot labels
dat <- dat1 %>%
  bind_rows(dat2, .id = "Dataset") %>%
  mutate(
    `Height Interval Average` =
      as.numeric(as.character(cut2(Height, m = 20, levels.mean = TRUE))),
    `Height Intervals` = as.character(cut2(Height, m = 20))
  ) %>%
  group_by(Dataset, `Height Interval Average`) %>%
  summarise(
    `Average Temperature` = mean(Temp, na.rm = TRUE),
    `Height Intervals` = first(`Height Intervals`),
    .groups = "drop"
  ) %>%
  # geom_path() below connects points in row order, so sort by height.
  arrange(Dataset, `Height Interval Average`)

ggplot(
  data = dat,
  aes(
    x = `Average Temperature`, y = `Height Interval Average`,
    group = Dataset, color = Dataset
  )
) +
  # geom_path() instead of geom_line(): geom_line() joins points in order of
  # x (temperature), which is not monotonic with height and would zig-zag.
  geom_path() +
  geom_point() +
  # Label the intervals once only, using the rows of the first data set.
  geom_text_repel(
    data = filter(dat, Dataset == "1"),
    aes(
      x = `Average Temperature`, y = `Height Interval Average`,
      group = 1, label = `Height Intervals`
    ),
    show.legend = FALSE, color = "black"
  )

Upvotes: 2

Related Questions