Plot two time series from two data sets with different y-intervals

Question

I have the following data:

# First data set: a 102-row data.frame with two columns,
#   Height - integer heights (in meters, per the question text), ascending
#   Temp   - temperature at each height (in deg C, per the question text)
# The Height values are irregularly spaced and do not line up with dat2.
dat1<-structure(list(Height = c(614L, 672L, 789L, 1086L, 1252L, 1342L, 
1523L, 2141L, 2316L, 2426L, 2549L, 2788L, 2858L, 3080L, 3175L, 
3223L, 3880L, 3983L, 4166L, 4285L, 4339L, 4365L, 4488L, 4625L, 
4723L, 4765L, 5525L, 5570L, 5910L, 5941L, 5957L, 6196L, 6359L, 
6425L, 6475L, 6745L, 7079L, 7480L, 7630L, 7782L, 7898L, 7996L, 
8314L, 8520L, 8688L, 8709L, 8794L, 8880L, 9055L, 9099L, 9210L, 
9323L, 9740L, 9883L, 9956L, 10227L, 10302L, 10429L, 11010L, 11370L, 
11996L, 12087L, 12500L, 12565L, 12825L, 13167L, 13418L, 13525L, 
13859L, 14053L, 14210L, 14290L, 14660L, 14789L, 15002L, 15314L, 
16044L, 16420L, 16584L, 16640L, 17049L, 17491L, 17829L, 17991L, 
18117L, 18131L, 18506L, 18670L, 18841L, 19582L, 19885L, 20086L, 
20319L, 20670L, 21044L, 21109L, 21449L, 21519L, 21739L, 21906L, 
22046L, 22125L), Temp = c(25.6, 24.4, 23.4, 20.2, 21, 20.5, 19.4, 
17.2, 16.8, 16.1, 15.4, 13.4, 13.4, 11.6, 11.6, 11.4, 6.8, 6.4, 
5.4, 5.2, 4.8, 4.8, 4.8, 4.4, 3.8, 4, -2.1, -2.5, -5.9, -6.3, 
-6.4, -8.1, -8.2, -8.3, -7.9, -9.9, -12.3, -13.3, -14.7, -15.7, 
-16.4, -16.9, -19.6, -21.3, -22, -22.1, -22.5, -23.3, -24.1, 
-24.5, -25.5, -26.3, -29.7, -30.8, -31.3, -33.9, -34.4, -35.1, 
-39.9, -42.9, -48.1, -48.9, -52.7, -53.3, -55.3, -58, -59.9, 
-60.9, -63.8, -65.5, -66.6, -67.1, -70.5, -71.7, -73.1, -75, 
-79.7, -82.1, -81.9, -81.9, -81.3, -80.6, -80.1, -79.9, -76.7, 
-76.3, -74.8, -74.1, -73.2, -69.5, -67.9, -66.9, -67, -67.1, 
-66.4, -66.3, -60.7, -59.5, -58.6, -57.9, -58.9, -59.5)), class = "data.frame", row.names =     
c(NA,-102L))

# Second data set: a 94-row data.frame with the same two columns as dat1,
#   Height - integer heights (in meters, per the question text), ascending
#   Temp   - temperature at each height (in deg C, per the question text)
# Its Height values are sampled at different, irregular levels than dat1.
dat2<-structure(list(Height = c(614L, 651L, 776L, 957L, 1093L, 1152L, 
1362L, 1514L, 1575L, 1875L, 2186L, 2531L, 2632L, 3168L, 3618L, 
3834L, 4170L, 4396L, 4451L, 4573L, 4893L, 5372L, 5461L, 5704L, 
5890L, 5906L, 5953L, 5985L, 6033L, 6081L, 6178L, 6210L, 6309L, 
6801L, 6924L, 7120L, 7174L, 7506L, 7563L, 7620L, 7832L, 7929L, 
8309L, 8774L, 9035L, 9329L, 9726L, 9750L, 10312L, 11020L, 11645L, 
11851L, 12510L, 12573L, 12897L, 13239L, 13564L, 13906L, 14310L, 
14632L, 14973L, 15671L, 15923L, 15973L, 16338L, 16613L, 16670L, 
17016L, 17385L, 17462L, 17539L, 17712L, 18062L, 18670L, 18753L, 
19189L, 19308L, 19570L, 19721L, 19877L, 20317L, 20432L, 20670L, 
21049L, 21412L, 21453L, 21739L, 22147L, 22194L, 22241L, 22982L, 
23037L, 23373L, 23393L), Temp = c(24.2, 24, 23.6, 22.3, 21.4, 
22.8, 22.8, 21.4, 21, 18.8, 16.5, 14, 14.4, 11.2, 8.9, 7.8, 5.6, 
3.8, 3.4, 2.6, 0.8, -2.5, -2.9, -4.7, -5.5, -5.5, -5.7, -5.9, 
-6.1, -5.3, -5.9, -5.5, -5.5, -9.3, -9.3, -10.4, -10.7, -12.6, 
-12.9, -13.5, -15.1, -15.9, -18.8, -22.5, -23.9, -25.5, -28.7, 
-28.9, -33.7, -39.3, -43.7, -45.6, -51.5, -52, -54.8, -57.7, 
-60.5, -63.5, -66.9, -69.4, -72, -77.4, -79.3, -79.3, -79.2, 
-79.1, -79.5, -81.1, -82.8, -83.1, -83.1, -82.2, -80.5, -77.5, 
-77, -74.6, -73.9, -69.8, -67.5, -67.5, -67.3, -67.3, -65.7, 
-63.8, -61.9, -62.1, -63.3, -65.1, -63.9, -62.7, -63.9, -63.3, 
-59.7, -59.5)), class = "data.frame", row.names = c(NA, -94L))

The two data sets each have two columns: height (in meters) and temperature (in deg C).

I would like to plot these two data sets (height as the y-axis, temperature as the x-axis) using R.

My problem is they don't have equal intervals of height.

1. Is it possible to create a common interval for both data sets? For example, from 0 to 80 km in 10 km steps.
2. From the intervals in (1), get the average temperature per interval.
3. Plot a single height (y-axis) vs. temperature (x-axis) plot for the two data sets.

Currently, I manually do this in Excel including the averaging.

M-- · Accepted Answer

You can bind the two datasets and plot them together. To build the intervals we can use Hmisc::cut2. For a more statistically robust outcome, I set the minimum number of observations in each group (m) instead of hard-coding the cut points. On the y-axis I plot the average height of each interval rather than the interval itself, and I show the actual intervals as text labels.

If you want to go with hard-coded intervals, you can refer to the other answer and improve upon both answers by comparing them.

library(ggplot2)
library(dplyr)
library(Hmisc)
library(ggrepel)


# Build `dat`: mean temperature per height bin, separately for each data set.
#
# Both data sets are stacked first so that the bins are computed once on the
# combined heights and are therefore shared by the two profiles.
# `.id = "Dataset"` adds a character column telling which input a row came
# from ("1" = dat1, "2" = dat2).
dat <- dat1 %>%
  bind_rows(dat2, .id = "Dataset") %>%
  mutate(
    # cut2() chooses the cut points itself so that each bin holds at least
    # m = 20 observations. With levels.mean = TRUE the factor levels are the
    # mean height of each bin, so factor -> character -> numeric gives a
    # numeric bin centre usable as a y coordinate.
    `Height Interval Average` =
      as.numeric(as.character(cut2(Height, m = 20, levels.mean = TRUE))),
    # Same binning again, this time keeping the interval labels as text.
    `Height Intervals` = as.character(cut2(Height, m = 20))
  ) %>%
  # One output row per (data set, height bin).
  group_by(Dataset, `Height Interval Average`) %>%
  summarise(
    `Average Temperature` = mean(Temp, na.rm = TRUE),
    # All rows in a bin carry the same label; keep one of them.
    `Height Intervals` = first(`Height Intervals`),
    .groups = "drop"
  )

 # Vertical temperature profile: binned mean temperature on x, mean bin height
 # on y, one coloured line per data set.
 ggplot(
   data = dat,
   aes(
     x = `Average Temperature`, y = `Height Interval Average`,
     group = Dataset, color = Dataset
   )
 ) +
  # geom_path() joins points in row order. `dat` is ordered by height within
  # each data set, so the profile is traced bottom to top. geom_line() would
  # instead join points in order of x (temperature), which scrambles the
  # upper levels where temperature rises again with height.
  geom_path() +
  geom_point() +
  # Label each bin with its height interval. The bins are shared by both data
  # sets, so only the first data set's points are labelled to avoid
  # duplicates. x and y are inherited from the top-level mapping.
  geom_text_repel(
    data = dat %>% filter(Dataset == "1"),
    aes(group = 1, label = `Height Intervals`),
    show.legend = FALSE, color = "black"
  )

Plot two time series from two data sets with different y-intervals

Answers (2)

Related Questions