Reputation: 37
Sorry in advance is there is something wrong or confusing, I'm just a biologist with some basic R.
First of all I'll give some insight in the idea behind the script. I did some experiments where flies were exposed to different ramps of increasing temperatures, using a self-built device for this purpose. Having the need to know if the device increases the temperature correctly, I had to make calibration curves for every ramp. Calibration curves were measured over time with a temperature sensor.
dput
of my first df datos
(data in spanish) that is a resume of the temperature measured for 3 different ramps (0,06 °C/min, 0,12 °C/min, 0,25°C/min)
structure(list(date2 = c("03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021", "03/02/2021",
"03/02/2021", "03/02/2021"), time2 = c("14:56:32", "14:56:42",
"14:56:52", "14:57:02", "14:57:12", "14:57:22", "14:57:32", "14:57:42",
"14:57:52", "14:58:02", "14:58:12", "14:58:22", "14:58:32", "14:58:42",
"14:58:52", "14:59:02", "14:59:12", "14:59:22", "14:59:32", "14:59:42",
"14:59:52", "15:00:02", "15:00:12", "15:00:22", "15:00:32", "15:00:42",
"15:00:52", "15:01:02", "15:01:12", "15:01:22"), temperature = c(25.2,
25.8, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.7, 25.5, 25.6, 25.5,
25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6, 25.6,
25.6, 25.6, 25.8, 25.8, 25.8, 25.8, 25.8), grupo = c("voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft"), segundos = c(1L,
11L, 21L, 31L, 41L, 51L, 61L, 71L, 81L, 91L, 101L, 111L, 121L,
131L, 141L, 151L, 161L, 171L, 181L, 191L, 201L, 211L, 221L, 231L,
241L, 251L, 261L, 271L, 281L, 291L), id3 = 1:30, curva = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("a",
"b", "c", "d", "e", "f", "g", "h", "i", "j"), class = "factor"),
modo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "h", class = "factor"), sujeto = c("agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus"), tamb = c(26.7, 26.7,
26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7,
26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7,
26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7, 26.7), tratamiento = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0.06",
"0.125", "0.25"), class = "factor"), datetime = structure(c(1612364192,
1612364202, 1612364212, 1612364222, 1612364232, 1612364242,
1612364252, 1612364262, 1612364272, 1612364282, 1612364292,
1612364302, 1612364312, 1612364322, 1612364332, 1612364342,
1612364352, 1612364362, 1612364372, 1612364382, 1612364392,
1612364402, 1612364412, 1612364422, 1612364432, 1612364442,
1612364452, 1612364462, 1612364472, 1612364482), class = c("POSIXct",
"POSIXt"), tzone = "GMT")), row.names = c(NA, 30L), class = "data.frame")
dput
of a df datos2
that is the same resume of calibration curves, but filtered for well measured curves (those with the smallest possible variation).
structure(list(date2 = c("01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021", "01/03/2021",
"01/03/2021", "01/03/2021"), time2 = c("10:24:28", "10:24:38",
"10:24:48", "10:24:58", "10:25:08", "10:25:18", "10:25:28", "10:25:38",
"10:25:48", "10:25:58", "10:26:08", "10:26:18", "10:26:28", "10:26:38",
"10:26:48", "10:26:58", "10:27:08", "10:27:18", "10:27:28", "10:27:38",
"10:27:48", "10:27:58", "10:28:08", "10:28:18", "10:28:28", "10:28:38",
"10:28:48", "10:28:58", "10:29:08", "10:29:18"), temperature = c(26.2,
26.8, 26.6, 26.7, 26.7, 26.7, 26.7, 26.8, 26.8, 26.8, 26.9, 26.8,
26.8, 26.7, 26.6, 26.6, 26.7, 26.6, 26.7, 26.7, 26.6, 26.6, 26.6,
26.7, 26.6, 26.9, 26.8, 26.8, 26.8, 26.8), grupo = c("voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft", "voltcraft",
"voltcraft", "voltcraft", "voltcraft", "voltcraft"), segundos = c(1L,
11L, 21L, 31L, 41L, 51L, 61L, 71L, 81L, 91L, 101L, 111L, 121L,
131L, 141L, 151L, 161L, 171L, 181L, 191L, 201L, 211L, 221L, 231L,
241L, 251L, 261L, 271L, 281L, 291L), id3 = 5315:5344, curva = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("a",
"b", "c", "d", "e", "f", "g", "h", "i", "j"), class = "factor"),
modo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "h", class = "factor"), sujeto = c("agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus", "agus", "agus", "agus",
"agus", "agus", "agus", "agus", "agus"), tamb = c(28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28), tratamiento = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0.06",
"0.125", "0.25"), class = "factor"), datetime = structure(c(1614594268,
1614594278, 1614594288, 1614594298, 1614594308, 1614594318,
1614594328, 1614594338, 1614594348, 1614594358, 1614594368,
1614594378, 1614594388, 1614594398, 1614594408, 1614594418,
1614594428, 1614594438, 1614594448, 1614594458, 1614594468,
1614594478, 1614594488, 1614594498, 1614594508, 1614594518,
1614594528, 1614594538, 1614594548, 1614594558), class = c("POSIXct",
"POSIXt"), tzone = "GMT")), row.names = c(NA, 30L), class = "data.frame")
Essentially is the same df just filtered as it follows
datos2=datos %>% filter((tratamiento == "0.125" & segundos < 4001 & (curva ==
"a" | curva == "b"|curva == "c" | curva == "d" | curva == "f")) | (tratamiento
== "0.25" & segundos < 4001 & (curva == "a" | curva == "e" | curva == "j")) |
(tratamiento == "0.06" & segundos < 4001 & (curva == "d" | curva == "e"| curva == "f")) )
Plot of datos2
for 3 different ramps (this will be needed as an idea for the final plot)
Next I used a linear model (tested the assumptions as well) so I could get the real coefficients and with that the "real" slope of each curve that relates to the real increasing temperature. For this chunk I used
modelo <- lm(datos2$temperature~datos2$segundos*datos2$tratamiento)
Coefficients obtained by linear model (slope in minutes):
> data.frame(modelo$coefficients)
modelo.coefficients
(Intercept) 26.3091784959
datos2$segundos 0.0006260104
datos2$tratamiento0.125 -1.0105069266
datos2$tratamiento0.25 -1.2511820668
datos2$segundos:datos2$tratamiento0.125 0.0015812593
datos2$segundos:datos2$tratamiento0.25 0.0047528818
#0.06
modelo$coefficients[1]=25 #intercept
round(modelo$coefficients[2]*60,3) #slope
#0.125
modelo$coefficients[1]+modelo$coefficients[3]
round((modelo$coefficients[2]+modelo$coefficients[5])*60,3)
#0.25
modelo$coefficients[1]+modelo$coefficients[4]
round((modelo$coefficients[2]+modelo$coefficients[6])*60,3)
Summed up
0.06 <- 0.038
0.125 <- 0.132
0.25 <- 0.323
Finally, I tried to merge all of this in a single plot. For this I generated a predicted ramp using the estimated ramp equation for each treatment (each curve). I used a small vector for this (10800 as last number so all curves intersect X at the same time). And last I applied predict as a function to apply estimated equation to calculate Y that should be expected. This is where I get all kind of errors that I dont understand and couldnt resolve.
new_datos <- data.frame(segundos = rep(seq(0,10800,length.out=100),3),tratamiento = c(rep("0.06",200),rep("0.125",200),rep("0.25",200)))
dput
of new_datos
structure(list(segundos = c(0, 109.090909090909, 218.181818181818,
327.272727272727, 436.363636363636, 545.454545454545, 654.545454545455,
763.636363636364, 872.727272727273, 981.818181818182, 1090.90909090909,
1200, 1309.09090909091, 1418.18181818182, 1527.27272727273, 1636.36363636364,
1745.45454545455, 1854.54545454545, 1963.63636363636, 2072.72727272727,
2181.81818181818, 2290.90909090909, 2400, 2509.09090909091, 2618.18181818182,
2727.27272727273, 2836.36363636364, 2945.45454545455, 3054.54545454545,
3163.63636363636), tratamiento = c("0.06", "0.06", "0.06", "0.06",
"0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06",
"0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06",
"0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06", "0.06",
"0.06", "0.06")), row.names = c(NA, 30L), class = "data.frame")
Rest of the script for linear model:
b <- data.frame(predict.lm(modelo,data=new_datos,interval = "prediction"))
nrow(b)
new_datos$temperature <- b[,1]
new_datos$lwr <- b[,2]
new_datos$upr <- b[,3]
new_datos[new_datos$segundos==0]
levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.06"] <- "0.038 ºC/min"
levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.125"] <- "0.132 ºC/min"
levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.25"] <- "0.323 ºC/min"
# Split datos into Training and Testing
sample_size = floor(0.2*nrow(datos2))
# randomly split datos
picked = sample(seq_len(nrow(datos2)),size = sample_size)
development =datos2[picked,]
And errors for this last part
> b <- data.frame(predict.lm(modelo,data=new_datos,interval = "prediction"))
Warning message:
In predict.lm(modelo, data = new_datos, interval = "prediction") :
predictions on current data refer to _future_ responses
> new_datos$temperature <- b[,1]
Error in `$<-.data.frame`(`*tmp*`, temperature, value = c(25.0006260103883, :
replacement has 7452 rows, data has 600
> new_datos$lwr <- b[,2]
Error in `$<-.data.frame`(`*tmp*`, lwr, value = c(22.6325633882165, 22.638854566693, :
replacement has 7452 rows, data has 600
> levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.06"] <- "0.038 ºC/min"
Error in `levels<-`(`*tmp*`, value = c("0.038 ºC/min", NA, NA)) :
factor level [3] is duplicated
> levels(new_datos$tratamiento)[levels(datos$tratamiento)=="0.125"] <- "0.132 ºC/min"
Error in `levels<-`(`*tmp*`, value = c(NA, "0.132 ºC/min", NA)) :
factor level [3] is duplicated
Question is: Is there a way to get every curve (might be in grey in the back) in the Sup.3. in a single plot and in addition 3 curves for every coefficient (slope) calculated superimposed over the curves (in grey in the back)?
Thanks!
EDIT:
Minimum reproducible example:
temperature segundos curva tratamiento
1 25.2 1 a 0.06
2 25.8 11 a 0.06
3 25.6 21 a 0.06
4 25.6 31 a 0.06
5 25.6 41 a 0.06
6 25.6 51 a 0.06
7 25.6 61 a 0.06
8 25.6 71 a 0.06
9 25.7 81 a 0.06
10 25.5 91 a 0.06
11 25.6 101 a 0.06
12 25.5 111 a 0.06
13 25.6 121 a 0.06
14 25.6 131 a 0.06
15 25.6 141 a 0.06
16 25.6 151 a 0.06
17 25.6 161 a 0.06
18 25.6 171 a 0.06
19 25.6 181 a 0.06
20 25.6 191 a 0.06
21 25.6 201 a 0.06
22 25.6 211 a 0.06
23 25.6 221 a 0.06
24 25.6 231 a 0.06
25 25.6 241 a 0.06
26 25.8 251 a 0.06
27 25.8 261 a 0.06
28 25.8 271 a 0.06
29 25.8 281 a 0.06
30 25.8 291 a 0.06
31 25.7 301 a 0.06
32 25.8 311 a 0.06
33 25.7 321 a 0.06
34 25.7 331 a 0.06
35 25.8 341 a 0.06
36 25.8 351 a 0.06
37 25.7 361 a 0.06
38 25.7 371 a 0.06
39 25.7 381 a 0.06
40 25.9 391 a 0.06
41 25.9 401 a 0.06
42 25.9 411 a 0.06
43 25.9 421 a 0.06
44 25.9 431 a 0.06
45 25.9 441 a 0.06
46 25.9 451 a 0.06
47 25.9 461 a 0.06
48 25.9 471 a 0.06
49 25.9 481 a 0.06
50 25.9 491 a 0.06
51 25.9 501 a 0.06
52 25.9 511 a 0.06
53 25.9 521 a 0.06
54 25.9 531 a 0.06
55 25.9 541 a 0.06
56 25.9 551 a 0.06
57 25.9 561 a 0.06
58 25.9 571 a 0.06
59 25.9 581 a 0.06
60 25.9 591 a 0.06
61 25.8 601 a 0.06
62 25.9 611 a 0.06
63 26.0 621 a 0.06
64 25.9 631 a 0.06
65 26.2 641 a 0.06
66 26.1 651 a 0.06
67 26.1 661 a 0.06
68 26.1 671 a 0.06
69 26.1 681 a 0.06
70 26.1 691 a 0.06
71 26.1 701 a 0.06
72 26.1 711 a 0.06
73 26.2 721 a 0.06
74 26.1 731 a 0.06
75 26.1 741 a 0.06
76 26.1 751 a 0.06
77 26.1 761 a 0.06
78 26.1 771 a 0.06
79 26.2 781 a 0.06
80 26.3 791 a 0.06
81 26.1 801 a 0.06
82 26.3 811 a 0.06
83 26.1 821 a 0.06
84 26.3 831 a 0.06
85 26.1 841 a 0.06
86 26.3 851 a 0.06
87 26.4 861 a 0.06
88 26.3 871 a 0.06
89 26.3 881 a 0.06
90 26.3 891 a 0.06
91 26.3 901 a 0.06
92 26.3 911 a 0.06
93 26.3 921 a 0.06
94 26.3 931 a 0.06
95 26.3 941 a 0.06
96 26.2 951 a 0.06
97 26.3 961 a 0.06
98 26.3 971 a 0.06
99 26.3 981 a 0.06
100 26.3 991 a 0.06
101 26.5 1001 a 0.06
102 26.3 1011 a 0.06
103 26.5 1021 a 0.06
104 26.5 1031 a 0.06
105 26.5 1041 a 0.06
106 26.5 1051 a 0.06
107 26.5 1061 a 0.06
108 26.5 1071 a 0.06
109 26.6 1081 a 0.06
110 26.5 1091 a 0.06
111 26.5 1101 a 0.06
112 26.5 1111 a 0.06
113 26.6 1121 a 0.06
114 26.5 1131 a 0.06
115 26.5 1141 a 0.06
116 26.5 1151 a 0.06
117 26.5 1161 a 0.06
118 26.5 1171 a 0.06
119 26.6 1181 a 0.06
120 26.5 1191 a 0.06
121 26.5 1201 a 0.06
122 26.5 1211 a 0.06
123 26.5 1221 a 0.06
124 26.6 1231 a 0.06
125 26.5 1241 a 0.06
126 26.6 1251 a 0.06
127 26.7 1261 a 0.06
128 26.7 1271 a 0.06
129 26.8 1281 a 0.06
130 26.5 1291 a 0.06
131 26.7 1301 a 0.06
132 26.7 1311 a 0.06
133 26.7 1321 a 0.06
134 26.8 1331 a 0.06
135 26.7 1341 a 0.06
136 26.5 1351 a 0.06
137 26.7 1361 a 0.06
138 26.7 1371 a 0.06
139 26.8 1381 a 0.06
140 26.7 1391 a 0.06
141 26.8 1401 a 0.06
142 26.7 1411 a 0.06
143 26.8 1421 a 0.06
144 26.8 1431 a 0.06
145 26.7 1441 a 0.06
146 26.7 1451 a 0.06
147 26.8 1461 a 0.06
148 26.8 1471 a 0.06
149 26.7 1481 a 0.06
150 26.8 1491 a 0.06
151 26.7 1501 a 0.06
152 26.7 1511 a 0.06
153 26.7 1521 a 0.06
154 26.7 1531 a 0.06
155 26.8 1541 a 0.06
156 26.8 1551 a 0.06
157 26.7 1561 a 0.06
158 26.8 1571 a 0.06
159 26.7 1581 a 0.06
160 26.8 1591 a 0.06
161 27.0 1601 a 0.06
162 27.0 1611 a 0.06
163 27.0 1621 a 0.06
164 27.0 1631 a 0.06
165 27.0 1641 a 0.06
166 27.0 1651 a 0.06
167 27.0 1661 a 0.06
168 27.0 1671 a 0.06
169 27.0 1681 a 0.06
170 27.0 1691 a 0.06
171 27.0 1701 a 0.06
172 27.0 1711 a 0.06
173 27.0 1721 a 0.06
174 27.0 1731 a 0.06
175 27.0 1741 a 0.06
176 26.9 1751 a 0.06
177 27.0 1761 a 0.06
178 27.0 1771 a 0.06
179 27.0 1781 a 0.06
180 27.0 1791 a 0.06
181 27.0 1801 a 0.06
182 27.0 1811 a 0.06
183 27.3 1821 a 0.06
184 27.2 1831 a 0.06
185 27.2 1841 a 0.06
186 27.2 1851 a 0.06
187 27.2 1861 a 0.06
188 27.3 1871 a 0.06
189 27.2 1881 a 0.06
190 27.2 1891 a 0.06
191 27.2 1901 a 0.06
192 27.2 1911 a 0.06
193 27.2 1921 a 0.06
194 27.2 1931 a 0.06
195 27.2 1941 a 0.06
196 27.4 1951 a 0.06
197 27.4 1961 a 0.06
198 27.5 1971 a 0.06
199 27.4 1981 a 0.06
200 27.3 1991 a 0.06
Sketch:
This is an idea, but with the 3 treatments in a single plot (as in next image)
The idea is to plot adjusted curves of estimated ramps (in any combination of colors) and real curves obtained too (in grey so the slope of the curves is "highlighted")
EDIT2: Plot with suggestions provided:
Yet this is without training and testing data, thus it is not the 'real' slopes measured. When I try to make a plot using b
:
#solved errors making a bigger vector to fit my data
new_datos <- data.frame(segundos = rep(seq(0,5000,length.out=2484),1),tratamiento = c(rep("0.06",2484),rep("0.125",2484),rep("0.25",2484)))
Since I've never provided proper way on how I'm trying to plot:
ggplot(new_datos,aes(x=segundos,y=temperature,group = tratamiento,colour=tratamiento)) +
geom_point(data=development,aes(x=segundos,y=temperature,group = curva),size = 1,alpha=0.8,color="grey50",show.legend = FALSE) +
geom_point(size = 3,show.legend = FALSE) + labs(x="Tiempo (s)", y="Temperatura (ºC)") +
geom_text(aes(x = segundos, y = temperature+1, label = c("0,038 °C/min", "0.132 ºC/min","0.323 ºC/min")),color = "black",data = new_datos[c(200,250,500),]) +
scale_color_manual(values = c("#3B9AB2", "#E8C520", "#F21A00","grey"))
Then script is the same as before.
Final plot:
Upvotes: 1
Views: 245
Reputation: 2142
I'm generating some test data and using that to demonstrate how to achieve what I think you're asking, because the data you've supplied is still not complete enough to run your code (which isn't a big problem - I think this sample data illustrates the concepts well enough):
# How many 'curves' and 'treatments' do we want?
n_curve <- 6
n_treat <- 3
n <- n_curve * n_treat
# Take the cars dataset and repeat it n times
dataDF <- do.call("rbind", replicate(n, cars, simplify = FALSE))
# Assign a curve ID
dataDF$curve <- as.factor(rep(letters[1:n_curve], each=nrow(cars)))
# Assign a treatment ID
dataDF$treatment <- as.factor(rep(LETTERS[1:n_treat], each=nrow(cars)))
# Adjust the data by a small, random number so that our curves aren't identical
dataDF$speed2 <- dataDF$speed + dataDF$speed * (sample(c(-6:6), size=nrow(dataDF), replace=T) / 100)
dataDF$dist2 <- dataDF$dist + dataDF$dist * (sample(c(-6:6), size=nrow(dataDF), replace=T) / 100)
head(dataDF)
# Make sure that data differ between treatments
dataDF$dist2 <- dataDF$dist2 * (as.numeric(dataDF$treatment)^2)
# The 'real' data are plotted in grey
# I'm using geom_smooth() to build a linear model of the data and
# then plotting that in red. You can instead build a separate lm
# and use the predict() function to generate new data, and then
# plot that here by adding another line along the lines of:
# geom_line(data=new_datos, aes(x=segundos, y=temperature))
ggplot(dataDF, aes(x=speed2, y=dist2)) +
geom_line(col='grey50') +
geom_smooth(method='lm', col='red', fill='red') +
facet_grid(cols=vars(treatment))
This gives us one plot with separate subplots ('facets') for each treatment. The original curves, based on real data, are drawn in grey. The line from the linear model (here, generated by geom_smooth()
inside the ggplot block) is plotted on top in red.
You can also do this on one plot and colour the curve lines so that they're not all grey:
# We can also do this in one plot by removing the facet
# and colouring the geom_smooth() lines by treatment:
ggplot(dataDF, aes(x=speed2, y=dist2, col=treatment, fill=treatment)) +
geom_line(alpha=0.5) +
geom_smooth(method='lm')
I'm not entirely sure what you're aiming for, but hopefully that gives you something to start with. Feel free to leave a comment on this answer if you need additional info.
Upvotes: 1