Reputation: 7127
I am trying to plot an abline
over my ggplot
.
I can plot the data using:
modl %>%
ggplot() +
geom_line(aes(x = date, y = logPrice), data = . %>% unnest(data))
However I am running into a problem trying to plot a linear regression line through the data.
The following doesn't work:
geom_abline(
slope = . %>% unnest(tidymodels) %>% filter(term == "date") %>% pull(estimate),
intercept = . %>% unnest(tidymodels) %>% filter(term == "(Intercept)") %>% pull(estimate)
)
The data looks like:
# Groups: .id [1]
.id models tidymodels data
<chr> <list> <list> <list>
1 VRTX <lm> <tibble [2 × 5]> <tibble [100 × 8]>
EDIT:
dat <- structure(list(.id = c("VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX",
"VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX", "VRTX"), date = structure(c(17169,
17170, 17171, 17172, 17175, 17176, 17177, 17178, 17179, 17183,
17184, 17185, 17186, 17189, 17190, 17191, 17192, 17193, 17196,
17197, 17198, 17199, 17200, 17203, 17204, 17205, 17206, 17207,
17210, 17211, 17212, 17213, 17214, 17218, 17219, 17220, 17221,
17224, 17225, 17226, 17227, 17228, 17231, 17232, 17233, 17234,
17235, 17238, 17239, 17240, 17241, 17242, 17245, 17246, 17247,
17248, 17249, 17252, 17253, 17254, 17255, 17256, 17259, 17260,
17261, 17262, 17263, 17266, 17267, 17268, 17269, 17273, 17274,
17275, 17276, 17277, 17280, 17281, 17282, 17283, 17284, 17287,
17288, 17289, 17290, 17291, 17294, 17295, 17296, 17297, 17298,
17301, 17302, 17303, 17304, 17305, 17308, 17309, 17310, 17311
), class = "Date"), logPrice = c(4.31602035690836, 4.34445460070188,
4.3702070325713, 4.37437240314098, 4.4171524487178, 4.41799687846311,
4.40586522415372, 4.4114640581901, 4.42088668183346, 4.40121636585427,
4.41304065184823, 4.39333747525739, 4.39900669157719, 4.39937529757852,
4.40964177738779, 4.44488407953168, 4.44652611598691, 4.47015200864673,
4.43533037114274, 4.45283455962092, 4.45713408396959, 4.4626845492227,
4.46279987928647, 4.47072406655711, 4.46406729894837, 4.4741499349496,
4.49166545158536, 4.47903993153248, 4.4741499349496, 4.48762466712236,
4.49278508896161, 4.47392186517061, 4.4810798233994, 4.46072231628426,
4.45190246124124, 4.45120285768425, 4.45516091866138, 4.48751216501705,
4.50667497234427, 4.50324817411677, 4.4985866894606, 4.50191857835302,
4.50987005013333, 4.5143698216616, 4.52374318729236, 4.53870981928782,
4.54308202559635, 4.53206168189204, 4.52979985739765, 4.54934030102742,
4.52569399531349, 4.52091861178245, 4.51426028872484, 4.49423862528081,
4.50025402714471, 4.48919804342461, 4.49958745675377, 4.50865931864689,
4.49613624265995, 4.68222383394719, 4.67553545796919, 4.69455372883288,
4.67702559516856, 4.72108481948996, 4.73716290421006, 4.73847657080025,
4.7525551470123, 4.73672460819376, 4.73821398600866, 4.7437140041424,
4.73444254805427, 4.74327857068563, 4.74188402674831, 4.74170952395576,
4.76439370859218, 4.75960651358557, 4.76779914343257, 4.76447894289098,
4.75651690410063, 4.76575723360317, 4.7732237963436, 4.79264513289434,
4.77465977240762, 4.77954359813237, 4.75694661236208, 4.74353982042179,
4.74545371405291, 4.75900650498059, 4.74797096728123, 4.74805768220515,
4.74144778682863, 4.75557101246651, 4.77144701174072, 4.73952621225881,
4.77009122810188, 4.7708540762096, 4.77861918109365, 4.75866347263694,
4.76898829668438, 4.77575646970022)), row.names = c(NA, -100L
), groups = structure(list(.id = "VRTX", .rows = structure(list(
1:100), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr"
))), row.names = 1L, class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Next I do the following:
models <- dat %>%
group_by(.id) %>% # I have more than one ID in the full data.
nest() %>%
mutate(models = map(data, ~lm(logPrice ~ date, data = .x))) %>%
mutate(
tidymodels = map(models, ~tidy(.x)),
glancemodels = map(models, ~glance(.x)),
augmentmodels = map(models, ~augment(.x))
)
Then I can run the ggplot
models %>%
ggplot() +
geom_line(aes(x = date, y = logPrice), data = . %>% unnest(data))
EDIT:
This works for a single observation (note - it has different slope/intercept than the example here)
models %>%
ggplot() +
geom_line(aes(x = date, y = logPrice), data = . %>% unnest(data)) +
geom_abline(
slope = 0.00036,
intercept = -2.4
)
I would like to do this more dynamically using the tidy approach.
Upvotes: 2
Views: 357
Reputation: 46958
You can only pass information from data into aes(), so for example, you have to specify geom_abline(data=..,aes(slope=..)).
Since your data is sequential and you don't need the You can try something like below:
library(dplyr)
library(ggplot2)
library(broom)
library(tidyr)
library(purrr)
set.seed(100)
d1 = rbind(dat,dat)
d1$.id = rep(c("VRTX","VRTY"),each=nrow(dat))
d1$logPrice = d1$logPrice+rnorm(nrow(d1),0,0.2)
models <- d1 %>%
group_by(.id) %>% # I have more than one ID in the full data.
nest() %>%
mutate(models = map(data, ~lm(logPrice ~ date, data = .x))) %>%
mutate(
tidymodels = map(models, ~tidy(.x)),
glancemodels = map(models, ~glance(.x)),
augmentmodels = map(models, ~augment(.x))
)
models %>%
ggplot() +
geom_line(aes(x = date, y = logPrice), data = . %>% unnest(data))+
geom_line(aes(x = date, y = .fitted), data = . %>% unnest(augmentmodels))+
facet_wrap(~.id)
Or if you like to go with intercepts etc, most likely you have to collect your intercepts into a data.frame:
models %>% unnest(tidymodels) %>% select(.id,term,estimate) %>% pivot_wider(names_from="term",values_from="estimate")
# A tibble: 2 x 3
# Groups: .id [2]
.id `(Intercept)` date
<chr> <dbl> <dbl>
1 VRTX -45.9 0.00293
2 VRTY -58.7 0.00367
Upvotes: 2
Reputation: 23767
Why don't you just use geom_smooth
instead? You can group by .ID as usual, just adding it as an aesthetic, or facet.
library(tidyverse)
ggplot(dat,aes(x = date, y = logPrice)) +
geom_line() +
geom_smooth(method = 'lm')
Created on 2020-02-23 by the reprex package (v0.3.0)
Upvotes: 3