Reputation: 510
I'm interested in creating a scatter plot using ggplot2 in r where the 1 regression line uses all of the points to create that line but the plot itself has points that are different from one another on the basis of a grouping variable with 2 level.
I want the regression line like in this plot:
I want the point differentiated like in this plot:
Is this possible?
Thanks
Here is the code I used to make the plots:
# creates data for scatter plot
## dataset of interest
iris
## for iris
colnames(iris)
### creates dataset with just cases where iris$Species == setosa or versicolor
#### unique values for iris$Species
unique(iris$Species)
#### loads tidyverse package
library(tidyverse)
##### filters dataset with just cases where iris$Species == setosa or versicolor
iris__setosa_or_versicolor <- iris %>% filter(iris$Species != "virginica")
##### turns iris__setosa_or_versicolor to dataframe
iris__setosa_or_versicolor <- data.frame(iris__setosa_or_versicolor)
##### unique values for iris__setosa_or_versicolor$Species
unique(iris__setosa_or_versicolor$Species)
## creates scatter plot
### loads ggplot2
library(ggplot2)
### Basic scatter plot
scatter_plot__sepal_length_x_sepal_width__points_is_species <- ggplot(iris__setosa_or_versicolor, aes(x=Sepal.Length, y=Sepal.Width)) + geom_point()
scatter_plot__sepal_length_x_sepal_width__points_is_species
### Basic scatter plot with regression line added
scatter_plot__sepal_length_x_sepal_width__points_is_species <- ggplot(iris__setosa_or_versicolor, aes(x=Sepal.Length, y=Sepal.Width)) + geom_point() + geom_smooth(method=lm, se=FALSE, color="green") + labs(title="Scatter plot of Sepal.Length X Sepal.Width with dots as Species where\n Species is setosa or versicolor but not differentiated by species", x="Sepal.Length", y = "Sepal.Width")
scatter_plot__sepal_length_x_sepal_width__points_is_species
### Basic scatter plot separated by Species
scatter_plot__sepal_length_x_sepal_width__points_is_species <- ggplot(iris__setosa_or_versicolor, aes(x=Sepal.Length, y=Sepal.Width, color=Species, shape=Species)) + geom_point() + geom_smooth(method=lm, se=FALSE, fullrange=TRUE) + labs(title="Scatter plot of Sepal.Length X Sepal.Width with dots as\n Species where Species is setosa or versicolor", x="Sepal.Length", y = "Sepal.Width") + scale_colour_manual(values = c("#ff0000","#0000ff"))
scatter_plot__sepal_length_x_sepal_width__points_is_species
### Basic scatter plot separated by Species with no regression lines
scatter_plot__sepal_length_x_sepal_width__points_is_species <- ggplot(iris__setosa_or_versicolor, aes(x=Sepal.Length, y=Sepal.Width, color=Species, shape=Species)) + geom_point() + labs(title="Scatter plot of Sepal.Length X Sepal.Width with dots as Species\n where Species is setosa or versicolor, with no regression lines", x="Sepal.Length", y = "Sepal.Width") + scale_colour_manual(values = c("#ff0000","#0000ff"))
scatter_plot__sepal_length_x_sepal_width__points_is_species
Upvotes: 3
Views: 192
Reputation: 499
Do you want something like this
### Basic scatter plot with regression line added
scatter_plot__sepal_length_x_sepal_width__points_is_species <-ggplot(iris__setosa_or_versicolor, aes(x=Sepal.Length, y=Sepal.Width)) + geom_point(aes(col=Species)) + geom_smooth(method=lm, se=FALSE, color="green") + labs(title="Scatter plot of Sepal.Length X Sepal.Width with dots as Species where\n Species is setosa or versicolor but not differentiated by species", x="Sepal.Length", y = "Sepal.Width")
scatter_plot__sepal_length_x_sepal_width__points_is_species
Output
Upvotes: 4