Reputation: 97
I am working on visualizing a data set that consists of many time series measured over time 1:2341. A sample of this data contains 2 vectors, V1 and V2, where each of them represents a time series.
dput(df2)
#I deleted some data because of the limited space.
#EDIT: I include only 200 lines from the data frame
dput(df2[1:200,])
structure(list(V1 = c(11489, 11495, 11409, 11441, 11413, 11434,
11390, 11496, 11388, 11426, 11392, 11428, 11465, 11456, 11445,
11501, 11543, 11465, 11435, 11430, 11484, 11468, 11503, 11476.25,
11449.5, 11331, 11368, 11402, 11470, 11512, 11488, 11511, 11529,
11442, 11483, 11474, 11424, 11504, 11548, 11537, 11464, 11448,
11464, 11303, 11394, 11269, 11430, 11491, 11468, 11509, 11376,
11438, 11547, 11452, 11471, 11448, 11425, 11420, 11489, 11457,
11609, 11450, 11421, 11421, 11474, 11355, 11321, 11292, 11396,
11320, 11184, 11266, 11335, 11424, 11225, 11279, 11179, 11286,
11270, 11322, 11333, 11267, 11395, 11243, 11392, 11242, 11131,
11341, 11285, 11245, 11255, 11257, 11261, 11255, 11293, 11297,
11348, 11354, 11213, 11180, 11372, 11388, 11192, 11411, 11198,
11271, 11150, 11268, 11309, 11349, 11330, 11285, 11112, 11328,
11368, 11405, 11359, 11459, 11443, 11268, 11316, 11392, 11324,
11366, 11328, 11354, 11305, 11331, 11397, 11305, 11452, 11275,
11308, 11385, 11375, 11332, 11344, 11401, 11206, 11347, 11388,
11374, 11349, 11279, 11427, 11455, 11500, 11289, 11330, 11418,
11388, 11332, 11352, 11284, 11322, 11184, 11423, 11372, 11468,
11456, 11397, 11567, 11418, 11415, 11386, 11426, 11303, 11351,
11327, 11424, 11284, 11504, 11321, 11311, 11328, 11456, 11420,
11511, 11263, 11461, 11491, 11264, 11405, 11358, 11434, 11445,
11355, 11467, 11403, 11530, 11444, 11378, 11495, 11619, 11652,
11669, 11590, 11793, 11772, 11744), V2 = c(6846, 6796, 6835,
6761, 6870, 6798, 6800, 6848, 6824, 6834, 6812, 6820, 6857, 6841,
6870, 6809, 6835, 6796, 6864, 6862, 6864, 6866, 6752, 6813, 6874,
6848, 6856, 6816, 6784, 6864, 6821, 6867, 6810, 6835, 6828, 6802,
6838, 6821, 6849, 6826, 6887, 6820, 6882, 6848, 6840, 6866, 6857,
6872, 6823, 6873, 6852, 6880, 6806, 6824, 6841, 6844, 6847, 6874,
6862, 6792, 6802, 6780, 6747, 6784, 6744, 6809, 6825, 6811, 6859,
6802, 6866, 6853, 6724, 6863, 6830, 6827, 6818, 6885, 6855, 6707,
6876, 6821, 6828, 6874, 6858, 6871, 6840, 6852, 6866, 6837, 6786,
6884, 6837, 6831, 6845, 6807, 6878, 6827, 6840, 6850, 6870, 6885,
6862, 6884, 6867, 6841, 6871, 6884, 6787, 6877, 6817, 6855, 6860,
6803, 6863, 6779, 6883, 6864, 6866, 6824, 6797, 6867, 6859, 6886,
6864, 6868, 6848, 6818, 6770, 6851, 6849, 6873, 6879, 6858, 6864,
6849, 6866, 6880, 6838, 6881, 6828, 6849, 6880, 6838, 6833, 6856,
6855, 6862, 6868, 6788, 6851, 6871, 6842, 6832, 6871, 6801, 6854,
6830, 6871, 6887, 6830, 6880, 6833, 6842, 6849, 6857, 6894, 6871,
6867, 6881, 6895, 6874, 6872, 6857, 6856, 6847, 6899, 6874.5,
6850, 6902, 6887, 6883.5, 6880, 6862, 6873, 6884, 6879, 6874,
6873.5, 6873, 6882.5, 6892, 7111, 7114.11111111111, 7117.22222222222,
7120.33333333333, 7123.44444444444, 7126.55555555556, 7129.66666666667,
7132.77777777778), Time = 1:200), row.names = c(NA, 200L), class = "data.frame")
Each of the two vectors above is labeled by another vector, called its class: class1 labels V1 and class2 labels V2. They look as follows:
#I can't dput class1 and class2 because of the limited space
> str(class1)
num [1:2341] 5 5 5 5 5 5 5 5 5 5 ...
> summary(class1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 1.00 1.00 2.51 4.00 8.00
> str(class2)
num [1:2341] 2 2 5 2 2 2 5 5 2 2 ...
> summary(class2)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 1.000 4.000 4.922 8.000 8.000
I can successfully plot each time series (for each vector V1 and V2) and color it according to the label of each point, but only separately.
str(df2)
'data.frame': 2341 obs. of 3 variables:
$ V1 : num 11489 11495 11409 11441 11413 ...
$ V2 : num 6846 6796 6835 6761 6870 ...
$ Time: int 1 2 3 4 5 6 7 8 9 10 ...
str(class1)
num [1:2341] 5 5 5 5 5 5 5 5 5 5 ...
str(class2)
num [1:2341] 2 2 5 2 2 2 5 5 2 2 ...
# Plot series V1 over time, coloured by its class labels. `group = 1` puts all
# points in one group so the line is drawn as a single connected path even
# though the colour changes along it.
ggplot(df2, aes(x = Time, y = V1, colour = factor(class1), group = 1)) +
  geom_line()
# Same plot for series V2, coloured by class2.
ggplot(df2, aes(x = Time, y = V2, colour = factor(class2), group = 1)) +
  geom_line()
My question: How can I combine both plots into one plot that contains the two curves? Additionally, how can I fix each color to the same class label for both curves, knowing that both class1 and class2 contain values in 1:8?
NOTE: I know it is possible to plot many series in the same plot using ts.plot(). I didn't use it because I don't know how to set the color according to the class label. Additionally, ts.plot() accepts up to 10 series, but my original data has more!
Upvotes: 2
Views: 666
Reputation: 39595
I am also adding an example with dummy data; you can add your class variables to df2, as @IanCampbell smartly does, so that they will be in the data. Here is some code that can be helpful:
library(dplyr)
library(tidyr) # provides pivot_longer(); it is not attached by dplyr
library(ggplot2)
library(ggrepel)

# Data: two dummy series (V1, V2), each with its own vector of class labels
# (class1 belongs to V1, class2 belongs to V2).
df <- data.frame(
  Time = 1:20,
  V1 = cumsum(runif(20, 1, 10)),
  V2 = cumsum(runif(20, 1, 10)),
  class1 = sample(1:5, 20, replace = TRUE),
  class2 = sample(1:5, 20, replace = TRUE)
)

# Code
df %>%
  # Reshape to one row per (Time, series). The trailing digits of each column
  # name become `series` and the prefix ("V" / "class") becomes the output
  # column, so every V value stays paired with the class of its own series.
  pivot_longer(
    -Time,
    names_to = c(".value", "series"),
    names_pattern = "^(.*[^0-9])([0-9]+)$"
  ) %>%
  mutate(
    name = paste0("V", series),
    # Label each series once, at the last time point.
    Label = ifelse(Time == max(Time, na.rm = TRUE), name, NA)
  ) %>%
  ggplot(aes(x = Time, y = V, color = factor(class), group = name)) +
  geom_line() +
  labs(y = "value", color = "class") +
  # Named values tie each colour to its class label, so the mapping does not
  # shift when a class happens to be absent from the data.
  scale_color_manual(
    values = c(
      "1" = "magenta", "2" = "pink", "3" = "cyan", "4" = "red", "5" = "blue"
    )
  ) +
  geom_label_repel(
    aes(label = Label),
    nudge_x = 1.5,
    na.rm = TRUE, show.legend = FALSE, color = "black"
  )
Output:
Upvotes: 2
Reputation: 24790
One approach is to combine all of the variables into one data.frame and then use pivot_longer
from tidyr
.
library(tidyverse)
# Attach each series' class labels to the data, then reshape so that every row
# holds the value (V) and the class of one series at one time point.
df2 <- df2 %>%
  mutate(class1 = class1,
         class2 = class2) %>%
  pivot_longer(
    -Time,
    names_to = c(".value", "Series"),
    # Prefix -> output column (.value); ALL trailing digits -> Series.
    # The prefix is forced to end in a non-digit so that multi-digit series
    # such as V10 / class10 split as "V" + "10" rather than "V1" + "0".
    names_pattern = "^(.*[^0-9])([0-9]+)$"
  )
df2
## A tibble: 432 x 4
# Time Series V class
# <int> <chr> <dbl> <fct>
# 1 1 1 11489 1
# 2 1 2 6846 1
# 3 2 1 11495 1
# 4 2 2 6796 1
# 5 3 1 11409 1
# 6 3 2 6835 1
# 7 4 1 11441 1
# 8 4 2 6761 1
# 9 5 1 11413 1
#10 5 2 6870 1
## … with 422 more rows
Now each row contains both the V variable and the class variable for a single series at a given time. The series has been extracted from the end of the variable names and is accomplished with the names_pattern
argument of tidyr::pivot_longer
.
# Draw every series in one panel: one line per Series, coloured by the class
# label at each time point, with the y axis limited to 500-15000.
ggplot(data = df2) +
  geom_line(
    mapping = aes(
      x = Time, y = V,
      color = as.factor(class), group = Series
    )
  ) +
  ylim(500, 15000)
Sample Data:
# Sample class labels for 216 time points: the index range is cut into 5
# (class1) or 6 (class2) equal-width bins and the bin number is used as the
# label. Both factors declare levels 1-8 so they share one level set.
class1 <- factor(cut(seq_len(216), breaks = 5, labels = FALSE), levels = 1:8)
class2 <- factor(cut(seq_len(216), breaks = 6, labels = FALSE), levels = 1:8)
Upvotes: 3