Chris Ruehlemann
Chris Ruehlemann

Reputation: 21440

Migrate base R plot to ggplot2

This is a 10-column excerpt from a 25k-column dataset:

# Excerpt of the data as a 7 x 10 numeric matrix: rows are named
# "f1log" .. "f7log", columns are unnamed.
# The values are laid out one matrix column per line (column-major fill).
bnc7log_t <- matrix(
  c(
    3.8073549220576, 17.0416377642775, 14.1180488428888, 10.5507467853832, 9.01959072835788, 15.3172006034227, 0,
    6.85798099512757, 17.3531640594242, 14.2536997813742, 1, 13.9409558756115, 13.8783383498952, 11.5783726913608,
    6.85798099512757, 15.013933264159, 14.1180488428888, 16.2415363334906, 14.4605198895249, 16.2071287425943, 6.52356195605701,
    6.85798099512757, 17.0416377642775, 12.0864679934641, 10.1786648510065, 14.1270270430046, 17.0416377642775, 14.2651022184843,
    16.2071287425943, 16.2071287425943, 3.16992500144231, 0, 16.2071287425943, 1.58496250072116, 8.24317398347295,
    16.2071287425943, 16.2071287425943, 10.8470573460913, 3.90689059560852, 14.4809794856131, 12.111787735801, 10.4325419003883,
    16.2071287425943, 16.2071287425943, 6.39231742277876, 15.3172006034227, 5.95419631038687, 15.3426997374694, 15.8411957647929,
    16.2071287425943, 9.18487534290828, 15.101770541626, 16.9654686605993, 10.5449644327892, 11.7780771295354, 10.4429434958487,
    16.2071287425943, 9.63662462054365, 14.1249290466025, 16.4715482109991, 15.2312961941021, 16.4566110995749, 15.950419202689,
    16.2071287425943, 1, 2.8073549220576, 5.70043971814109, 12.5636729598275, 3.58496250072116, 3
  ),
  nrow = 7L,
  ncol = 10L,
  dimnames = list(
    c("f1log", "f2log", "f3log", "f4log", "f5log", "f6log", "f7log"),
    NULL
  )
)

I'm plotting the data in base R but the execution takes ages, probably due to the for loop:

# Set up an empty canvas first (type = "n" draws no data): title, axis
# labels and y-range only. The x axis is suppressed here and drawn by hand
# below so the ticks can be labelled "w1" .. "w7".
plot(bnc7log_t[, 1],
     main = paste0("Within-turn corpus frequencies in 7-word turns\n(N = ", ncol(bnc7log_t), " turns)"),
     cex.main = 0.9, cex.axis = 0.9, cex.lab = 0.9,
     ylab = "Within-turn frequencies (log2)",
     xlab = "Turn positions",
     ylim = c(0, 18),
     # Spell out `frame.plot` (no partial matching) and use FALSE, not `F`,
     # which is an ordinary variable that can be reassigned.
     type = "n", col = "grey", frame.plot = FALSE,
     xaxt = "n")
axis(1, at = 1:7, labels = paste0("w", 1:7), cex.axis = 0.8)
grid(nx = 10)

# Draw one jittered line per column (turn). seq_len() is used instead of
# 1:ncol(...) so the loop body is skipped rather than run with i = 1, 0
# should the matrix ever have zero columns.
for (i in seq_len(ncol(bnc7log_t))) {
  lines(jitter(bnc7log_t[, i], factor = 2),
        type = "l")
}

I'm sure execution is speedier in ggplot2 - but how can the plot be migrated to ggplot2?

Upvotes: 1

Views: 60

Answers (2)

Rui Barradas
Rui Barradas

Reputation: 76673

Here are two solutions, one of them a base R solution even though it was not asked for. The reason for including it is that the often-forgotten function matplot might do the job more quickly than the for loop in the question.

# Example data from the question: a 7 x 10 numeric matrix whose rows are
# named "f1log" .. "f7log" and whose columns carry no names.
# Each line of values below is one matrix column (column-major fill).
bnc7log_t <- matrix(
  c(
    3.8073549220576, 17.0416377642775, 14.1180488428888, 10.5507467853832, 9.01959072835788, 15.3172006034227, 0,
    6.85798099512757, 17.3531640594242, 14.2536997813742, 1, 13.9409558756115, 13.8783383498952, 11.5783726913608,
    6.85798099512757, 15.013933264159, 14.1180488428888, 16.2415363334906, 14.4605198895249, 16.2071287425943, 6.52356195605701,
    6.85798099512757, 17.0416377642775, 12.0864679934641, 10.1786648510065, 14.1270270430046, 17.0416377642775, 14.2651022184843,
    16.2071287425943, 16.2071287425943, 3.16992500144231, 0, 16.2071287425943, 1.58496250072116, 8.24317398347295,
    16.2071287425943, 16.2071287425943, 10.8470573460913, 3.90689059560852, 14.4809794856131, 12.111787735801, 10.4325419003883,
    16.2071287425943, 16.2071287425943, 6.39231742277876, 15.3172006034227, 5.95419631038687, 15.3426997374694, 15.8411957647929,
    16.2071287425943, 9.18487534290828, 15.101770541626, 16.9654686605993, 10.5449644327892, 11.7780771295354, 10.4429434958487,
    16.2071287425943, 9.63662462054365, 14.1249290466025, 16.4715482109991, 15.2312961941021, 16.4566110995749, 15.950419202689,
    16.2071287425943, 1, 2.8073549220576, 5.70043971814109, 12.5636729598275, 3.58496250072116, 3
  ),
  nrow = 7L,
  ncol = 10L,
  dimnames = list(
    c("f1log", "f2log", "f3log", "f4log", "f5log", "f6log", "f7log"),
    NULL
  )
)

suppressPackageStartupMessages({
  library(dplyr)
  library(tidyr)
  library(ggplot2)
})



# Base R alternative to the explicit loop: a single matplot() call draws
# every column of the matrix as its own solid grey line.
matplot(
  x = bnc7log_t,
  type = "l", col = "grey", lty = "solid",
  xlab = "Turn positions",
  ylab = "Within-turn frequencies (log2)",
  main = paste0(
    "Within-turn corpus frequencies in 7-word turns\n(N = ",
    ncol(bnc7log_t),
    " turns)"
  ),
  cex.main = 0.9, cex.lab = 0.9, cex.axis = 0.9
)

# ggplot2 version: reshape the matrix to long format (one row per turn
# position and turn) and draw one semi-transparent grey line per turn.
#
# The turn position `x` must be recorded BEFORE pivoting. Once the data is
# in long format the row index runs over every position/turn pair
# (1 .. nrow * ncol), so deriving `x` from it afterwards would spread each
# line across the whole x axis instead of positions 1 .. nrow.
bnc7log_t %>%
  as.data.frame() %>%
  mutate(x = row_number()) %>%
  # Pivot every column except `x`; yields the default `name`/`value` columns,
  # where `name` identifies the original matrix column (i.e. the turn).
  pivot_longer(-x) %>%
  ggplot(aes(x, value, group = name)) +
  geom_line(colour = "grey", alpha = 0.5) + 
  labs(x = "Turn positions", y = "Within-turn frequencies (log2)") +
  ggtitle(paste0("Within-turn corpus frequencies in 7-word turns\n(N = ", ncol(bnc7log_t), " turns)")) +
  theme_bw()

Created on 2022-04-25 by the reprex package (v2.0.1)

Upvotes: 1

Limey
Limey

Reputation: 12585

Is this close to what you want?

library(tidyverse)

# Reshape the matrix to long format (one row per turn position and turn)
# and draw one line per turn.
#
# as.data.frame() is used for the conversion because the matrix has no
# column names: it assigns V1, V2, ... without the deprecation warning that
# as_tibble() emits for unnamed matrix columns.
as.data.frame(bnc7log_t) %>%
  # Record the turn position before pivoting; derived from the data rather
  # than hard-coded so the code works for any number of rows.
  mutate(Row = row_number()) %>%
  pivot_longer(
    # Pivot everything except the position column, independent of how the
    # value columns happen to be named.
    -Row,
    values_to = "Value",
    names_to = "Column"
  ) %>%
  ggplot() +
    geom_line(aes(x = Row, y = Value, group = Column)) +
    labs(
      x = "Turn positions",
      y = "Within-turn frequencies (log2)",
      title = paste0("Within-turn corpus frequencies in 7-word turns\n(N = ", ncol(bnc7log_t), " turns)")
    ) +
    # One labelled tick ("w1", "w2", ...) per turn position.
    scale_x_continuous(
      breaks = seq_len(nrow(bnc7log_t)),
      labels = paste0("w", seq_len(nrow(bnc7log_t)))
    )

enter image description here

Upvotes: 2

Related Questions