Shudras
Shudras

Reputation: 127

Efficient rendering of data points from large data plot in Shiny

Goal

Implement a Shiny app to efficiently visualize and adjust uploaded data sets. Each set may contain 100000 to 200000 rows. After data adjustments are done, the adjusted data can be downloaded. In steps:

  1. Data upload
  2. Data selection and visualization
  3. Data (point) removal
  4. Download option

Issue

While the app works in principal, data visualization and removal take too much time.

Code

Sample data

Some sample data is generated. The data can be uploaded onto the shiny app. The sample data distribution is not similar to my actual data. The actual data contains clearly identifiable outliers and looks like a spectra with peaks.

a = sample(1:1e12, 1e5, replace=TRUE)
b = sample(1:1e12, 1e5, replace=TRUE)
dummy1 = data.frame(Frequency = a, Amplitude = a)
dummy2 = data.frame(Frequency = b, Amplitude = b)
dummy3 = data.frame(Frequency = a, Amplitude = b)
# Sample data
write.csv(dummy1,'dummy1.csv')
write.csv(dummy2,'dummy2.csv')
write.csv(dummy3,'dummy2.csv')

Shiny app

The app takes the uploaded data and plots it. (Sample dummy data can be uploaded onto the app.) Section of data points can be removed and the new data can be downloaded.

# Packages
library(shiny)
library(ggplot2)
library(data.table)
# UI
ui = fluidPage(
    fluidRow(selectInput("selection", "Set Selection:", choices = '', selected = '', multiple = TRUE)),
    fluidRow(plotOutput(outputId = "plot", brush = "plot_brush_"), 
             downloadButton('download',"Download the data"))
)

# Server
server = function(session, input, output){
    # Pop up for data upload
    query_modal = modalDialog(title = "Upload Spectrum",
                              fileInput("file", 
                              "file",
                              multiple = TRUE,
                              accept = c(".csv")),
                              easyClose = FALSE)
    showModal(query_modal)

    ## Upload
    mt1 = reactive({
       req(input$file)
       cs = list()
       for(nr in 1:length(input$file[ , 1])){
          c = read.csv(input$file[[nr, 'datapath']])
          cs[[nr]] = data.table(Frequency = as.numeric(c[[1]]), 
                                Amplitude = as.numeric(c[[2]]), 
                                Indicator = as.factor(nr))}
        c = do.call(rbind, cs)
        c = reactiveValues(data = c)
        return(c)})

    ## Input selection
    observeEvent(
      mt1(),
      updateSelectInput(
        session, 
        "selection", 
        "Set Selection:", 
        choices = levels(mt1()$data$Indicator), 
        selected = 'Entire'))
    
    ## Plot
    output$plot <- renderPlot({
      mt = mt1()$data
      mt = mt[mt$Indicator %in% input$selection,]
      p = ggplot(mt, aes(Frequency, Amplitude, color = Indicator)) 
      p + geom_point(show.legend = TRUE)})
    
    ## Download
    output$download = downloadHandler(
      filename = function(){paste(gsub('.{1}$', '', input$file$name[1]), 'manipulated', '.csv', sep= '')}, 
      content = function(fname){
        mt = mt1()$data
        mt = mt[, .SD, .SDcols= c('Frequency', 
                                  'Amplitude', 
                                  'Indicator')]
        write.csv(mt, fname, row.names = FALSE)})
    
    ## Adjust
    observe({
      d = mt$data
      keep = mt$data[!Indicator %in% input$selection]
      df = brushedPoints(d, brush = input$plot_brush_, allRows = TRUE) 
      df = df[selected_ == FALSE]
      df$selected_ = NULL
      mt$data = rbind(keep , df[Indicator %in% input$selection,  ])})
}

# Run app
shinyApp(ui = ui, server = server)

Upvotes: 2

Views: 1028

Answers (1)

Waldi
Waldi

Reputation: 41260

You could use matplotlib Python drawing library inside R and Shiny with the reticulate package :

  1. Set up the package and the libraries :
install.packages('reticulate')

# Install python environment
reticulate::install_miniconda() 
# if Python is already installed, you can specify the path with use_python(path)

# Install matplotlib library
reticulate::py_install('matplotlib')
  1. test installation :
library(reticulate)
mpl <- import("matplotlib")
mpl$use("Agg") # Stable non interactive backend
mpl$rcParams['agg.path.chunksize'] = 0 # Disable error check on too many points

plt <- import("matplotlib.pyplot")
np <- import("numpy")

# generate lines cloud
xx = np$random$randn(100000L)
yy = np$random$randn(100000L)

plt$figure()
plt$plot(xx,yy)
plt$savefig('test.png')
plt$close(plt$gcf())

test.png :

enter image description here

  1. Use matplotlib in Shiny, drawing duration below 2 seconds for 1e5 segments :
# Packages
library(shiny)
library(ggplot2)
library(data.table)
# UI
ui = fluidPage(
  fluidRow(selectInput("selection", "Set Selection:", choices = '', selected = '', multiple = TRUE)),
  fluidRow(imageOutput(outputId = "image"), 
           downloadButton('download',"Download the data"))
)

# Server
server = function(session, input, output){
  
  # Setup Python objects
  mpl <- reticulate::import("matplotlib")
  plt <- reticulate::import("matplotlib.pyplot")
  mpl$use("Agg") 
  mpl$rcParams['agg.path.chunksize'] = 0
  
  
  # Pop up for data upload
  query_modal = modalDialog(title = "Upload Spectrum",
                            fileInput("file", 
                                      "file",
                                      multiple = TRUE,
                                      accept = c(".csv")),
                            easyClose = FALSE)
  showModal(query_modal)
  
  ## Upload
  mt1 = reactive({
    req(input$file)
    cs = list()
    for(nr in 1:length(input$file[ , 1])){
      c = read.csv(input$file[[nr, 'datapath']])
      cs[[nr]] = data.table(Frequency = as.numeric(c[[1]]), 
                            Amplitude = as.numeric(c[[2]]), 
                            Indicator = as.factor(nr))}
    c = do.call(rbind, cs)
    c = reactiveValues(data = c)
    return(c)})
  
  ## Input selection
  observeEvent(
    mt1(),
    updateSelectInput(
      session, 
      "selection", 
      "Set Selection:", 
      choices = levels(mt1()$data$Indicator), 
      selected = 'Entire'))
  
  ## Render matplotlib image
  output$image <- renderImage({
    # Read myImage's width and height. These are reactive values, so this
    # expression will re-run whenever they change.
    width  <- session$clientData$output_image_width
    height <- session$clientData$output_image_height
    
    # For high-res displays, this will be greater than 1
    pixelratio <- session$clientData$pixelratio
    
    # A temp file to save the output.
    outfile <- tempfile(fileext='.png')
    
    # Generate the image file
    mt = mt1()$data
    mt = mt[mt$Indicator %in% input$selection,]
    xx = mt$Frequency
    yy = mt$Amplitude
    
    plt$figure()
    plt$plot(xx,yy)
    plt$savefig(outfile)
    plt$close(plt$gcf())
    
    # Return a list containing the filename
    list(src = outfile,
         width = width,
         height = height,
         alt = "This is alternate text")
  }, deleteFile = TRUE)
  
  ## Download
  output$download = downloadHandler(
    filename = function(){paste(gsub('.{1}$', '', input$file$name[1]), 'manipulated', '.csv', sep= '')}, 
    content = function(fname){
      mt = mt1()$data
      mt = mt[, .SD, .SDcols= c('Frequency', 
                                'Amplitude', 
                                'Indicator')]
      write.csv(mt, fname, row.names = FALSE)})
  
  ## Adjust
  observe({
    mt = mt1()
    df = brushedPoints(mt$data, brush = input$plot_brush_, allRows = TRUE) 
    mt$data = df[df$selected_ == FALSE,  ]})
}

# Run app
shinyApp(ui = ui, server = server)

enter image description here You'll need to handle color manually, because matplotlib isn't ggplot2

Upvotes: 2

Related Questions