Reputation: 127
Implement a Shiny app to efficiently visualize and adjust uploaded data sets. Each set may contain 100000 to 200000 rows. After data adjustments are done, the adjusted data can be downloaded. In steps:
While the app works in principal, data visualization and removal take too much time.
Some sample data is generated. The data can be uploaded onto the shiny app. The sample data distribution is not similar to my actual data. The actual data contains clearly identifiable outliers and looks like a spectra with peaks.
a = sample(1:1e12, 1e5, replace=TRUE)
b = sample(1:1e12, 1e5, replace=TRUE)
dummy1 = data.frame(Frequency = a, Amplitude = a)
dummy2 = data.frame(Frequency = b, Amplitude = b)
dummy3 = data.frame(Frequency = a, Amplitude = b)
# Sample data
write.csv(dummy1,'dummy1.csv')
write.csv(dummy2,'dummy2.csv')
write.csv(dummy3,'dummy2.csv')
The app takes the uploaded data and plots it. (Sample dummy data can be uploaded onto the app.) Section of data points can be removed and the new data can be downloaded.
# Packages
library(shiny)
library(ggplot2)
library(data.table)
# UI
ui = fluidPage(
fluidRow(selectInput("selection", "Set Selection:", choices = '', selected = '', multiple = TRUE)),
fluidRow(plotOutput(outputId = "plot", brush = "plot_brush_"),
downloadButton('download',"Download the data"))
)
# Server
server = function(session, input, output){
# Pop up for data upload
query_modal = modalDialog(title = "Upload Spectrum",
fileInput("file",
"file",
multiple = TRUE,
accept = c(".csv")),
easyClose = FALSE)
showModal(query_modal)
## Upload
mt1 = reactive({
req(input$file)
cs = list()
for(nr in 1:length(input$file[ , 1])){
c = read.csv(input$file[[nr, 'datapath']])
cs[[nr]] = data.table(Frequency = as.numeric(c[[1]]),
Amplitude = as.numeric(c[[2]]),
Indicator = as.factor(nr))}
c = do.call(rbind, cs)
c = reactiveValues(data = c)
return(c)})
## Input selection
observeEvent(
mt1(),
updateSelectInput(
session,
"selection",
"Set Selection:",
choices = levels(mt1()$data$Indicator),
selected = 'Entire'))
## Plot
output$plot <- renderPlot({
mt = mt1()$data
mt = mt[mt$Indicator %in% input$selection,]
p = ggplot(mt, aes(Frequency, Amplitude, color = Indicator))
p + geom_point(show.legend = TRUE)})
## Download
output$download = downloadHandler(
filename = function(){paste(gsub('.{1}$', '', input$file$name[1]), 'manipulated', '.csv', sep= '')},
content = function(fname){
mt = mt1()$data
mt = mt[, .SD, .SDcols= c('Frequency',
'Amplitude',
'Indicator')]
write.csv(mt, fname, row.names = FALSE)})
## Adjust
observe({
d = mt$data
keep = mt$data[!Indicator %in% input$selection]
df = brushedPoints(d, brush = input$plot_brush_, allRows = TRUE)
df = df[selected_ == FALSE]
df$selected_ = NULL
mt$data = rbind(keep , df[Indicator %in% input$selection, ])})
}
# Run app
shinyApp(ui = ui, server = server)
Upvotes: 2
Views: 1028
Reputation: 41260
You could use matplotlib
Python drawing library inside R and Shiny with the reticulate
package :
install.packages('reticulate')
# Install python environment
reticulate::install_miniconda()
# if Python is already installed, you can specify the path with use_python(path)
# Install matplotlib library
reticulate::py_install('matplotlib')
library(reticulate)
mpl <- import("matplotlib")
mpl$use("Agg") # Stable non interactive backend
mpl$rcParams['agg.path.chunksize'] = 0 # Disable error check on too many points
plt <- import("matplotlib.pyplot")
np <- import("numpy")
# generate lines cloud
xx = np$random$randn(100000L)
yy = np$random$randn(100000L)
plt$figure()
plt$plot(xx,yy)
plt$savefig('test.png')
plt$close(plt$gcf())
test.png :
matplotlib
in Shiny, drawing duration below 2 seconds for 1e5 segments :# Packages
library(shiny)
library(ggplot2)
library(data.table)
# UI
ui = fluidPage(
fluidRow(selectInput("selection", "Set Selection:", choices = '', selected = '', multiple = TRUE)),
fluidRow(imageOutput(outputId = "image"),
downloadButton('download',"Download the data"))
)
# Server
server = function(session, input, output){
# Setup Python objects
mpl <- reticulate::import("matplotlib")
plt <- reticulate::import("matplotlib.pyplot")
mpl$use("Agg")
mpl$rcParams['agg.path.chunksize'] = 0
# Pop up for data upload
query_modal = modalDialog(title = "Upload Spectrum",
fileInput("file",
"file",
multiple = TRUE,
accept = c(".csv")),
easyClose = FALSE)
showModal(query_modal)
## Upload
mt1 = reactive({
req(input$file)
cs = list()
for(nr in 1:length(input$file[ , 1])){
c = read.csv(input$file[[nr, 'datapath']])
cs[[nr]] = data.table(Frequency = as.numeric(c[[1]]),
Amplitude = as.numeric(c[[2]]),
Indicator = as.factor(nr))}
c = do.call(rbind, cs)
c = reactiveValues(data = c)
return(c)})
## Input selection
observeEvent(
mt1(),
updateSelectInput(
session,
"selection",
"Set Selection:",
choices = levels(mt1()$data$Indicator),
selected = 'Entire'))
## Render matplotlib image
output$image <- renderImage({
# Read myImage's width and height. These are reactive values, so this
# expression will re-run whenever they change.
width <- session$clientData$output_image_width
height <- session$clientData$output_image_height
# For high-res displays, this will be greater than 1
pixelratio <- session$clientData$pixelratio
# A temp file to save the output.
outfile <- tempfile(fileext='.png')
# Generate the image file
mt = mt1()$data
mt = mt[mt$Indicator %in% input$selection,]
xx = mt$Frequency
yy = mt$Amplitude
plt$figure()
plt$plot(xx,yy)
plt$savefig(outfile)
plt$close(plt$gcf())
# Return a list containing the filename
list(src = outfile,
width = width,
height = height,
alt = "This is alternate text")
}, deleteFile = TRUE)
## Download
output$download = downloadHandler(
filename = function(){paste(gsub('.{1}$', '', input$file$name[1]), 'manipulated', '.csv', sep= '')},
content = function(fname){
mt = mt1()$data
mt = mt[, .SD, .SDcols= c('Frequency',
'Amplitude',
'Indicator')]
write.csv(mt, fname, row.names = FALSE)})
## Adjust
observe({
mt = mt1()
df = brushedPoints(mt$data, brush = input$plot_brush_, allRows = TRUE)
mt$data = df[df$selected_ == FALSE, ]})
}
# Run app
shinyApp(ui = ui, server = server)
You'll need to handle color manually, because
matplotlib
isn't ggplot2
Upvotes: 2