Vida Eninkio
Vida Eninkio

Reputation: 91

Extract only filtered data from table in Dash Python

I've built a website in Dash Python and I display all the data of my csv file in a table which can be filtered.

I want the user to be able to extract the data from the table. When there is not filter, I want him to be able to extract full data and when it's filtered by him, I want him to be able to extract the data filtered.

For that, I use the dcc.download component which is working very well and I also use df (from df = pd.read_csv("./data.csv")) as a global variable, to be able to reuse it in my extract callback.

Here is my code:

from dash import Dash, dash_table, dcc, html, State
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import pandas as pd
import csv
import time
import xlsxwriter
from datetime import datetime
from dash_extensions.enrich import Output, DashProxy, Input, MultiplexerTransform
import os

app = DashProxy(external_stylesheets=[dbc.themes.BOOTSTRAP], transforms=[MultiplexerTransform()])
server = app.server

df = pd.read_csv("./data.csv")
df = df.fillna("NaN")
PAGE_SIZE = 20

# Layout
app.layout = html.Div(children=[
        dcc.Download(id="download-dataframe-csv"),
        dbc.Card([
            dbc.CardBody([
                dash_table.DataTable(
                    id='table-sorting-filtering',
                    columns=[{'name': i, 'id': i} for i in df.columns],
                    page_current= 0,
                    page_size= PAGE_SIZE,
                    page_action='custom',
                    filter_action='custom',
                    filter_query='',
                    sort_action='custom',
                    sort_mode='multi',
                    sort_by=[],
                    style_data={'text-align':'center'},
                    style_header={
                            'backgroundColor': 'white',
                            'fontWeight': 'bold',
                            'text-align':'center'
                        },
                    style_cell={'padding': '5px'},
                    style_as_list_view=True,
                )]
            )],
            style={"margin-left":"15px", "margin-right":"15px"}
        ),
        dcc.Interval(
                id='interval-component',
                interval=1*1000, # in milliseconds
                n_intervals=0
            ),
        ]
    )]
)


operators = [['ge ', '>='],
             ['le ', '<='],
             ['lt ', '<'],
             ['gt ', '>'],
             ['ne ', '!='],
             ['eq ', '='],
             ['contains ']]

def split_filter_part(filter_part):
    for operator_type in operators:
        for operator in operator_type:
            if operator in filter_part:
                name_part, value_part = filter_part.split(operator, 1)
                name = name_part[name_part.find('{') + 1: name_part.rfind('}')]
                value_part = value_part.strip()
                v0 = value_part[0]
                if (v0 == value_part[-1] and v0 in ("'", '"', '`')):
                    value = value_part[1: -1].replace('\\' + v0, v0)
                else:
                    try:
                        value = float(value_part)
                    except ValueError:
                        value = value_part
                return name, operator_type[0].strip(), value
    return [None] * 3

# Display data in table and manage filtering
@app.callback(
    Output('table-sorting-filtering', 'data'),
    Input('table-sorting-filtering', "page_current"),
    Input('table-sorting-filtering', "page_size"),
    Input('table-sorting-filtering', 'sort_by'),
    Input('table-sorting-filtering', 'filter_query'),
    Input('interval-component', 'n_intervals'))

def update_table(page_current, page_size, sort_by, filter, n):
    global df
    global date_time
    df = pd.read_csv("./data.csv")
    df = df.fillna("NaN")
    date_time = last_modification_time_of_csv("./data.csv")
    filtering_expressions = filter.split(' && ')
    for filter_part in filtering_expressions:
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            df = df.loc[getattr(df[col_name], operator)(filter_value)]
        elif operator == 'contains':
            if type(filter_value) is str:
                df = df.loc[df[col_name].str.contains(filter_value)]
    if len(sort_by):
        df = df.sort_values(
            [col['column_id'] for col in sort_by],
            ascending=[
                col['direction'] == 'asc'
                for col in sort_by
            ],
            inplace=False
        )
    page = page_current
    size = page_size
    return df.iloc[page * size: (page + 1) * size].to_dict('records')

# Export button
@app.callback(
    Output("download-dataframe-csv", "data"),
    Input("button_export", "n_clicks"),
    prevent_initial_call=True,
)
def export_on_click(n_clicks):
    global df
    return dcc.send_data_frame(df.to_excel, "export.xlsx")

if __name__ == '__main__':
    app.run_server(debug=True, host='0.0.0.0')

I notice that I have some problems when extracting the data. Sometimes it's working very well, sometimes I'm extracting data that I didn't filter at all. So I'm wondering if global variables are a good solutions because I have multiple users using my website.

I saw there is a way to do that with state in Dash Python but I didn't understand well how it's working and if it’s something I can use in my case.

Someone can please help me?

Thank you

Upvotes: 2

Views: 2204

Answers (3)

tash
tash

Reputation: 941

Posting my solution when the DataTable is created dynamically and not present in the initial layout:

  1. Keep a div container in initial layout: https://github.com/AMP-SCZ/missing-data-tracker/blob/0f70a0fdee8249c58b8c7835328b343f893fb042/app.py#L177

  2. Return the brand-new DataTable to the div: https://github.com/AMP-SCZ/missing-data-tracker/blob/0f70a0fdee8249c58b8c7835328b343f893fb042/app.py#L265-L286

  3. Access the div's child within callback function as table['props']['derived_virtual-data']: https://github.com/AMP-SCZ/missing-data-tracker/blob/0f70a0fdee8249c58b8c7835328b343f893fb042/app.py#L302

Upvotes: 0

Vida Eninkio
Vida Eninkio

Reputation: 91

Thank you coralvanda.

I finally decided to handle only front end pagination and filtering. I've used the derived_virtual_data prop as you told me and it's working better now.

df = pd.read_csv("./data.csv")
df = df.fillna("NaN")


# Layout
app.layout = html.Div(children=[
        dcc.Download(id="download-dataframe-csv"),

        dbc.Card([
            dbc.CardBody([
                dash_table.DataTable(
                    id='datatable-interactivity',
                    columns=[{'name': i, 'id': i} for i in df.columns],
                    data=df.to_dict('records'),
                    filter_action="native",
                    sort_action="native",
                    sort_mode="multi",
                    selected_columns=[],
                    selected_rows=[],
                    page_action="native",
                    page_current= 0,
                    page_size= 20,
                    style_data={'text-align':'center'},
                    style_header={
                            'backgroundColor': 'white',
                            'fontWeight': 'bold',
                            'text-align':'center'
                        },
                    style_cell={'padding': '5px'},
                    style_as_list_view=True,
                ), 
                html.Div(id='datatable-interactivity-container')
                ]
            )],
            style={"margin-left":"15px", "margin-right":"15px"}
        ),
        dcc.Interval(
                id='interval-component',
                interval=1*1000, # in milliseconds
                n_intervals=0
            ),
        ]
    )]
)


# Display data in table and manage filtering
@app.callback(
    Output('datatable-interactivity', 'style_data_conditional'),
    Input('datatable-interactivity', 'selected_columns'),
    Input('interval-component', 'n_intervals'))

def update_styles(selected_columns, n):    
    return [{
        'if': { 'column_id': i },
        'background_color': '#D2F3FF'
    } for i in selected_columns]


# Export button
@app.callback(
    Output("download-dataframe-csv", "data"),
    Input("button_export", "n_clicks"),
    State("datatable-interactivity", "derived_virtual_data"),
    prevent_initial_call=True,
)
def export_on_click(n_clicks, virtual_data):
    df = pd.DataFrame.from_dict(virtual_data)
    return dcc.send_data_frame(df.to_excel, "export.xlsx")

if __name__ == '__main__':
    app.run_server(debug=True, host='0.0.0.0')

Thank you so much for your help and for your time!!!!!!

Upvotes: 0

coralvanda
coralvanda

Reputation: 6596

Globals are generally something to avoid, especially in Dash. The good news is, the fix should be pretty easy here. Since you're using custom filtering on the back end, rather than native, front-end filtering, I think what you need is to add the data prop to your download callback as state. If you'd been using front-end filtering, you'd have to use the derived_virtual_data prop instead. The following should work.

@app.callback(
    Output("download-dataframe-csv", "data"),
    Input("button_export", "n_clicks"),
    State("table-sorting-filtering", "data"),
    prevent_initial_call=True,
)
def export_on_click(n_clicks, table_data):
    df = pd.DataFrame.from_dict(table_data)
    return dcc.send_data_frame(df.to_excel, "export.xlsx")

Upvotes: 2

Related Questions