Reputation: 25
I'm trying to visualize references from documents. For this, I have Elements.csv
, which looks like this:
Doc,Description,DocumentID
SOP Laboratory,This SOP should be used in the lab,10414
Visual Design,Basics for Visual Design,1200139348
GMP,Good Manufacturing Practises,4638261
Windows PC manual,This manual describes how to use Windows PCs,271922
In Connections.csv
, I have the references:
Source,Target
SOP Laboratory,Windows PC manual
SOP Laboratory,GMP
Visual Design,Windows PC manual
I.e. there is a reference in SOP Laboratory
, which points to Windows PC manual
, etc.
The code I use to visualize this network works with Dash/Plotly:
import pandas as pd
import networkx as nx
import plotly.graph_objs as go
import plotly
import dash
import dash_core_components as dcc
import dash_html_components as html
## Dash setup
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
## Data
edges = pd.read_csv('Connections.csv', encoding="utf8")
nodes = pd.read_csv('Elements.csv', encoding="utf8")
## Graph
G = nx.from_pandas_edgelist(edges, 'Source', 'Target')
nx.set_node_attributes(G, nodes.set_index('Doc')['Description'].to_dict(), 'Description')
nx.set_node_attributes(G, nodes.set_index('Doc')['DocumentID'].to_dict(), 'DocumentID')
pos = nx.spring_layout(G)
for node in G.nodes:
G.nodes[node]['pos'] = list(pos[node])
traceRecode = []
index = 0
for edge in G.edges:
x0, y0 = G.nodes[edge[0]]['pos']
x1, y1 = G.nodes[edge[1]]['pos']
trace = go.Scatter(x=tuple([x0, x1, None]), y=tuple([y0, y1, None]),
mode='lines',
hoverinfo='none',
line={'width': 2},
marker=dict(color='#000000'),
line_shape='spline',
opacity=1)
traceRecode.append(trace)
index = index + 1
node_trace = go.Scatter(
x=[],
y=[],
hovertext=[],
text=[],
mode='markers+text',
textposition="bottom center",
hoverinfo='text',
marker=dict(
showscale=True,
colorscale='Agsunset',
reversescale=True,
color=[],
size=20,
colorbar=dict(
thickness=15,
title='Node Connections',
xanchor='left',
titleside='right'
),
line=dict(width=0)))
index = 0
for node in G.nodes():
x, y = G.nodes[node]['pos']
# hovertext = "Document Name: " + str(G.nodes[node]['Doc']) + "<br>" + "Document ID: " + str(G.nodes[node]['DocumentID'])
# text = nodes['Doc'][index]
node_trace['x'] += tuple([x])
node_trace['y'] += tuple([y])
# node_trace['hovertext'] += tuple([hovertext])
# node_trace['text'] += tuple([text])
index = index + 1
for node, adjacencies in enumerate(G.adjacency()):
node_trace['marker']['color']+=tuple([len(adjacencies[1])])
node_info = adjacencies[0] #+ ' (' +str(adjacencies[1]) + ')' #+' (' +str(len(adjacencies[1])) + ' connections)'
node_trace['text']+=tuple([node_info])
traceRecode.append(node_trace)
figure = {
"data": traceRecode,
"layout": go.Layout(title='Document Overview', showlegend=False, hovermode='closest',
margin={'b': 40, 'l': 40, 'r': 40, 't': 40},
xaxis={'showgrid': False, 'zeroline': False, 'showticklabels': False},
yaxis={'showgrid': False, 'zeroline': False, 'showticklabels': False},
height=1000,
clickmode='event+select',
annotations=[
dict(
ax=(G.nodes[edge[0]]['pos'][0] + G.nodes[edge[1]]['pos'][0]) / 2,
ay=(G.nodes[edge[0]]['pos'][1] + G.nodes[edge[1]]['pos'][1]) / 2, axref='x', ayref='y',
x=(G.nodes[edge[1]]['pos'][0] * 3 + G.nodes[edge[0]]['pos'][0]) / 4,
y=(G.nodes[edge[1]]['pos'][1] * 3 + G.nodes[edge[0]]['pos'][1]) / 4, xref='x', yref='y',
showarrow=True,
arrowhead=4,
arrowsize=2,
arrowwidth=1,
opacity=1
) for edge in G.edges]
)}
app.layout = html.Div([
dcc.Graph(figure=figure
),
])
if __name__ == '__main__':
app.run_server(debug=True)
I found this code from this Github repo.
This results in: Wrong arrows
However, the direction of the error is wrong. (See the red arrow for the correct direction.)
What I want to achieve is this ("Bob" and "Type1" from the Github repo), i.e. display the document name, description and ID when hovering over the node: goalAttributes
However, when I comment lines out, like this
index = 0
for node in G.nodes():
x, y = G.nodes[node]['pos']
hovertext = "Document Name: " + str(G.nodes[node]['Doc']) + "<br>" + "Document ID: " + str(G.nodes[node]['DocumentID'])
text = nodes['Doc'][index]
node_trace['x'] += tuple([x])
node_trace['y'] += tuple([y])
node_trace['hovertext'] += tuple([hovertext])
node_trace['text'] += tuple([text])
index = index + 1
# for node, adjacencies in enumerate(G.adjacency()):
# node_trace['marker']['color']+=tuple([len(adjacencies[1])])
# node_info = adjacencies[0] #+ ' (' +str(adjacencies[1]) + ')' #+' (' +str(len(adjacencies[1])) + ' connections)'
# node_trace['text']+=tuple([node_info])
However, this results in an error:
Traceback (most recent call last):
File "C:\Users\rothstem\Desktop\LearnDash\StackEX\app.py", line 73, in <module>
hovertext = "Document Name: " + str(G.nodes[node]['Doc']) + "<br>" + "Document ID: " + str(G.nodes[node]['DocumentID'])
KeyError: 'Doc'
which I don't quite understand, since 'Doc'
is defined above.
Upvotes: 2
Views: 546
Reputation: 4892
During your graph creation you created the node attribute "Description"
:
nx.set_node_attributes(G, nodes.set_index('Doc')['Description'].to_dict(), 'Description')
So you simply need to replace "Doc"
with "Description"
:
hovertext = "Document Name: " + str(G.nodes[node]['Description']) + "<br>" + "Document ID: " + str(G.nodes[node]['DocumentID'])
Upvotes: 0