Reputation: 435
I can't, for the life of me, figure out why my code is throwing a KeyError. I feel like the values should be there - I added them all in the first for loop
and the lists I added them to aren't empty (I've checked with print statements). So why does line 54 keep relentlessly throwing a KeyError? I'm sure that I've just overlooked something, but after working on this all day, I'm pretty stuck.
The functions used (graph(), and shortest_path(), etc.) are here.
edgeData has the following structure:
{string value: [list of string values that are groupings of the values in the dictionary's keys]}
tagUsers has the following structure:
{string value: [grouped lists of string values found in edgeData]}
Thanks in advance.
from collections import defaultdict, deque
import json
class Graph(object):
def __init__(self):
self.nodes = set()
self.edges = defaultdict(list)
self.distances = {}
def add_node(self, value):
self.nodes.add(value)
def add_edge(self, from_node, to_node, distance):
self.edges[from_node].append(to_node)
self.edges[to_node].append(from_node)
self.distances[(from_node, to_node)] = distance
def dijkstra(graph, initial):
visited = {initial: 0}
path = {}
nodes = set(graph.nodes)
while nodes:
min_node = None
for node in nodes:
if node in visited:
if min_node is None:
min_node = node
elif visited[node] < visited[min_node]:
min_node = node
if min_node is None:
break
nodes.remove(min_node)
current_weight = visited[min_node]
for edge in graph.edges[min_node]:
try:
weight = current_weight + graph.distances[(min_node, edge)]
except:
continue
if edge not in visited or weight < visited[edge]:
visited[edge] = weight
path[edge] = min_node
return visited, path
def shortest_path(graph, origin, destination):
visited, paths = dijkstra(graph, origin)
full_path = deque()
_destination = paths[destination]
while _destination != origin:
full_path.appendleft(_destination)
_destination = paths[_destination]
full_path.appendleft(origin)
full_path.append(destination)
return visited[destination]
if __name__ == '__main__':
edgeData = {'a': ['c', 'd'], 'b': ['d'], 'c': ['d'], 'd': ['a', 'b']}
tagUsers = {'hashtag1': ['a', 'c', 'd'], 'hashtag2': ['b'], 'hashtag3': ['b', 'd']}
shortestpaths = {}
graph = Graph()
users = []
# calls function, builds graph with data in edgeData
for key, value in edgeData.items():
users.append(key)
graph.add_node(key)
for each in value:
graph.add_edge(key, each, 1)
# determines how many users used each hashtag
hashtags = {}
for key, value in tagUsers.items():
tmpTags = key
count = len(value)
hashtags[key] = count
# normally determines which hashtag was used the most
# Here, it's pre-set
topTag = ['hashtag1']
# calculates the shortest path from each user to another user
# that uses the most-used hashtag
count = 0
if count < 1:
for key, value in edgeData.items():
tmpDict = {}
for tag in topTag:
shortest = 10000
for k, v in tagUsers.items():
if k == tag:
for each in v:
flag = False
if key != each
flag = True
tmpShort = shortest_path(graph, key, each)
if tmpShort < shortest:
shortest = tmpShort
if flag:
tmpDict[tag] = shortest
shortestpaths[key] = tmpDict
count += 1
The goal is for the data in shortestpaths
to contain, for each user,
the shortest distance to another user who uses the top hashtags
The function calls are referencing this code, courtesy of mdsrosa on github.
Specifically, the error gets thrown in shortest_path()
at `_destination = paths[_destination]
Upvotes: 2
Views: 3706
Reputation: 3240
Adding some logging to shortest_path
shows the problem:
def shortest_path(graph, origin, destination):
print 'shortest_path Origin:%s Destination:%s' % (origin, destination)
visited, paths = dijkstra(graph, origin)
full_path = deque()
print 'paths: %s' % paths
_destination = paths[destination]
results in:
shortest_path Origin:a Destination:a
paths: {'c': 'a', 'b': 'd', 'd': 'a'}
Traceback (most recent call last):
File "e.py", line 43, in <module>
tmpShort = dj.shortest_path(graph, key, each)
File "E:\kiribati\dijkstra.py", line 61, in shortest_path
_destination = paths[destination]
KeyError: 'a'
You need to handle the edge case where your origin and destination are the same
One option would be to add the check if key == each
before calling shortest_path
for k, v in tagUsers.items():
if k == tag:
for each in v:
if key == each:
continue
tmpShort = dj.shortest_path(graph, key, each)
if tmpShort < shortest:
shortest = tmpShort
tmpDict[tag] = shortest
Also change your loop variables from k
, v
, key
, value
, each
to something that describes the actual data
Upvotes: 2