darkhorse
darkhorse

Reputation: 8742

Finding the closest pair of keys if lookup fails in a Python dictionary

Lets say I have a Python dictionary where the keys are actually integers. I can create one like this:

>>> d = dict([(1, 0), (7, 10), (28, 20)])
>>> d
{1: 0, 7: 10, 28: 20}

Now, I want to do a look up where if the key is found, its index is returned. This part is really easy, like so:

>>> key = 7
>>> d[key]
10

If a key is not found, then I want to return the bound for the keys. For example:

>>> key = 6
>>> d[key]
Bound(1, 7)

Since 6 does not exist as a key, I am returning the 2 keys between which it lies. Is such a thing possible to do without basically iterating the entire dictionary? If not, then this question does not really need to be answered. If this is indeed possible, please include some performance implications as well if possible. Thanks.

Upvotes: 2

Views: 87

Answers (3)

Flávio Filho
Flávio Filho

Reputation: 473

def bound(x, d):

  if x in d:
    return x
  else:

    for i in sorted(d):
      if x > i:
        l = i

    for j in sorted(d, reverse=True):
      if j > x:
        h = j

    return(l,h)


d = dict([(1, 0), (7, 10), (28, 20), (4,5), (2,5), (15,10)])

bound(17,d)

(15, 28)

Upvotes: 0

Thierry Lathuille
Thierry Lathuille

Reputation: 24282

Here is a solution using a function to access a normal dict (I used an OrderedDict as I have an older version of Python here now, you can use a normal dict if you have Python 3.6 or more, as they are ordered.)

We sort the dict by key, which lets us use bisect to find the surrounding keys quickly.

import bisect
from collections import OrderedDict

d = OrderedDict(sorted([(1, 0), (7, 10), (28, 20)])) # Could be a simple dict with Python 3.6+

class Bound:
    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __repr__(self):
        return 'Bound({}, {})'.format(self.a, self.b)

def closest(key, d):
    try:
        return d[key]
    except KeyError:
        keys = list(d.keys())
        ins_point = bisect.bisect(keys, key)
        return Bound(keys[ins_point-1] if ins_point >= 1 else None,
                     keys[ins_point] if ins_point < len(keys) else None)

closest(7, d)
# 10

closest(8, d)
# Bound(7, 28)

closest(30, d)
# Bound(28, None)

closest(-1, d)
# Bound(None, 1)

You can also subclass dict, updating the __missing__ method (code for Python >=3.6, assuming that dict is ordered:

import bisect

class Bound:
    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __repr__(self):
        return 'Bound({}, {})'.format(self.a, self.b)


class BoundDict(dict):
    def __missing__(self, key):
        keys = list(self.keys())
        ins_point = bisect.bisect(keys, key)
        return Bound(keys[ins_point-1] if ins_point >= 1 else None,
                     keys[ins_point] if ins_point < len(keys) else None)


d = BoundDict(sorted([(1, 0), (7, 10), (28, 20)])) 

print(d[7])
# 10

print(d[8])
# Bound(7, 28)

print(d[30])
# Bound(28, None)

print(d[-1])
# Bound(None, 1)

Upvotes: 6

sanyassh
sanyassh

Reputation: 8540

Solution with custom dict class:

import bisect
import collections


class Bound:
    def __init__(self, left, right):
        self.left = left
        self.right = right

    def __repr__(self):
        return 'Bound({}, {})'.format(self.left, self.right)


class MyDict(collections.defaultdict):
    def __init__(self, *args, **kwargs):
        super().__init__()
        dict.__init__(self, *args, **kwargs)
        self.lst = sorted(key for key in self)

    def __setitem__(self, key, value):
        if key not in self:
            bisect.insort_left(self.lst, key)
        super().__setitem__(key, value)

    def __delitem__(self, key):
        self.lst.remove(key)
        super().__delitem__(key)

    def __missing__(self, key):
        right_index = bisect.bisect(self.lst, key)
        left_index = right_index - 1
        right = self.lst[right_index] if right_index != len(self.lst) else None
        left = self.lst[left_index] if left_index != -1 else None
        return Bound(left, right)


d = MyDict([(1, 0), (7, 10), (28, 20)])
print(d[-3]) # Bound(None, 1)
print(d[6]) # Bound(1, 7)
print(d[7]) # 10
print(d[33]) # Bound(28, None)
del d[7]
print(d[6]) # Bound(1, 28)

Upvotes: 2

Related Questions