Reputation: 41
I'm trying to create a basic simulation where I generate fake nodes, connect them and assign random amount of processes to them using DHT method. Connection is in a circular linked list fashion I thought it would be easier to implement this way. I'm using the following functions to do implement DHT:
dc is node class with a hash table and info about next node
def closest_dataCenter(dc,dcNext,key):
largestNode = hash_sha1(str(com_count))
dc_id = hash_sha1(str(dc.id))
dcNext_id = hash_sha1(str(dcNext.id))
if(dc_id>dcNext_id):
if(((largestNode)-dcNext_id-key) > (key-dc_id)):
return dc
else:
return dcNext
else:
if((key-dc_id) > (dcNext_id-key)):
return dcNext
else:
return dc
def find_dataCenter(dc,key):
current = dc
current_id = hash_sha1(str(current.id))
current_nextNode = hash_sha1(str(current.nextNode.id))
while(not (current_id<=key<current_nextNode)):
current_id = hash_sha1(str(current.id))
current_nextNode = hash_sha1(str(current.nextNode.id))
print("Key:" + str(key) + " \n")
print("Current id: " + str(current.id) + " Current id hash: " + str(current_id))
print("CurrentNext id: " + str(current.nextNode.id) + " CurrentNext id hash: " + str(current_nextNode))
time.sleep(1)
if(key>current_id>current_nextNode):
break
else:
current = current.nextNode
return closest_dataCenter(current,current.nextNode,key)
def store(start,key,value):
dc = find_dataCenter(start,key)
dc.hash_table[key]=value
def lookup(start,key):
dc = find_dataCenter(start,key)
return dc.hash_table[key]
On my hashing function I'm using sha1 hashing and then converting the hex to integer before returning it. What am I thinking is I'll send a key and value. Key will be hashed and find the closest pairs of node id and then find the closest node. But I guess hashing does not work like that because when I hash certain key values they sometimes can not find themselves a spot and loop forever. For example:
Current id: 1 Current id hash: 304942582444936629325699363757435820077590259883
CurrentNext id: 2 CurrentNext id hash: 1246245281287062843477446394631337292330716631216
Key:62051490369831458547456710601409275698631100567
Current id: 2 Current id hash: 1246245281287062843477446394631337292330716631216
CurrentNext id: 3 CurrentNext id hash: 684329801336223661356952546078269889038938702779
Key:62051490369831458547456710601409275698631100567
Current id: 3 Current id hash: 684329801336223661356952546078269889038938702779
CurrentNext id: 4 CurrentNext id hash: 156380102318965990264666286018191900590658905210
Key:62051490369831458547456710601409275698631100567
Current id: 4 Current id hash: 156380102318965990264666286018191900590658905210
CurrentNext id: 5 CurrentNext id hash: 983116577831777608312765670515538102764663892676
Key:62051490369831458547456710601409275698631100567
Current id: 5 Current id hash: 983116577831777608312765670515538102764663892676
CurrentNext id: 6 CurrentNext id hash: 1106827226057151132207397296477425838227048555128
Key:62051490369831458547456710601409275698631100567
Current id: 6 Current id hash: 1106827226057151132207397296477425838227048555128
CurrentNext id: 7 CurrentNext id hash: 823067872317374799613180678352776801958854168538
Key:62051490369831458547456710601409275698631100567
Current id: 7 Current id hash: 823067872317374799613180678352776801958854168538
CurrentNext id: 8 CurrentNext id hash: 1452173985408750203318475117189636062911214042143
Key:62051490369831458547456710601409275698631100567
Current id: 8 Current id hash: 1452173985408750203318475117189636062911214042143
CurrentNext id: 1 CurrentNext id hash: 304942582444936629325699363757435820077590259883
Key:62051490369831458547456710601409275698631100567
Am I missing something behind the logic of DHT method, should I write my own hash functions or just not use hashes? How is it possible to find closest nodes to my hashed key, what is closest if everything is hashed, is it possible to know? I'll be glad if someone can explain me logic behind DHT method and how can i apply that to my problem? Thanks in advance.
Full code if required:
import random
import time
import hashlib
com_count = random.randint(5,10)
process_count = random.randint(5,20)
###########################################################################################
class dataCenter:
def __init__(self,id):
self.nextNode = None
self.id=id
self.hash_table={}
class circularLinkedList:
def __init__(self):
self.head = None
def push(self,id):
ptr1 = dataCenter(id)
temp = self.head
ptr1.nextNode = self.head
if self.head is not None:
while(temp.nextNode != self.head):
temp = temp.nextNode
temp.nextNode = ptr1
else:
ptr1.nextNode = ptr1
self.head = ptr1
def printList(self):
temp = self.head
if self.head is not None:
while(True):
print("%d" %(temp.id))
temp = temp.nextNode
if (temp == self.head):
break
def find_next(self,id):
this_dc = self.head
while True:
if(this_dc.id == id):
print("ID: " + str(this_dc.id) + " connected to " + str(this_dc.nextNode.id))
break
elif(this_dc.nextNode == self.head):
return False
this_dc = this_dc.nextNode
def find(self,id):
this_dc = self.head
while True:
if(this_dc.id == id):
return this_dc
break
elif(this_dc.nextNode == self.head):
return False
this_dc = this_dc.nextNode
###########################################################################################
def print_connections_info(clist):
print ("Number of Data Centers: "+str(com_count))
for i in range(com_count+1):
cList.find_next(i)
###########################################################################################
def create_dc(com_count):
for i in range(com_count):
cList.push(com_count-i)
###########################################################################################
def hash_sha1(x):
hash_object = hashlib.sha1()
hash_object.update(bytes(x, encoding='utf-8'))
return int(hash_object.hexdigest(),16)
###########################################################################################
def closest_dataCenter(dc,dcNext,key):
largestNode = hash_sha1(str(com_count))
dc_id = hash_sha1(str(dc.id))
dcNext_id = hash_sha1(str(dcNext.id))
if(dc_id>dcNext_id):
if(((largestNode)-dcNext_id-key) > (key-dc_id)):
return dc
else:
return dcNext
else:
if((key-dc_id) > (dcNext_id-key)):
return dcNext
else:
return dc
###########################################################################################
def find_dataCenter(dc,key):
current = dc
current_id = hash_sha1(str(current.id))
current_nextNode = hash_sha1(str(current.nextNode.id))
while(not (current_id<=key<current_nextNode)):
current_id = hash_sha1(str(current.id))
current_nextNode = hash_sha1(str(current.nextNode.id))
print("Key:" + str(key) + " \n")
print("Current id: " + str(current.id) + " Current id hash: " + str(current_id))
print("CurrentNext id: " + str(current.nextNode.id) + " CurrentNext id hash: " + str(current_nextNode))
time.sleep(1)
if(key>current_id>current_nextNode):
break
else:
current = current.nextNode
return closest_dataCenter(current,current.nextNode,key)
###########################################################################################
def store(start,key,value):
dc = find_dataCenter(start,key)
dc.hash_table[key]=value
###########################################################################################
def lookup(start,key):
dc = find_dataCenter(start,key)
return dc.hash_table[key]
###########################################################################################
def create_process(pc_count):
li = []
for i in range(pc_count):
li.append("Process " + str(i))
return li
###########################################################################################
cList = circularLinkedList()
process_list = create_process(process_count)
create_dc(com_count)
print_connections_info(cList)
for i in range(len(process_list)):
store(cList.find(1),hash_sha1(str(i)),process_list[i])
print(cList.find(1))
print(hash_sha1(str(i)))
print(process_list[i])
print("**********************")
Upvotes: 0
Views: 243
Reputation: 41
I solved the problem changing this line:
if(key>=current.id>current.nextNode.id):
break
It was looping forever because current id was equal to key sometimes.
Upvotes: 1