Reputation:
I've had a look around for the answer to this, but I only seem to be able to find software that does it for you. Does anybody know how to go about doing this in python?
Upvotes: 25
Views: 35338
Reputation: 333
Here is the script from the accepted answer, updated for Python 3. It also prints progress and does not need any external libraries.
import sys, os, hashlib, io, re
# Pre-compiled pattern that matches a single decimal digit.
decimal_match = re.compile(r'\d')
def bdecode(data):
    """Decode bencoded *data* and return the resulting Python object.

    Args:
        data: The raw bencoded payload as ``bytes`` (for example the full
            contents of a ``.torrent`` file).

    Returns:
        A ``dict`` (with ``str`` keys), ``list``, ``int`` or ``bytes``,
        depending on the type of the top-level bencoded value.

    Raises:
        ValueError: If the data is not valid bencode.
        IndexError: If the data is empty or ends before the value is
            complete.
    """
    # _dechunk() consumes its input with list.pop(), i.e. from the END of the
    # list, so the single-byte chunks are stored in reverse order.
    # bytes([b]) is used instead of b.to_bytes() because int.to_bytes() only
    # accepts being called without arguments since Python 3.11.
    chunks = [bytes([byte]) for byte in data]
    chunks.reverse()
    return _dechunk(chunks)


def _dechunk(chunks):
    """Decode one bencoded value from *chunks* and return it.

    Args:
        chunks: List of single-byte ``bytes`` objects holding the bencoded
            data in reverse order, so that ``chunks.pop()`` returns the next
            input byte. The list is consumed in place.

    Returns:
        ``dict`` for a dictionary (keys decoded from UTF-8 to ``str``),
        ``list`` for a list, ``int`` for an integer and ``bytes`` for a
        byte string.

    Raises:
        ValueError: If the next byte does not start a bencoded value, a
            string length is not followed by ``:``, or an integer literal
            is malformed.
        IndexError: If the input ends before the value is complete.
    """
    item = chunks.pop()

    if item == b'd':
        decoded = {}
        item = chunks.pop()
        while item != b'e':
            # Not the terminator: push the byte back, it starts the key.
            chunks.append(item)
            key = _dechunk(chunks)
            decoded[key.decode("utf-8")] = _dechunk(chunks)
            item = chunks.pop()
        return decoded

    if item == b'l':
        items = []
        item = chunks.pop()
        while item != b'e':
            # Not the terminator: push the byte back, it starts an element.
            chunks.append(item)
            items.append(_dechunk(chunks))
            item = chunks.pop()
        return items

    if item == b'i':
        digits = []
        item = chunks.pop()
        while item != b'e':
            digits.append(item)
            item = chunks.pop()
        return int(b''.join(digits))

    # bytes.isdigit() is true only for ASCII digits, so arbitrary binary
    # bytes fall through to the ValueError below instead of failing to decode.
    if item.isdigit():
        digits = []
        while item.isdigit():
            digits.append(item)
            item = chunks.pop()
        if item != b':':
            raise ValueError("Invalid input!")
        length = int(b''.join(digits))
        # join() keeps this linear; string values such as 'pieces' can be
        # very large.
        return b''.join(chunks.pop() for _ in range(length))

    raise ValueError("Invalid input!")
# Path of the file currently being read by pieces_generator(); main() shows
# it in its progress output.
currentTestedPath = ""
def pieces_generator(info):
    """Yield the successive pieces of the downloaded file(s).

    Each path is printed and stored in the module-level
    ``currentTestedPath`` before the file is read. Paths are passed to
    ``open()`` as built, so relative names resolve against the current
    working directory.

    Args:
        info: Decoded ``info`` dictionary of a torrent. ``'piece length'``
            is the piece size in bytes and ``'name'`` is a UTF-8 byte
            string. If ``'files'`` is present, it is a list of dicts whose
            ``'path'`` is a list of UTF-8 byte-string path components
            located below ``'name'``.

    Yields:
        ``bytes`` objects of ``piece length`` bytes; only the final piece
        may be shorter.
    """
    global currentTestedPath
    piece_length = info['piece length']
    name = info['name'].decode("utf-8")

    if 'files' in info:  # yield pieces from a multi-file torrent
        # A piece may span several files, so the partial piece is carried
        # over from one file to the next.
        piece = b""
        for file_info in info['files']:
            path = os.sep.join(
                [name] + [part.decode("utf-8") for part in file_info['path']]
            )
            currentTestedPath = path
            print(currentTestedPath)
            with open(path, "rb") as sfile:
                while True:
                    piece += sfile.read(piece_length - len(piece))
                    if len(piece) != piece_length:
                        # File exhausted before the piece is complete.
                        break
                    yield piece
                    piece = b""
        if piece != b"":
            yield piece
    else:  # yield pieces from a single file torrent
        # 'name' is already decoded to str above; decoding it a second time
        # would raise AttributeError.
        path = name
        currentTestedPath = path
        print(path)
        with open(path, "rb") as sfile:
            while True:
                piece = sfile.read(piece_length)
                if not piece:
                    return
                yield piece
def corruption_failure():
    """Display an error message and exit with status 1.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print("download corrupted")
    # sys.exit() instead of the exit() helper: the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(1)
def main():
    """Check downloaded data against the torrent file named in sys.argv[1].

    The torrent is decoded with bdecode(). Every piece produced by
    pieces_generator() is hashed with SHA-1 and compared with the next
    20-byte entry of ``info['pieces']``. A progress line is printed for each
    piece. corruption_failure() is called on the first mismatch, or when
    expected hashes are left over after the last piece.
    """
    # Open torrent file; the context manager closes it as soon as it is read.
    with open(sys.argv[1], "rb") as torrent_file:
        metainfo = bdecode(torrent_file.read())
    info = metainfo['info']
    expected_hashes = info['pieces']
    pieces = io.BytesIO(expected_hashes)
    # Each expected hash is 20 bytes long.
    total_pieces = len(expected_hashes) // 20

    # Iterate through pieces
    for tested, piece in enumerate(pieces_generator(info), start=1):
        # Compare piece hash with expected hash
        piece_hash = hashlib.sha1(piece).digest()
        print(f"Testing hash {piece_hash.hex()} for '{currentTestedPath}' ({tested}/{total_pieces})")
        if piece_hash != pieces.read(20):
            corruption_failure()

    # ensure we've read all pieces
    if pieces.read():
        corruption_failure()


if __name__ == "__main__":
    main()
Upvotes: 0
Reputation: 51
In case anybody wonders how to extract file hashes from BitTorrent v2-compatible torrents, you can use this command-line tool.
Upvotes: 1
Reputation: 39496
I wrote a piece of python code that verifies the hashes of downloaded files against what's in a .torrent file. Assuming you want to check a download for corruption you may find this useful.
You need the bencode package to use this. Bencode is the serialization format used in .torrent files. It can marshal lists, dictionaries, strings and numbers somewhat like JSON.
The code takes the hashes contained in the info['pieces']
string:
# Read and decode the .torrent file named on the command line.
torrent_file = open(sys.argv[1], "rb")
metainfo = bencode.bdecode(torrent_file.read())
info = metainfo['info']
# Wrap the 'pieces' string in a file-like object so it can be read in chunks.
pieces = StringIO.StringIO(info['pieces'])
That string contains a succession of 20 byte hashes (one for each piece). These hashes are then compared with the hash of the pieces of on-disk file(s).
The only complicated part of this code is handling multi-file torrents because a single torrent piece can span more than one file (internally BitTorrent treats multi-file downloads as a single contiguous file). I'm using the generator function pieces_generator()
to abstract that away.
You may want to read the BitTorrent spec to understand this in more detail.
Full code below:
import sys, os, hashlib, StringIO, bencode
def pieces_generator(info):
    """Yield pieces from download file(s).

    Each path is printed before the file is read.

    Args:
        info: Decoded ``info`` dictionary of a torrent. ``'piece length'``
            is the piece size in bytes and ``'name'`` is the file (or
            top-level directory) name. If ``'files'`` is present, it is a
            list of dicts whose ``'path'`` is a list of path components
            below ``'name'``. Names may be UTF-8 byte strings or text.

    Yields:
        Byte strings of ``piece length`` bytes; only the final piece may be
        shorter.
    """
    def as_text(value):
        # Byte strings are decoded as UTF-8 (the original code always called
        # .decode('UTF-8')); values that are already text pass through.
        return value.decode('UTF-8') if isinstance(value, bytes) else value

    piece_length = info['piece length']
    if 'files' in info:  # yield pieces from a multi-file torrent
        # A piece may span several files, so the partial piece is carried
        # over from one file to the next.
        piece = b""
        for file_info in info['files']:
            components = [info['name']] + list(file_info['path'])
            path = os.sep.join(as_text(part) for part in components)
            print(path)
            with open(path, "rb") as sfile:
                while True:
                    piece += sfile.read(piece_length - len(piece))
                    if len(piece) != piece_length:
                        # File exhausted before the piece is complete.
                        break
                    yield piece
                    piece = b""
        if piece != b"":
            yield piece
    else:  # yield pieces from a single file torrent
        path = as_text(info['name'])
        print(path)
        with open(path, "rb") as sfile:
            while True:
                piece = sfile.read(piece_length)
                if not piece:
                    return
                yield piece
def corruption_failure():
    """Display an error message and exit with status 1.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print("download corrupted")
    # sys.exit() instead of the exit() helper: the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(1)
def main():
    """Check downloaded data against the torrent file named in sys.argv[1].

    Every piece produced by pieces_generator() is hashed with SHA-1 and
    compared with the next 20-byte entry of ``info['pieces']``.
    corruption_failure() is called on the first mismatch, or when expected
    hashes are left over after the last piece.
    """
    # Open torrent file; the context manager closes it as soon as it is read.
    with open(sys.argv[1], "rb") as torrent_file:
        metainfo = bencode.bdecode(torrent_file.read())
    info = metainfo['info']
    pieces = StringIO.StringIO(info['pieces'])

    # Iterate through pieces
    for piece in pieces_generator(info):
        # Compare piece hash with expected hash
        piece_hash = hashlib.sha1(piece).digest()
        if piece_hash != pieces.read(20):
            corruption_failure()

    # ensure we've read all pieces
    if pieces.read():
        corruption_failure()


if __name__ == "__main__":
    main()
Upvotes: 36
Reputation: 4911
Here is how I extracted the hash value from a torrent file:
#!/usr/bin/python
import sys, os, hashlib, StringIO
import bencode
def main():
    """Print the SHA-1 hex digest of the torrent's bencoded ``info`` dict.

    The torrent file is the one named in ``sys.argv[1]``.
    """
    # Open torrent file; the context manager closes it as soon as it is read.
    with open(sys.argv[1], "rb") as torrent_file:
        metainfo = bencode.bdecode(torrent_file.read())
    info = metainfo['info']
    # The single-argument call form of print works on Python 2 and Python 3.
    print(hashlib.sha1(bencode.bencode(info)).hexdigest())


if __name__ == "__main__":
    main()
It is the same as running the command:
transmissioncli -i test.torrent 2>/dev/null | grep "^hash:" | awk '{print $2}'
Hope it helps :)
Upvotes: 19
Reputation: 54232
According to this, you should be able to find the md5sums of files by searching for the part of the data that looks like:
d[...]6:md5sum32:[hash is here][...]e
(SHA is not part of the spec)
Upvotes: -3