Persistent sha256 hash object?

I need a Python / C / C++ / Java implementation that can pause the hashing progress and store that progress in a file, so that hashing can be resumed from that file at a later stage.

Whichever of the languages listed above it is written in, it should work properly from Python. A way that works together with hashlib is suggested, but not required. Also, if something like this already exists, a link to it would be sufficient.

To give an idea of what your implementation should do:

    import hashlib
    import hashpersist  # THIS IS NEEDED.

    sha256 = hashlib.sha256("Hello ")
    hashpersist.save_state(sha256, open('test_file', 'w'))

    sha256_recovered = hashpersist.load_state(open('test_file', 'r'))
    sha256_recovered.update("World")

    print sha256_recovered.hexdigest()

This should give the same output as if we had simply hashed "Hello World" with the standard sha256 function:

 a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e 
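For reference, that is just the ordinary one-shot hashlib digest of the concatenated string, which can be checked with a couple of lines:

    import hashlib

    # One-shot hash of the full string, for comparison with the resumed hash above.
    print hashlib.sha256("Hello " + "World").hexdigest()
    # a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e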

As it turns out, rewriting hashlib to be resumable was easier than I thought, at least for the SHA-256 part. I spent some time playing with C code that uses the OpenSSL crypto library, but then I realized that I didn't need all that stuff: I could just use ctypes.

rehash.py

    #! /usr/bin/env python

    ''' A resumable implementation of SHA-256 using ctypes
        with the OpenSSL crypto library

        Written by PM 2Ring 2014.11.13
    '''

    import os
    from ctypes import *

    SHA_LBLOCK = 16
    SHA256_DIGEST_LENGTH = 32

    class SHA256_CTX(Structure):
        _fields_ = [
            ("h", c_long * 8),
            ("Nl", c_long),
            ("Nh", c_long),
            ("data", c_long * SHA_LBLOCK),
            ("num", c_uint),
            ("md_len", c_uint)
        ]

    HashBuffType = c_ubyte * SHA256_DIGEST_LENGTH

    #crypto = cdll.LoadLibrary("libcrypto.so")
    crypto = cdll.LoadLibrary("libeay32.dll" if os.name == "nt" else "libssl.so")

    class sha256(object):
        digest_size = SHA256_DIGEST_LENGTH

        def __init__(self, datastr=None):
            self.ctx = SHA256_CTX()
            crypto.SHA256_Init(byref(self.ctx))
            if datastr:
                self.update(datastr)

        def update(self, datastr):
            crypto.SHA256_Update(byref(self.ctx), datastr, c_int(len(datastr)))

        #Clone the current context
        def _copy_ctx(self):
            ctx = SHA256_CTX()
            pointer(ctx)[0] = self.ctx
            return ctx

        def copy(self):
            other = sha256()
            other.ctx = self._copy_ctx()
            return other

        def digest(self):
            #Preserve context in case we get called before hashing is
            # really finished, since SHA256_Final() clears the SHA256_CTX
            ctx = self._copy_ctx()
            hashbuff = HashBuffType()
            crypto.SHA256_Final(hashbuff, byref(self.ctx))
            self.ctx = ctx
            return str(bytearray(hashbuff))

        def hexdigest(self):
            return self.digest().encode('hex')

    #Tests
    def main():
        import cPickle
        import hashlib

        data = ("Nobody expects ", "the spammish ", "imposition!")

        print "rehash\n"
        shaA = sha256(''.join(data))
        print shaA.hexdigest()
        print repr(shaA.digest())
        print "digest size =", shaA.digest_size
        print

        shaB = sha256()
        shaB.update(data[0])
        print shaB.hexdigest()

        #Test pickling
        sha_pickle = cPickle.dumps(shaB, -1)
        print "Pickle length:", len(sha_pickle)
        shaC = cPickle.loads(sha_pickle)

        shaC.update(data[1])
        print shaC.hexdigest()

        #Test copying. Note that copy can be pickled
        shaD = shaC.copy()

        shaC.update(data[2])
        print shaC.hexdigest()

        #Verify against hashlib.sha256()
        print "\nhashlib\n"
        shaD = hashlib.sha256(''.join(data))
        print shaD.hexdigest()
        print repr(shaD.digest())
        print "digest size =", shaD.digest_size
        print

        shaE = hashlib.sha256(data[0])
        print shaE.hexdigest()

        shaE.update(data[1])
        print shaE.hexdigest()

        #Test copying. Note that hashlib copy can NOT be pickled
        shaF = shaE.copy()
        shaF.update(data[2])
        print shaF.hexdigest()

    if __name__ == '__main__':
        main()
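Since the SHA256_CTX structure above contains only plain C integer fields, the whole context can be pickled directly. A small wrapper in the spirit of the hashpersist module from the question could look something like this (an illustrative sketch rather than part of rehash.py; it assumes rehash.py is importable and uses binary file modes for pickling):

    # hashpersist-style helpers, sketched on top of rehash.sha256
    import cPickle as pickle

    import rehash

    def save_state(sha, fileobj):
        # SHA256_CTX holds only plain C integers, so ctypes can pickle it directly.
        pickle.dump(sha.ctx, fileobj, -1)

    def load_state(fileobj):
        # Create a fresh sha256 object and swap in the saved context.
        sha = rehash.sha256()
        sha.ctx = pickle.load(fileobj)
        return sha

    if __name__ == '__main__':
        sha = rehash.sha256("Hello ")
        with open('test_file', 'wb') as f:
            save_state(sha, f)
        with open('test_file', 'rb') as f:
            sha_recovered = load_state(f)
        sha_recovered.update("World")
        # Should print the same digest as hashing "Hello World" in one go.
        print sha_recovered.hexdigest()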

resumable_SHA-256.py

    #! /usr/bin/env python

    ''' Resumable SHA-256 hash for large files using the OpenSSL crypto library

        The hashing process may be interrupted by Control-C (SIGINT) or SIGTERM.
        When a signal is received, hashing continues until the end of the current
        chunk, then the current file position, total file size, and the sha object
        is saved to a file. The name of this file is formed by appending '.hash'
        to the name of the file being hashed.

        Just re-run the program to resume hashing. The '.hash' file will be
        deleted once hashing is completed.

        Written by PM 2Ring 2014.11.14
    '''

    import cPickle as pickle
    import os
    import signal
    import sys

    import rehash

    quit = False

    blocksize = 1<<16   # 64kB
    blocksperchunk = 1<<8

    chunksize = blocksize * blocksperchunk

    def handler(signum, frame):
        global quit
        print "\nGot signal %d, cleaning up." % signum
        quit = True

    def do_hash(fname, filesize):
        hashname = fname + '.hash'
        if os.path.exists(hashname):
            with open(hashname, 'rb') as f:
                pos, fsize, sha = pickle.load(f)
            if fsize != filesize:
                print "Error: file size of '%s' doesn't match size recorded in '%s'" % (fname, hashname)
                print "%d != %d. Aborting" % (fsize, filesize)
                exit(1)
        else:
            pos, fsize, sha = 0, filesize, rehash.sha256()

        finished = False
        with open(fname, 'rb') as f:
            f.seek(pos)
            while not (quit or finished):
                for _ in xrange(blocksperchunk):
                    block = f.read(blocksize)
                    if block == '':
                        finished = True
                        break
                    sha.update(block)

                pos += chunksize
                sys.stderr.write(" %6.2f%% of %d\r" % (100.0 * pos / fsize, fsize))
                if finished or quit:
                    break

        if quit:
            with open(hashname, 'wb') as f:
                pickle.dump((pos, fsize, sha), f, -1)
        elif os.path.exists(hashname):
            os.remove(hashname)

        return (not quit), pos, sha.hexdigest()

    def main():
        if len(sys.argv) != 2:
            print "Resumable SHA-256 hash of a file."
            print "Usage:\npython %s filename\n" % sys.argv[0]
            exit(1)

        fname = sys.argv[1]
        filesize = os.path.getsize(fname)

        signal.signal(signal.SIGINT, handler)
        signal.signal(signal.SIGTERM, handler)

        finished, pos, hexdigest = do_hash(fname, filesize)

        if finished:
            print "%s %s" % (hexdigest, fname)
        else:
            print "sha-256 hash of '%s' incomplete" % fname
            print "%s" % hexdigest
            print "%d / %d bytes processed." % (pos, filesize)

    if __name__ == '__main__':
        main()
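If you want to convince yourself that feeding data in chunks gives the same digest as an ordinary one-shot hash, a quick self-contained check along these lines works (illustrative only; it uses in-memory data instead of a file):

    import hashlib

    import rehash

    # Feed the same data in awkward chunk sizes and compare against hashlib.
    data = 1000 * "Nobody expects the spammish imposition! "

    chunked = rehash.sha256()
    for i in xrange(0, len(data), 7):
        chunked.update(data[i:i + 7])

    assert chunked.hexdigest() == hashlib.sha256(data).hexdigest()
    print "chunked rehash digest matches hashlib"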

Demo

    import rehash
    import pickle

    sha = rehash.sha256("Hello ")
    s = pickle.dumps(sha.ctx)

    sha = rehash.sha256()
    sha.ctx = pickle.loads(s)

    sha.update("World")
    print sha.hexdigest()

Output

 a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e 

Edit

I just made a small edit so that rehash also runs on Windows, although I've only tested it on WinXP. libeay32.dll can be in the current directory, or somewhere in the system's library search path, e.g. WINDOWS\system32. My rather ancient (and mostly unused) XP installation couldn't find the .dll, even though it's used by OpenOffice and Avira, so I just copied it from the Avira folder into system32. Now it works perfectly. 🙂