IT博客汇 | 【Python】（十四）Python中的哈希

【Python】（十四）Python中的哈希

shendao发表于 2017-02-28 14:26:18

哈希表

哈希查找是一种以O(1)时间复杂为目标的查找方式，效率极高。Python中的内置的字典结构dictionary，其key值的查找就是采用了哈希查找的方式，因而查询操作能够达到O(1)的时间复杂度。
【Python】（四）Python中的字典
哈希表的构造原则可以总结为一下几点：

哈希表中项的个数最好为质数，这有利于冲突后的重新散列。
散列函数应最大限度的减少“冲突”发生。
在以开放寻址的方式解决冲突问题的同时，也应尽量避免“堆积”问题。
当冲突大量发生时，开放寻址的时间成本将越来越高。此时更适合使用链接解决冲突。

（反正看的人也不多，概念和原理我就少写点了……）

用哈希表实现字典

下面我们尝试基于哈希表使用python来实现简单的“字典”结构：

我们将哈希表的长度设定为素数13。
哈希函数选择平方取中法和余数法相结合的方式，具体为：将key作为字符串看待，将每个字符的ASCII值相加再平方，所得的结果取中间三位数，最后再将其除以13，所得的余数即为哈希值。
重新散列函数采用向前间隔为3的线性探测。

具体实现代码如下：

class MyDictionary(object):     # 字典类的初始化     def __init__(self):         self.table_size = 13 # 哈希表的大小         self.key_list = [None]*self.table_size #用以存储key的列表         self.value_list = [None]*self.table_size #用以存储value的列表      # 散列函数，返回散列值     # key为需要计算的key     def hashfuction(self, key):         count_char = 0         key_string = str(key)         for key_char in key_string: # 计算key所有字符的ASCII值的和             count_char += ord(key_char) # ord()函数用于求ASCII值         length = len(str(count_char))         if length > 3 : # 当和的位数大于3时，使用平方取中法，保留中间3位             mid_int = 100*int((str(count_char)[length//2-1])) /                     + 10*int((str(count_char)[length//2])) /                     + 1*int((str(count_char)[length//2+1]))         else: # 当和的位数小于等于3时，全部保留             mid_int = count_char          return mid_int%self.table_size # 取余数作为散列值返回      # 重新散列函数，返回新的散列值     # hash_value为旧的散列值     def rehash(self, hash_value):         return (hash_value+3)%self.table_size #向前间隔为3的线性探测      # 存放键值对     def __setitem__(self, key, value):         hash_value = self.hashfuction(key) #计算哈希值         if None == self.key_list[hash_value]: #哈希值处为空位，则可以放置键值对             pass         elif key == self.key_list[hash_value]: #哈希值处不为空，旧键值对与新键值对的key值相同，则作为更新，可以放置键值对             pass         else: #哈希值处不为空，key值也不同，即发生了“冲突”，则利用重新散列函数继续探测，直到找到空位             hash_value = self.rehash(hash_value) # 重新散列             while (None != self.key_list[hash_value]) and (key != self.key_list[hash_value]): #依然不能插入键值对，重新散列                 hash_value = self.rehash(hash_value) # 重新散列         #放置键值对                 self.key_list[hash_value] = key         self.value_list[hash_value] = value      # 根据key取得value     def __getitem__(self, key):         hash_value = self.hashfuction(key) #计算哈希值         first_hash = hash_value #记录最初的哈希值，作为重新散列探测的停止条件         if None == self.key_list[hash_value]: #哈希值处为空位，则不存在该键值对             return None         elif key == self.key_list[hash_value]: #哈希值处不为空，key值与寻找中的key值相同，则返回相应的value值             return self.value_list[hash_value]         else: #哈希值处不为空，key值也不同，即发生了“冲突”，则利用重新散列函数继续探测，直到找到空位或相同的key值             hash_value = self.rehash(hash_value) # 重新散列             while (None != self.key_list[hash_value]) and (key != self.key_list[hash_value]): #依然没有找到，重新散列                 hash_value = self.rehash(hash_value) # 重新散列                 if hash_value == first_hash: #哈希值探测重回起点，判断为无法找到了                     return None             #结束了while循环，意味着找到了空位或相同的key值             if None == self.key_list[hash_value]: #哈希值处为空位，则不存在该键值对                 return None             else: #哈希值处不为空，key值与寻找中的key值相同，则返回相应的value值                 return self.value_list[hash_value]      # 删除键值对     def __delitem__(self, key):         hash_value = self.hashfuction(key) #计算哈希值         first_hash = hash_value #记录最初的哈希值，作为重新散列探测的停止条件         if None == self.key_list[hash_value]: #哈希值处为空位，则不存在该键值对，无需删除             return         elif key == self.key_list[hash_value]: #哈希值处不为空，key值与寻找中的key值相同，则删除             self.key_list[hash_value] = None             self.value_list[hash_value] = None             return         else: #哈希值处不为空，key值也不同，即发生了“冲突”，则利用重新散列函数继续探测，直到找到空位或相同的key值             hash_value = self.rehash(hash_value) # 重新散列             while (None != self.key_list[hash_value]) and (key != self.key_list[hash_value]): #依然没有找到，重新散列                 hash_value = self.rehash(hash_value) # 重新散列                 if hash_value == first_hash: #哈希值探测重回起点，判断为无法找到了                     return             #结束了while循环，意味着找到了空位或相同的key值             if None == self.key_list[hash_value]: #哈希值处为空位，则不存在该键值对                 return             else: #哈希值处不为空，key值与寻找中的key值相同，则删除                 self.key_list[hash_value] = None                 self.value_list[hash_value] = None                 return      # 返回字典的长度     def __len__(self):         count = 0         for key in self.key_list:             if key != None:                 count += 1         return count  def main():     H = MyDictionary()     H["kcat"]="cat"     H["kdog"]="dog"     H["klion"]="lion"     H["ktiger"]="tiger"     H["kbird"]="bird"     H["kcow"]="cow"     H["kgoat"]="goat"     H["pig"]="pig"     H["chicken"]="chicken"     print("字典的长度为%d"%len(H))     print("键 %s 的值为为 %s"%("kcow",H["kcow"]))     print("字典的长度为%d"%len(H))     print("键 %s 的值为为 %s"%("kmonkey",H["kmonkey"]))     print("字典的长度为%d"%len(H))     del H["klion"]     print("字典的长度为%d"%len(H))     print(H.key_list)     print(H.value_list)  if __name__ == "__main__":     main()

运行结果如下：

字典的长度为9 键 kcow 的值为为 cow 字典的长度为9 键 kmonkey 的值为为 None 字典的长度为9 字典的长度为8 [None, 'kgoat', None, 'kcat', 'kbird', 'kdog', None, 'kcow', None, 'ktiger', 'chicken', 'pig', None] [None, 'goat', None, 'cat', 'bird', 'dog', None, 'cow', None, 'tiger', 'chicken', 'pig', None]

结果是正确的。