Hash算法进行图片相似度识别的本质,就是将图片进行Hash转换,生成一组二进制数字,然后通过比较不同图片的Hash值距离找出相似图片。aHash中文叫平均哈希算法,顾名思义,在进行转化过程中将用到像素均值。
优点:速度快
缺点:精确度较差,对均值敏感
from PIL import Image
import os
import numpy as np
# Average hash (aHash)
def aHash(image):
    """Compute the average hash of a 2-D grayscale pixel array.

    Every pixel is compared with the mean of all pixels: the resulting bit is
    1 when the pixel is strictly greater than the mean and 0 otherwise.

    Args:
        image: 2-D array-like of pixel intensities (anything ``np.asarray``
            accepts).

    Returns:
        A flat list of ints (each 0 or 1) in row-major order, one per pixel.

    Raises:
        ValueError: if ``image`` is not 2-dimensional.
    """
    pixels = np.asarray(image)
    if pixels.ndim != 2:
        raise ValueError(
            "aHash expects a 2-D array, got %d dimension(s)" % pixels.ndim
        )
    if pixels.size == 0:
        # Nothing to hash; also avoids taking the mean of an empty array.
        return []
    average = pixels.mean()
    # ravel() walks the array row by row, the same order as a nested
    # row/column loop.
    return (pixels > average).astype(int).ravel().tolist()
# Hamming distance between two hashes
def Hamming_distance(hash1, hash2):
    """Count the positions at which two equal-length hashes differ.

    Args:
        hash1: sequence of hash bits.
        hash2: sequence of hash bits, same length as ``hash1``.

    Returns:
        The number of indices where the two sequences hold different values.

    Raises:
        ValueError: if the two hashes have different lengths.
    """
    if len(hash1) != len(hash2):
        # A distance over mismatched lengths would silently ignore the tail
        # of the longer hash.
        raise ValueError(
            "hash lengths differ: %d != %d" % (len(hash1), len(hash2))
        )
    return sum(1 for bit1, bit2 in zip(hash1, hash2) if bit1 != bit2)
if __name__ == '__main__':
    # Shrink each image to 8x8 and convert it to grayscale. Image.LANCZOS is
    # used as the resampling filter in place of the Image.ANTIALIAS name.
    # The context managers close the image files once the pixel data has been
    # copied into the numpy arrays.
    with Image.open("image1.png") as source1:
        image1 = np.array(source1.resize((8, 8), Image.LANCZOS).convert('L'), 'f')
    with Image.open("image2.png") as source2:
        image2 = np.array(source2.resize((8, 8), Image.LANCZOS).convert('L'), 'f')
    hash1 = aHash(image1)
    hash2 = aHash(image2)
    dist = Hamming_distance(hash1, hash2)
    # Convert the Hamming distance into a similarity in [0, 1]; an 8x8 image
    # yields 64 hash bits.
    similarity = 1 - dist * 1.0 / len(hash1)
    print('dist is ' + '%d' % dist)
    # '%f' instead of '%d': '%d' would truncate the fractional similarity.
    print('similarity is ' + '%f' % similarity)
dHash中文叫差异哈希算法,在对图片进行哈希转换时,通过比较每行左右相邻两个像素的大小,得到最终的哈希序列。
优点:速度快,判断效果比aHash好
from PIL import Image
import os
import numpy as np
import time
# Difference hash (dHash)
def dHash(image, hash_size=8):
    """Compute the difference hash of a 2-D grayscale pixel array.

    Only the top-left ``hash_size`` rows by ``hash_size + 1`` columns are
    used. Within each row, every pixel is compared with its right-hand
    neighbour: the bit is 1 when the left pixel is strictly greater than the
    right one and 0 otherwise.

    Args:
        image: 2-D array-like of pixel intensities with at least
            ``hash_size`` rows and ``hash_size + 1`` columns.
        hash_size: number of rows, and of comparisons per row, to hash.
            Defaults to 8, which gives a 64-bit hash.

    Returns:
        A flat list of ``hash_size * hash_size`` ints (each 0 or 1) in
        row-major order.

    Raises:
        ValueError: if ``image`` is not 2-dimensional or is smaller than
            ``hash_size`` x ``hash_size + 1``.
    """
    pixels = np.asarray(image)
    if pixels.ndim != 2:
        raise ValueError(
            "dHash expects a 2-D array, got %d dimension(s)" % pixels.ndim
        )
    rows, cols = pixels.shape
    if rows < hash_size or cols < hash_size + 1:
        raise ValueError(
            "dHash needs at least %d rows and %d columns, got %d x %d"
            % (hash_size, hash_size + 1, rows, cols)
        )
    window = pixels[:hash_size, :hash_size + 1]
    # Compare each column with the one to its right, for all rows at once.
    return (window[:, :-1] > window[:, 1:]).astype(int).ravel().tolist()
# Hamming distance between two hashes
def Hamming_distance(hash1, hash2):
    """Count the positions at which two equal-length hashes differ.

    Args:
        hash1: sequence of hash bits.
        hash2: sequence of hash bits, same length as ``hash1``.

    Returns:
        The number of indices where the two sequences hold different values.

    Raises:
        ValueError: if the two hashes have different lengths.
    """
    if len(hash1) != len(hash2):
        # A distance over mismatched lengths would silently ignore the tail
        # of the longer hash.
        raise ValueError(
            "hash lengths differ: %d != %d" % (len(hash1), len(hash2))
        )
    return sum(1 for bit1, bit2 in zip(hash1, hash2) if bit1 != bit2)
if __name__ == '__main__':
    # The context manager closes both image files when the block ends.
    with Image.open('image1.png') as source1, Image.open('image2.png') as source2:
        # Time the resize, hashing and distance computation.
        start = time.time()
        # Shrink to 9 wide by 8 high and convert to grayscale, so each of the
        # 8 rows gives 8 left/right comparisons. Image.LANCZOS is used as the
        # resampling filter in place of the Image.ANTIALIAS name.
        image1 = np.array(source1.resize((9, 8), Image.LANCZOS).convert('L'), 'f')
        image2 = np.array(source2.resize((9, 8), Image.LANCZOS).convert('L'), 'f')
        hash1 = dHash(image1)
        hash2 = dHash(image2)
        dist = Hamming_distance(hash1, hash2)
        end = time.time()
    # Convert the Hamming distance into a similarity in [0, 1].
    similarity = 1 - dist * 1.0 / len(hash1)
    print('dist is ' + '%d' % dist)
    # '%f' instead of '%d': '%d' would truncate the fractional similarity.
    print('similarity is ' + '%f' % similarity)
    print('time is ' + '%f' % (end - start))