基于图片相似度对视频进行抽帧

文章目录

需求
方法
代码

需求

做深度学习需要自己收集图片，其中一种方式是先收集视频，再将视频转换成图片。在视频转图片的过程中，会产生大量高度相似的帧，它们对模型训练没有帮助，还会增加标注成本。因此，我们需要一种方法，从视频中只选取彼此差异足够大的图片。

方法

基于图片相似度来选取不同的图片进行保存，相似度计算方法主要参考https://aistudio.baidu.com/projectdetail/4185629?channelType=0&channel=0 这篇中的方法。

代码

直接上代码，内容简单，很容易看明白。代码中提供了三种基于哈希的方法（ahash、phash、dhash）和一种结构相似性（SSIM）方法，需要手动修改代码来切换方法及对应的阈值。注意两类方法的判断方向相反：哈希方法计算的是差异值，差异值大于阈值时保存该帧；SSIM 计算的是相似度，相似度小于阈值时保存该帧。

import os
import cv2
import numpy as np
import sys
import shutil
from datetime import datetime
from skimage.metrics import structural_similarity as compare_ssim

# 均值哈希算法
def ahash(image):
    """Return the average hash (aHash) of an image as a 16-character hex string.

    The image is shrunk to 8x8, converted to grayscale, and every pixel is
    compared against the mean gray level: brighter-than-mean pixels become
    bit 1, all others bit 0. The 64 bits are read row by row and encoded as
    lowercase hexadecimal, most significant bit first.

    Args:
        image: 3-channel image array; it is converted with
            ``cv2.COLOR_RGB2GRAY``, so RGB channel order is expected.

    Returns:
        A 16-character lowercase hex string encoding the 64-bit hash.
    """
    small = cv2.resize(image, (8, 8), interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY)
    # Use ndarray.mean() (float64 accumulator) instead of adding uint8
    # scalars one by one: under NumPy >= 2 promotion rules such a running
    # sum stays uint8 and silently wraps past 255, corrupting the average.
    avg = gray.mean()
    bits = (gray > avg).flatten()
    # packbits is big-endian per byte, so each hex digit maps to four
    # consecutive bits in row-major order.
    return np.packbits(bits).tobytes().hex()
# phash
def phash(img):
    """Return the perceptual hash (pHash) of an image as a 16-character hex string.

    The image is shrunk to 32x32 grayscale and transformed with a 2-D
    discrete cosine transform. The top-left 8x8 block of coefficients (the
    lowest frequencies) is compared against its own mean: coefficients below
    the mean become bit 0, all others bit 1. The 64 bits are read row by row
    and encoded as lowercase hexadecimal.

    Args:
        img: Image array. A 3-channel input is converted with
            ``cv2.COLOR_RGB2GRAY``; a 2-D (already grayscale) input is used
            as is.

    Returns:
        A 16-character lowercase hex string encoding the 64-bit hash.
    """
    resized = cv2.resize(img, (32, 32), interpolation=cv2.INTER_CUBIC)
    # The color conversion must be a separate call; resize() has no such option.
    gray = cv2.cvtColor(resized, cv2.COLOR_RGB2GRAY) if resized.ndim == 3 else resized

    # cv2.dct needs a floating-point array and already performs the full
    # 2-D transform on a 2-D input, so it is applied exactly once.
    coefficients = cv2.dct(np.float32(gray))

    # Slice the low-frequency corner; an in-place ndarray.resize(8, 8) would
    # instead keep the first 64 values in flat order (the first two rows).
    low_freq = coefficients[:8, :8]
    bits = (low_freq >= low_freq.mean()).flatten()
    return np.packbits(bits).tobytes().hex()
#差异值哈希算法
def dhash(image):
    """Return the difference hash (dHash) of an image as a 16-character hex string.

    The image is resized to 9 columns by 8 rows and converted to grayscale
    with ``cv2.COLOR_RGB2GRAY``. Each pixel is compared with its right-hand
    neighbour; a bit is 1 when the left pixel is strictly brighter. The
    resulting 8x8 = 64 bits are read row by row and encoded as lowercase hex.
    """
    # 9 columns are needed to obtain 8 left/right comparisons per row.
    resized = cv2.resize(image, (9, 8), interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(resized, cv2.COLOR_RGB2GRAY)

    bits = "".join(
        "1" if left > right else "0"
        for row in gray
        for left, right in zip(row, row[1:])
    )
    # Encode every group of four bits as one hex digit.
    return "".join(f"{int(bits[k:k + 4], 2):x}" for k in range(0, 64, 4))
# 计算两个哈希值之间的差异
def campHash(hash1, hash2):
    """Count the positions at which two hash strings differ.

    The comparison is per character, so for the hex hashes produced in this
    file the result counts differing hex digits, not differing bits.

    Returns:
        The number of differing positions, or -1 when the two hashes have
        different lengths and therefore cannot be compared.
    """
    if len(hash1) != len(hash2):
        return -1
    return sum(1 for left, right in zip(hash1, hash2) if left != right)
def _save_frame(frame, output_dir, frame_count):
    """Write *frame* to *output_dir* as ``<timestamp>_<frame_count>.jpg``."""
    formatted_time = datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
    cv2.imwrite(os.path.join(output_dir, f"{formatted_time}_{frame_count}.jpg"), frame)


def extract_frames(video_path, similarity_threshold, output_dir):
    """Save only those video frames that differ enough from the last saved one.

    The first frame is always saved. Every later frame is resized to
    128x128, hashed with ``ahash`` and compared (via ``campHash``) with the
    hash of the most recently saved frame; it is saved when the distance is
    strictly greater than ``similarity_threshold``. One ``.`` is printed per
    processed frame as a progress indicator.

    To use SSIM instead of a hash, compare the resized frames with
    ``compare_ssim(current, previous, channel_axis=2)`` and keep a frame when
    the score is *below* the threshold.

    Args:
        video_path: Path of the video file to read.
        similarity_threshold: Minimum hash distance (exclusive) for a frame
            to be considered different enough to save.
        output_dir: Directory for the extracted JPEGs. If it already exists
            it is deleted and recreated.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # Start from an empty output directory.
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        # Hash of the most recently saved frame; cached so it is not
        # recomputed for every incoming frame.
        previous_hash = None
        frame_count = 0
        while True:
            ret, frame = cap.read()
            # read() reports failure once the stream is exhausted.
            if not ret:
                break

            # OpenCV decodes frames as BGR; the hash functions expect RGB.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            current_hash = ahash(cv2.resize(image, (128, 128)))

            if (previous_hash is None
                    or campHash(previous_hash, current_hash) > similarity_threshold):
                _save_frame(frame, output_dir, frame_count)
                previous_hash = current_hash

            # Count every frame, including the first one, so that the index
            # embedded in each file name is unique and files saved within the
            # same second cannot overwrite each other.
            frame_count += 1
            print(".", end="")
            sys.stdout.flush()
    finally:
        # Release the capture even if reading or writing a frame fails.
        cap.release()

if __name__ == "__main__":
    # Directory that receives the extracted frames.
    output_dir = "split_output_ahash"

    # Hash-distance threshold: a frame is kept when its distance to the last
    # saved frame exceeds this value. (For SSIM a similarity such as 0.9
    # would be used instead, with the comparison inverted.)
    similarity_threshold = 10

    # Input video.
    video_path = "../jiabo/20230829/跳远30fps_20230829194849_CH01.avi"

    extract_frames(
        video_path=video_path,
        similarity_threshold=similarity_threshold,
        output_dir=output_dir,
    )

相关阅读:
4T硬盘剩余很多提示“No space left on device“磁盘空间不足
 STL 容器操作集合
 IP地址、子网掩码、默认网关介绍及例题计算
 项目持续集成配置流程
 TCP/IP五元组
 关于分布的理解
 力扣（LeetCode）算法_C++—— 快乐数
 手机和windows的便签怎么共享账号使用
 okcc呼叫中心数据操作的效率问题
 串口通信协议
原文地址：https://blog.csdn.net/u011119817/article/details/133646323