在深度学习任务中，清洗视频数据集里长度过短（帧数不足）的脏数据时，需要递归遍历嵌套的多级文件夹，找出其中过短的视频文件并自动将其删除。实现代码如下：
import os
import cv2
# Root directory that is scanned recursively for video files.
rawpath = '/root/autodl-tmp/webvid/video10_20%'
# Minimum number of readable frames a file must provide to be kept.
video_len = 64


def has_min_frames(path, min_frames):
    """Return True if at least *min_frames* frames can be read from *path*.

    Frames are read one by one and reading stops as soon as the threshold
    is reached, so long videos are not decoded completely. A file from
    which no frame can be read at all is reported as too short.

    Args:
        path: Path of the file to probe.
        min_frames: Number of frames that must be readable.

    Returns:
        True when ``min_frames`` frames were read successfully, else False.
    """
    cap = cv2.VideoCapture(path)
    try:
        for _ in range(min_frames):
            ret, _frame = cap.read()
            if not ret:
                return False
        return True
    finally:
        # Release the handle before the caller possibly deletes the file and
        # so that handles do not pile up while walking a large dataset.
        cap.release()


def remove_short_videos(root_dir, min_frames, frame_check=has_min_frames):
    """Delete every file under *root_dir* that fails the frame check.

    The directory tree is walked recursively. Every file for which
    ``frame_check(path, min_frames)`` is false is deleted; this includes
    files that are not readable as video at all.

    Args:
        root_dir: Directory to scan, including all nested sub-directories.
        min_frames: Minimum number of frames a file needs in order to stay.
        frame_check: Callable ``(path, min_frames) -> bool`` deciding whether
            a file is long enough. Defaults to ``has_min_frames``.

    Returns:
        List of the paths that were actually deleted.
    """
    removed = []
    for root, _dirs, files in os.walk(root_dir):
        for name in files:
            # os.path.join avoids the doubled '/' of manual concatenation.
            path = os.path.join(root, name)
            if frame_check(path, min_frames):
                continue
            print('deleting too-short video: ' + path)
            try:
                # Delete through the OS API instead of a shell command so
                # that spaces or shell metacharacters in a file name can
                # neither break the deletion nor be interpreted by a shell.
                os.remove(path)
            except OSError as err:
                # Keep going: one undeletable file must not abort the scan.
                print('could not delete ' + path + ': ' + str(err))
            else:
                removed.append(path)
    return removed


remove_short_videos(rawpath, video_len)