遍历目录下的文件,将 GGA 文件中位于起始字符串与截止字符串之间(含起止行)的数据行剔除,生成新的文件。
- import os
- import fnmatch
- import shutil
- import re
- import datetime
-
def find_files(directory, pattern):
    """Lazily yield the paths of files under *directory* matching *pattern*.

    The tree rooted at *directory* is traversed with ``os.walk``; in every
    visited folder the file names are filtered with ``fnmatch.filter`` and
    each hit is joined with the folder it was found in.

    Args:
        directory: Root folder of the search.
        pattern: Shell-style wildcard applied to file names only (not to
            the folder part of the path).

    Yields:
        One path per matching file.
    """
    walker = os.walk(directory)
    for folder, _subfolders, names in walker:
        hits = fnmatch.filter(names, pattern)
        yield from (os.path.join(folder, name) for name in hits)
-
- # 调用函数,查找所有包含 'xxx' 的文件
-
-
def _with_plain_newline(line):
    """Return *line* with a trailing CRLF or lone CR replaced by a single LF."""
    if line.endswith('\r\n'):
        return line[:-2] + '\n'
    if line.endswith('\r'):
        return line[:-1] + '\n'
    return line


def write_file(path_copy, path_copy_ex, pattern_start, pattern_end):
    """Copy *path_copy* to *path_copy_ex*, leaving out the marked line range(s).

    Writing is switched off at a line that starts with ``$GPZDA`` and matches
    *pattern_start*, and switched back on after a line that matches
    *pattern_end*. Both boundary lines and everything between them are
    omitted from the output. If no line matching *pattern_end* follows,
    everything up to the end of the file is omitted.

    Both files are opened with ``newline=''`` so that every kept line is
    written with exactly the terminator it had in the input, independent of
    the platform the script runs on.

    After the copy a timestamped summary is printed: the "no data matched"
    message when zero lines were written, otherwise the number of lines
    written together with the two patterns.

    Args:
        path_copy: Path of the text file to read.
        path_copy_ex: Path of the file to create (overwritten if present).
        pattern_start: Regular expression identifying the first omitted line
            (only tested on lines beginning with ``$GPZDA``).
        pattern_end: Regular expression identifying the last omitted line.

    Returns:
        None.

    Raises:
        re.error: If either pattern is not a valid regular expression.
        OSError: If a file cannot be opened.
    """
    # Compile once instead of handing the pattern strings to re.search per line.
    start_re = re.compile(pattern_start)
    end_re = re.compile(pattern_end)

    num_count = 0
    with open(path_copy, 'r', newline='') as dst, \
            open(path_copy_ex, 'w', newline='') as dst_w:
        # Lines are written by default until a start marker is seen.
        writing = True
        for line in dst:
            # Match against the line with a plain LF terminator so the
            # untranslated CR cannot change what a pattern matches.
            probe = _with_plain_newline(line)
            if line.startswith('$GPZDA') and start_re.search(probe):
                writing = False
            if writing:
                num_count += 1
                dst_w.write(line)
            # Checked after the write, so the end line itself is still omitted.
            if end_re.search(probe):
                writing = True

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    if num_count == 0:
        print(r'当前时间:%s,没有数据匹配' % (timestamp))
    else:
        print(r'当前时间:%s,总共%s行写入完成,起始于:%s,截止于:%s' % (
            timestamp, num_count, pattern_start, pattern_end))
-
if __name__ == '__main__':
    # Root folder that is searched recursively for GGA files.
    directory = r"E:\data\日志文件第三组"
    # Regular expressions marking the first and last line of the range to drop.
    pattern_start = "033040.00"
    pattern_end = "033140.00"

    for file_path in find_files(directory, "*gga*"):
        # Split the match into its folder and file name.
        file_path_dir, file_path_base = os.path.split(file_path)

        # The filtered result is written next to the source as "ex_<name>".
        path_copy_ex = os.path.join(file_path_dir, "ex_" + file_path_base)

        # write_file opens its input read-only, so the source is filtered
        # directly. The former temporary "new_<name>" copy was unnecessary,
        # overwrote and then deleted any existing file of that name, and was
        # left behind whenever the filtering step raised.
        write_file(file_path, path_copy_ex, pattern_start, pattern_end)