1.文件夹以及子文件下的文件导出
2.指定文件格式为pdf(或者选择其他比如:txt等)
3.导出文件并自动保存在新建的txt或者excel文件中
参考:https://blog.csdn.net/weixin_41521681/article/details/92768157
import os
# Example root directory to scan (Windows path; backslashes are escaped).
# NOTE(review): nothing in the visible part of this file reads this name —
# the script below passes the same literal directly. Confirm before removing.
image_path = 'F:\\test\\frames'
# 遍历文件夹及其子文件夹中的文件,并存储在一个列表中
# 输入文件夹路径、空文件列表[]
# 返回 文件列表Filelist,包含文件名(完整路径)
def get_filelist(dir, Filelist=None):
    """Recursively collect the paths of all files under ``dir``.

    Args:
        dir: Path of a file or a directory. (The name shadows the built-in
            ``dir``; it is kept so existing keyword callers keep working.)
        Filelist: List that found paths are appended to, in place. When
            omitted or ``None`` a new list is created.

    Returns:
        ``Filelist``, holding one path per file found. Each path is built by
        joining ``dir`` with the entry names below it. A ``dir`` that is
        neither an existing file nor an existing directory adds nothing.
    """
    if Filelist is None:
        Filelist = []
    if os.path.isfile(dir):
        # To collect bare file names instead of paths, append
        # os.path.basename(dir) here.
        Filelist.append(dir)
    elif os.path.isdir(dir):
        for s in os.listdir(dir):
            # To ignore certain folders, skip them here, e.g.
            # ``if s == "xxx": continue``.
            get_filelist(os.path.join(dir, s), Filelist)
    return Filelist
if __name__ == '__main__':
    # The result is deliberately not named ``list``: binding that name at
    # module level would shadow the built-in for the rest of the module.
    file_paths = get_filelist('F:\\test\\frames', [])
    print(len(file_paths))
    for e in file_paths:
        print(e)
import os
# Root directory scanned when this snippet is run as a script.
path = 'F:\\test\\frames'


def get_filelist(dir):
    """Collect the paths of all files under ``dir``, including subfolders.

    Args:
        dir: Root directory to walk. (The name shadows the built-in ``dir``;
            it is kept so existing keyword callers keep working.)

    Returns:
        list[str]: One path per file, formed by joining the containing
        directory reported by ``os.walk`` with the file name. Empty when
        ``dir`` cannot be listed, because ``os.walk`` ignores such errors
        by default.
    """
    Filelist = []
    # Walk the directory that was passed in, not the module-level ``path``.
    for home, _dirs, files in os.walk(dir):
        # To collect bare file names instead, extend with ``files`` directly.
        Filelist.extend(os.path.join(home, filename) for filename in files)
    return Filelist


if __name__ == "__main__":
    # Pass the configured root; the built-in ``dir`` is not a path.
    file_paths = get_filelist(path)
    print(len(file_paths))
    for file_path in file_paths:
        print(file_path)
def subdir_list(dirname):
    """Return the immediate subdirectories of ``dirname``.

    Args:
        dirname: Path of the directory to inspect.

    Returns:
        list[str]: One path per subdirectory, each formed by joining
        ``dirname`` with the entry name, in ``os.listdir`` order. Nested
        subdirectories are not descended into.

    Raises:
        OSError: If ``os.listdir`` cannot list ``dirname``.
    """
    candidates = (os.path.join(dirname, name) for name in os.listdir(dirname))
    return [candidate for candidate in candidates if os.path.isdir(candidate)]
def file_list(dirname, ext='.csv'):
    """Return the names of entries in ``dirname`` that have extension ``ext``.

    Only the top level of ``dirname`` is inspected. Entries are matched on
    their name alone; they are not checked to be regular files.

    Args:
        dirname: Path of the directory to inspect.
        ext: Extension to match, including the leading dot. The comparison
            is exact, so it is case-sensitive.

    Returns:
        list[str]: Matching entry names without any directory part, in
        ``os.listdir`` order.

    Raises:
        OSError: If ``os.listdir`` cannot list ``dirname``.
    """
    return [
        filename
        for filename in os.listdir(dirname)
        if os.path.splitext(filename)[1] == ext
    ]
import codecs
import os
# Raw string: in a normal literal every ``\t`` here would become a TAB
# character instead of a backslash followed by "t".
path = r'C:\test\test\test\test'
def get_filelist(test, ext='.pdf'):
    """Collect the names of all files under ``test`` with a given extension.

    ``test`` and all of its subdirectories are walked with ``os.walk``.

    Args:
        test: Root directory to search.
        ext: Extension to match, including the leading dot (for example
            ``'.txt'``). The comparison is exact, so it is case-sensitive.
            Defaults to ``'.pdf'``.

    Returns:
        list[str]: Bare file names (no directory part) of every matching
        file. A name that occurs in several folders appears once per
        occurrence.
    """
    Filelist = []
    for _home, _dirs, files in os.walk(test):
        Filelist.extend(
            filename for filename in files
            if os.path.splitext(filename)[1] == ext
        )
    return Filelist
补充:
The **`os.path.splitext()`** method in Python splits a path name into a pair `(root, ext)`. Here, ext is the extension portion of the specified path, while root is everything except the ext part.
ext is empty if the specified path does not have any extension. If the last component of the path has a leading period ('.'), that period is ignored when looking for the extension (it is treated as part of root). For example, consider the following path names:
| path name | root | ext |
| --- | --- | --- |
| /home/User/Desktop/file.txt | /home/User/Desktop/file | .txt |
| /home/User/Desktop | /home/User/Desktop | {empty} |
| file.py | file | .py |
| .txt | .txt | {empty} |
- 1
- 2
- 3
- 4
- 5
Python program to explain os.path.splitext() method
# Demonstrates os.path.splitext() on a file path and on a directory path.
import os

# Case 1: the path ends in a file name that carries an extension.
path = '/home/User/Desktop/file.txt'
root_ext = os.path.splitext(path)
root, ext = root_ext
print("root part of '% s':" % path, root)
print("ext part of '% s':" % path, ext, "\n")

# Case 2: the path ends in a separator, so there is no extension to split.
path = '/home/User/Desktop/'
root_ext = os.path.splitext(path)
root, ext = root_ext
print("root part of '% s':" % path, root)
print("ext part of '% s':" % path, ext)
Output:
root part of '/home/User/Desktop/file.txt': /home/User/Desktop/file
ext part of '/home/User/Desktop/file.txt': .txt
root part of '/home/User/Desktop/': /home/User/Desktop/
ext part of '/home/User/Desktop/':
import codecs
# Scan first, so a failure while walking the tree does not leave behind an
# empty, already-truncated output file.
result = get_filelist(path)
# Built-in open() supersedes codecs.open(). newline='\n' keeps line endings
# untranslated, matching codecs.open(), which always opens in binary mode.
with open("text.txt", mode='w', encoding='utf-8', newline='\n') as f:
    # One collected entry per line.
    f.writelines(str(entry) + '\n' for entry in result)
import codecs
import os
# Raw string: in a normal literal every ``\t`` here would become a TAB
# character instead of a backslash followed by "t".
path = r'C:\test\test\test\test'
def get_filelist(test, ext='.pdf'):
    """Collect the names of all files under ``test`` with a given extension.

    ``test`` and all of its subdirectories are walked with ``os.walk``.

    Args:
        test: Root directory to search.
        ext: Extension to match, including the leading dot (for example
            ``'.txt'``). The comparison is exact, so it is case-sensitive.
            Defaults to ``'.pdf'``.

    Returns:
        list[str]: Bare file names (no directory part) of every matching
        file. A name that occurs in several folders appears once per
        occurrence.
    """
    Filelist = []
    for _home, _dirs, files in os.walk(test):
        Filelist.extend(
            filename for filename in files
            if os.path.splitext(filename)[1] == ext
        )
    return Filelist
# Scan first, so a failure while walking the tree does not leave behind an
# empty, already-truncated output file.
result = get_filelist(path)
# Built-in open() supersedes codecs.open(). newline='\n' keeps line endings
# untranslated, matching codecs.open(), which always opens in binary mode.
with open("text.txt", mode='w', encoding='utf-8', newline='\n') as f:
    # One collected entry per line.
    f.writelines(str(entry) + '\n' for entry in result)