本文所讲内容如下:
文件分为二进制文件与文本文件(TXT)
其中二进制文件无法用文本编辑器直接阅读,文本文件则可以直接阅读。
with open (file_name [,mode='r',encoding=None]) as fp:
文件操作语句
with open(file_name [,mode='r',encoding=None]) as fp: 文件操作语句
with open(r'C:\Users\Administrator\Desktop\Files\data\test.txt') as f:
就是从菜单栏直接复制所得到的路径
实例
- with open(r'F:\1.txt','w') as f:
- f.write('i am a good boy!\n')
- f.write('i want to enter Tsing hua University!\n')
- f.write('胜利一定属于我!')

- with open(r'./data/test.txt') as f:
- with open(r'test.txt') as f :
./表示当前工作目录(直接在脚本所在目录下运行时,即当前文件所在目录)
../表示当前工作目录的上一级目录

xxx.py打开aa.txt,在文件目录之下,两者是同级目录
with open (r'./a_file/aa.txt','r') as f:
with open (r'../b_file/a.txt','r') as f:

有关二进制文件:

fileobject.write([str])
- with open(r'F:\1.txt','w') as f:
- f.write('i am a good boy!\n')
- f.write('i want to enter Tsing hua University!\n')
- f.write('胜利一定属于我!')
writelines——写入字符串序列(不会自动添加换行符)
fileobject.writelines([str])
- >>> with open('./data/test.txt','w') as file:
- >>> seq = ["菜鸟教程 1\n", "菜鸟教程 2"]
- >>> file.writelines( seq )

fileobject.read([size])
- >>> with open('./data/test.txt','r') as file:
- >>> content=file.read(4)
- >>> print(content)
fileobject.readlines()
- >>> with open('./data/test.txt','r') as file:
- >>> content=file.readlines()
- >>> print(content)
-
- #['1,hellow word! \n', '2,hellow word! \n', '3,hellow word! \n']
fileobject.readline()
- >>> with open('./data/test.txt','r') as file:
- >>> content=file.readline()
- >>> print(content)
- # 1,hellow word! \n
fileobject.tell() #无参数
- >>> with open('./data/test.txt','r') as file:
- >>> line = file.readline()
- >>> print ("读取的数据为: %s" % (line))
- >>> position = file.tell()
- >>> print ("当前位置: %d" % (position))

指定位置开始读取或者写入文件的数据
seek(offset[, whence]) #whence为起始位置:0表示文件开头(默认),1表示当前位置,2表示文件末尾
- with open(r'F:\1.txt','r') as f:
- info=f.readlines()
- print(info)
- # f.write('i am a good boy!\n')
- # f.write('i want to enter Tsing hua University!\n')
- # # f.write('胜利一定属于我!')
- print(f.tell())
- print(f.seek(800))
- print(f.tell())

由上图可见,指针的位置在seek函数后变成了800
f.close() #关闭文件
- >>> f = open('data.txt', 'w')
- >>> try:
- >>> finally:
- >>> f.close()
- >>> with open('data.txt','w') as f:
- >>> f.write('123\n')

import csv

with open("a.csv","r",encoding="utf-8") as csvfile: csv.reader(csvfile,dialect="excel", delimiter=",")
- >>> with open('result.csv', encoding='utf-8') as f:
- >>> reader = csv.reader(f)
- >>> for row in reader:
- >>> print(row)
- >>> import csv
- >>> datas=["abcd","defg","hijk"]
- >>> with open('example.csv', 'w', newline='') as f:
- >>> writer = csv.writer(f)
- >>> writer.writerows([datas])
- # 不等价
- >>> writer.writerows(datas)


DictReader(字典式)
- >>> import csv
- >>> with open('iris.csv') as f:
- >>> reader=csv.DictReader(f)
- >>> for row in reader:
- >>> print('id为{}'.format(row['id']))


使用DictWriter类,可以写入字典形式的数据,同样键也是标头(表格第一行)
- >>> import csv
- >>> headers = ['name', 'age'] #指定行头
- >>> datas = [{'name':'Bob', 'age':23},
- >>> {'name':'Jerry', 'age':44},
- >>> {'name':'Tom', 'age':15}]
- >>> with open('example.csv', 'w', newline='') as f:
- >>> writer = csv.DictWriter(f, headers)
- >>> writer.writeheader()
- >>> for row in datas:
- >>> writer.writerow(row)
- >>> #writer.writerows(datas)


- >>> import os
- >>> os.getcwd() #返回当前工作目录
- 'C:\\Python35'
- >>> os.mkdir(os.getcwd()+'\\temp') #创建目录
- >>> os.chdir(os.getcwd()+'\\temp') #改变当前工作目录
- >>> os.getcwd()
- 'C:\\Python35\\temp'
- >>> os.mkdir(os.getcwd()+'\\test')
- >>> os.listdir('.')
- ['test']
- >>> os.rmdir('test') #删除目录
- >>> os.listdir('.')
- []
os.rename(需要修改的文件名, 新的文件名)
os.remove(待删除的文件名)

序列化
pickle.dump(obj,file,protocol=None) #将对象obj保存至文件file
反序列化
pickle.load(file) #从file中读取并重构一个对象
只用于Python,且不同版本之间可能不兼容
序列化
- >>> import pickle
- >>> with open(r'dataObj1.dat', 'wb') as f:
- >>> d1=dict(name='Mary', age=19)
- >>> pickle.dump(d1, f)
- >>> import pickle
- >>> with open(r'dataObj1.dat', 'rb') as f:
- >>> o1=pickle.load(f)
- >>> print(type(o1), str(o1))
- {"firstName":"Brett",
- "lastName":"McLaughlin",
- "email":"aaaa"}
- { "people":[
- {"firstName":"Brett","email":"aaaa"},
- {"firstName":"Jason","email":"bbbb"},
- {"firstName":"Elliotte","email":"cccc"}]
- }
- >>> import json
- >>> # 序列化
- >>> with open('json_file','w') as f:
- >>> dic = {'k1':'v1','k2':'v2','k3':'v3'}
- >>> json.dump(dic,f) #dump方法直接将字典转换成json字符串写入文件
- >>> # 反序列化
- >>> with open('json_file') as f:
- >>> dic2 = json.load(f) #load方法直接将文件中的json字符串转换成字典
- >>> print(type(dic2),dic2)
- <class 'dict'> {'k1': 'v1', 'k2': 'v2', 'k3': 'v3'}
- >>> import requests,json
- >>> url="https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=&start=0"
- >>> head={"User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36"}
- >>> response= requests.get(url,headers=head)
- >>> strs= response.text
- >>> strs_json=json.loads(strs)
- >>> print(f"爬虫对象的字符串形式解析前的格式为{type(strs)}")
- >>> print(f"爬虫对象的字符串形式解析后的格式为{type(strs_json)}")
- '''{"data":[{"directors":[" 陈凯歌 "," 徐 克 "," 林超贤
- "],"rate":"7.6","cover_x":1080,"star":"40","title":" 长 津 湖
- ","url":"https:\\/\\/movie.douban.com\\/subject\\/25845392\\/","casts":["吴京","易烊千玺","段奕宏"," 朱亚文 "," 李 晨
- "],"cover":"https://img9.doubanio.com\\/view\\/photo\\/s_ratio_poster\\/public\\/p2681329386.jpg","id":"25845392","cover_y":1513} '''
- >>> import requests,json
- >>> url="https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=&start=0"
- >>> head={"User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36"}
- >>> response= requests.get(url,headers=head)
- >>> strs= response.text
- >>> strs_json=json.loads(strs)
- >>> print(f"爬虫对象的字符串形式解析前的格式为{type(strs)}")
- >>> print(f"爬虫对象的字符串形式解析后的格式为{type(strs_json)}")
- {'data': [{'directors': ['陈凯歌', '徐克', '林超贤'],
- 'rate': '7.6',
- 'cover_x': 1080,
- 'star': '40',
- 'title': '长津湖',
- 'url': 'https://movie.douban.com/subject/25845392/',
- 'casts': ['吴京', '易烊千玺', '段奕宏', '朱亚文', '李晨'],
- 'cover': 'https://img9.doubanio.com/view/photo/s_ratio_poster/public/p2681329386.jpg',
- 'id': '25845392',
- 'cover_y': 1513} >>> strs_json['data'][1]['rate']