目录
DOM 会把整个 XML 文件一次性读入内存,占用内存大;SAX 是流模式,边读边解析,占用内存小。
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
-
- '''解析xml中的天气字段'''
-
- from xml.parsers.expat import ParserCreate
-
# Sample weather document parsed by the demo at the bottom of the file.
# The eight fields the handler collects (date, week, dayweather, nightweather,
# daytemp, nighttemp, daywind, nightwind) carry the values that appear in the
# recorded output.
# NOTE(review): every other element name here (response, status, count, info,
# infocode, forecasts, forecast, city, adcode, province, reporttime, casts,
# cast, daypower, nightpower) is a reconstruction -- confirm against the real
# service response. The handler ignores those elements, so their names do not
# affect the parsed result.
xml = '''
<response>
<status>1</status>
<count>1</count>
<info>OK</info>
<infocode>10000</infocode>
<forecasts>
<forecast>
<city>北京市</city>
<adcode>110000</adcode>
<province>北京</province>
<reporttime>2021-05-14 18:33:48</reporttime>
<casts>
<cast>
<date>2021-05-14</date>
<week>5</week>
<dayweather>多云</dayweather>
<nightweather>小雨</nightweather>
<daytemp>26</daytemp>
<nighttemp>17</nighttemp>
<daywind>东南</daywind>
<nightwind>东南</nightwind>
<daypower>≤3</daypower>
<nightpower>≤3</nightpower>
</cast>
<cast>
<date>2021-05-15</date>
<week>6</week>
<dayweather>小雨</dayweather>
<nightweather>小雨</nightweather>
<daytemp>21</daytemp>
<nighttemp>14</nighttemp>
<daywind>北</daywind>
<nightwind>北</nightwind>
<daypower>4</daypower>
<nightpower>4</nightpower>
</cast>
<cast>
<date>2021-05-16</date>
<week>7</week>
<dayweather>多云</dayweather>
<nightweather>晴</nightweather>
<daytemp>25</daytemp>
<nighttemp>13</nighttemp>
<daywind>西</daywind>
<nightwind>西</nightwind>
<daypower>≤3</daypower>
<nightpower>≤3</nightpower>
</cast>
<cast>
<date>2021-05-17</date>
<week>1</week>
<dayweather>晴</dayweather>
<nightweather>晴</nightweather>
<daytemp>30</daytemp>
<nighttemp>15</nighttemp>
<daywind>西南</daywind>
<nightwind>西南</nightwind>
<daypower>≤3</daypower>
<nightpower>≤3</nightpower>
</cast>
</casts>
</forecast>
</forecasts>
</response>
'''
-
class MySaxHandler(object):
    """Collect per-day weather fields from expat parser callbacks.

    Wire the three ``*_handler`` methods to an expat parser. Each time all of
    the fields named in ``weather_attrs_list`` have been seen, the values are
    appended to ``all_day_weather`` as one dict and collection starts over
    for the next day.
    """

    def __init__(self, city='Beijing'):
        """
        :param city: label stored under the ``'city'`` key of the result; it
            is taken from this argument, not read from the document
        """
        # City label for the result.
        self.city = city
        # Fields of the day currently being assembled.
        self.one_day_weather = {}
        # Completed days, in document order.
        self.all_day_weather = []
        # Result dict; 'forecasts' refers to the same list object as
        # all_day_weather, so it fills up as parsing proceeds.
        self.weather = {'city': self.city, 'forecasts': self.all_day_weather}
        # Element names that make up one day's record.
        self.weather_attrs_list = ['date', 'week', 'dayweather', 'nightweather',
                                   'daytemp', 'nighttemp', 'daywind', 'nightwind']
        # Character data gathered since the most recent tag boundary.
        # Initialised here so an end tag that arrives before any character
        # data does not raise AttributeError.
        self.text = ''

    def start_element_handler(self, name, attrs):
        '''
        Start-tag callback.
        :param name: element name
        :param attrs: element attributes
        :return:
        '''
        self.start_element_name = name
        self.start_element_attrs = attrs
        # Drop whitespace / parent text seen before this tag so that an empty
        # element yields '' instead of stale data from a previous element.
        self.text = ''

    def end_element_handler(self, name):
        '''
        End-tag callback: store the element text when it is a weather field
        and flush the day record once every field has been collected.
        :param name: element name
        :return:
        '''
        self.end_element_name = name

        if self.end_element_name in self.weather_attrs_list:
            self.one_day_weather[self.end_element_name] = self.text

        if len(self.one_day_weather) == len(self.weather_attrs_list):
            self.all_day_weather.append(self.one_day_weather)
            # Start a fresh dict; the appended one now belongs to the result.
            self.one_day_weather = {}

        # Text after this end tag belongs to the enclosing element.
        self.text = ''

    def chardata_handler(self, text):
        '''
        Character-data callback.
        :param text: a chunk of element text
        :return:
        '''
        # expat may deliver one element's text in several chunks (for example
        # around entity references), so append rather than overwrite.
        self.text += text
-
def parser_xml(xml, city='Beijing'):
    """Parse a weather XML string and return the collected forecasts.

    :param xml: XML document text to feed to the expat parser
    :param city: label stored under the ``'city'`` key of the result;
        defaults to ``'Beijing'``
    :return: the handler's ``weather`` dict, with the ``'city'`` label and
        the ``'forecasts'`` list of per-day dicts
    """
    my_handler = MySaxHandler(city)
    parser = ParserCreate()
    # Route the parser callbacks to the handler's bound methods.
    parser.StartElementHandler = my_handler.start_element_handler
    parser.EndElementHandler = my_handler.end_element_handler
    parser.CharacterDataHandler = my_handler.chardata_handler
    # NOTE(review): Parse is called without is_final=True, so a document that
    # ends before its root element is closed is not reported as an error.
    # Left unchanged to avoid rejecting input that is accepted today.
    parser.Parse(xml)

    return my_handler.weather
-
-
if __name__ == "__main__":
    # Demo entry point: parse the sample document and show the result dict.
    print(parser_xml(xml))
-
- ----------------------------------------------------------------------------------
- # 输出
- "C:\Program Files\Python311\python.exe" D:\python_core_programming_learn\chapter_7_dict_set\notes.py
- {'city': 'Beijing', 'forecasts': [{'date': '2021-05-14', 'week': '5', 'dayweather': '多云', 'nightweather': '小雨', 'daytemp': '26', 'nighttemp': '17', 'daywind': '东南', 'nightwind': '东南'}, {'date': '2021-05-15', 'week': '6', 'dayweather': '小雨', 'nightweather': '小雨', 'daytemp': '21', 'nighttemp': '14', 'daywind': '北', 'nightwind': '北'}, {'date': '2021-05-16', 'week': '7', 'dayweather': '多云', 'nightweather': '晴', 'daytemp': '25', 'nighttemp': '13', 'daywind': '西', 'nightwind': '西'}, {'date': '2021-05-17', 'week': '1', 'dayweather': '晴', 'nightweather': '晴', 'daytemp': '30', 'nighttemp': '15', 'daywind': '西南', 'nightwind': '西南'}]}
-