• 百度地图API爬取不同类型POI的详细数据


    一、相关概念
    查询某个范围内的所有POI

    参数介绍:

    page_size:单次查询返回的POI的数量,最大值为20
    page_num:分页页码,从0开始;查找的POI数量超过page_size时会分页返回,比如60个POI(page_size=20)会分成3页,此时page_num=0/1/2依次返回全部数据;当page_num=3时,该页的结果集大小为0;
    scope:1为默认值;2会显示详细数据
    region:检索的行政区域
    URL链接:

    http://api.map.baidu.com/place/v2/search/?query=查询关键字&page_size=20&page_num=0&output=json&bounds=40.817,111.697,40.821,111.709&scope=2&ak=你的ak

    查询结果示例:

    {
        "status":0,
        "message":"ok",
        "total":2,
        "result_type":"poi_type",
        "results":[
            {
                "name":"红螺寺",
                "location":{
                    "lat":40.390454,
                    "lng":116.632411
                },
                "address":"北京市怀柔区红螺东路2号",
                "province":"北京市",
                "city":"北京市",
                "area":"怀柔区",
                "street_id":"",
                "telephone":"(010)60681175,(010)60681639",
                "detail":1,
                "uid":"605884e7c61e3573871541a3",
                "detail_info":{
                    "tag":"旅游景点;文物古迹",
                    "navi_location":{
                        "lng":116.63176774842,
                        "lat":40.37846005246
                    },
                    "type":"scope",
                    "detail_url":"http://api.map.baidu.com/place/detail?uid=605884e7c61e3573871541a3&output=html&source=placeapi_v2",
                    "overall_rating":"4.3",
                    "comment_num":"200",
                    "children":[

                    ]
                }
            },
            {
                "name":"卧佛寺",
                "location":{
                    "lat":40.013776,
                    "lng":116.213915
                },
                "address":"北京市海淀区卧佛寺路北京植物园内",
                "province":"北京市",
                "city":"北京市",
                "area":"海淀区",
                "street_id":"934b3dbf0a8d977b8b2fb5c0",
                "detail":1,
                "uid":"934b3dbf0a8d977b8b2fb5c0",
                "detail_info":{
                    "tag":"旅游景点;文物古迹",
                    "navi_location":{
                        "lng":116.21389548337,
                        "lat":40.011540367963
                    },
                    "type":"scope",
                    "detail_url":"http://api.map.baidu.com/place/detail?uid=934b3dbf0a8d977b8b2fb5c0&output=html&source=placeapi_v2",
                    "overall_rating":"4.7",
                    "image_num":"38",
                    "comment_num":"74",
                    "children":[

                    ]
                }
            }
        ]
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33

    查询某个POI的详细数据

    参数介绍:

    uid:某个POI对应的唯一的标识(通过范围查询获取到的)
    URL链接:

    http://api.map.baidu.com/place/v2/detail?uid=fabbfbf31f9a6964ad31e55f&output=json&scope=2&ak=你的ak

    查询结果示例:

    {
        "status":0,
        "message":"ok",
        "result":{
            "uid":"605884e7c61e3573871541a3",
            "street_id":"",
            "name":"红螺寺",
            "location":{
                "lng":116.63241097199,
                "lat":40.390454021402
            },
            "address":"北京市怀柔区红螺东路2号",
            "province":"北京市",
            "city":"北京市",
            "area":"怀柔区",
            "telephone":"(010)60681175,(010)60681639",
            "detail_info":{
                "tag":"旅游景点;文物古迹",
                "navi_location":{
                    "lng":116.63176778525,
                    "lat":40.378460018453
                },
                "detail_url":"http://api.map.baidu.com/place/detail?uid=605884e7c61e3573871541a3&output=html&source=placeapi_v2",
                "type":"scope",
                "price":"¥54元",
                "overall_rating":"4.3",
                "image_num":"133",
                "comment_num":"200",
                "scope_type":"古迹",
                "scope_grade":"AAAA",
                "content_tag":"适合亲子;登山;礼佛祈福;赏红叶;适合拍照;日出;适合跑步;银杏;情侣约会;香火旺;免费项目;收费合理;空气清新;绿植繁茂;位置优越;景色优美;人气旺;景区大;气势宏大;环境不错;玩的开心;休闲好去处;值得游玩;建筑风格独特;景点多;保存完整;停车方便;交通便利;设施新全;服务热情;收获颇丰;卫生干净"
            },
            "detail":1
        }
    }
    二、相关链接
    百度地图API的POI分类

    http://lbsyun.baidu.com/index.php?title=lbscloud/poitags

    申请ak

    http://lbsyun.baidu.com/apiconsole/key#/home

    POI检索相关介绍

    http://lbsyun.baidu.com/index.php?title=webapi/guide/webservice-placeapi

    三、功能模块
    范围查询获取POI数据

    # Store the POIs returned by the keyword search in the database.
    def _savePOI(cursor, tag, item):
        """Insert one POI from a search response and fill its optional columns.

        Args:
            cursor: DB-API cursor used for every statement.
            tag: The search keyword the POI was found under; stored in `tag`.
            item: One POI dict from the response (`uid`, `name`, `location`, ...).

        Returns:
            True when the insert reported an affected row, False when the uid
            was already stored (nothing is written in that case).
        """
        uid = item['uid']
        name = item['name']
        if isExist(cursor, uid):
            print(f'{name}已经存在')
            return False

        # Placeholders let the driver escape the values, so a name or address
        # containing a quote can no longer break (or inject into) the SQL.
        insert = ("insert into poidatas(tag,uid,lat,lng,name,address,province,city,area) "
                  "values (%s,%s,%s,%s,%s,%s,%s,%s,%s)")
        params = (
            tag, uid, str(item['location']['lat']), str(item['location']['lng']), name,
            item.get('address', ''), item.get('province', ''),
            item.get('city', ''), item.get('area', ''),
        )
        inserted = bool(cursor.execute(insert, params))

        # detail_info may be absent from an item; treat that as "no extras".
        detail = item.get('detail_info') or {}
        # Column names come from this fixed tuple, never from the response,
        # so interpolating them into the statement is safe.
        for column in ('overall_rating', 'distance', 'comment_num', 'price'):
            if column in detail:
                cursor.execute(
                    f"update poidatas set {column} = %s where uid = %s",
                    (detail[column], uid))
        return inserted


    def insertPOIData(name_list, ak, cursor):
        """Search every keyword in `name_list` and store the POIs that are new.

        For each keyword, pages 0-9 are requested (with a 3-second pause before
        each request) until a page comes back without any POI. Both response
        shapes are handled: a `results` list and a single `result` object.

        Args:
            name_list: Search keywords; empty strings are skipped.
            ak: API key passed through to `getUrlByName`.
            cursor: DB-API cursor used for all reads and writes. The caller
                owns the connection and is responsible for committing.

        Returns:
            None. The found and inserted counts are printed.
        """
        # Number of POIs the API reported across all keywords.
        total = 0
        # Number of rows actually inserted (duplicates excluded).
        inserttotal = 0
        for tag in name_list:
            # Skip blank spreadsheet cells instead of abandoning the remaining keywords.
            if tag == '':
                continue
            counted = False
            for page in range(0, 10):
                time.sleep(3)
                url = getUrlByName(tag, ak, page)
                print(url)
                data = requests.get(url).json()
                print(data)
                # Status 0 means the request succeeded; otherwise try the next page.
                if data.get('status') != 0:
                    continue

                items = list(data.get('results') or [])
                if 'result' in data:
                    # Single-object response: handle it like a one-item page.
                    items.append(data['result'])
                if not items:
                    # No POIs on this page: nothing more to fetch for this keyword.
                    break

                # `total` is the overall hit count and repeats on every page,
                # so add it once per keyword rather than once per page.
                if not counted:
                    total += data.get('total', len(items))
                    counted = True

                for item in items:
                    if _savePOI(cursor, tag, item):
                        inserttotal += 1

        print('总共查找到的POI数量为 : ')
        print(total)
        print('插入数据库的POI数量为 : ')
        print(inserttotal)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35

    根据uid查询POI详细数据

    # Query the detail endpoint for every stored uid and save the extra fields.
    def updateDetailInfo(ak, cursor):
        """Fill the detail columns of every row in `poidatas`.

        Reads all uids from the table, requests the detail endpoint for each
        one (pausing 3 seconds before every request) and copies the fields
        listed in `detail_columns` from the response's `detail_info` into the
        matching row.

        Args:
            ak: API key placed in the request URL.
            cursor: DB-API cursor used for the select and the updates. The
                caller owns the connection and is responsible for committing.

        Returns:
            None.
        """
        # Fixed list of columns to copy; names never come from the response,
        # so interpolating them into the statement is safe.
        detail_columns = ('shop_hours', 'detail_url', 'image_num',
                          'service_rating', 'environment_rating')

        cursor.execute('SELECT uid FROM poidatas')
        for row in cursor.fetchall():
            uid = row[0]
            url2 = 'http://api.map.baidu.com/place/v2/detail?uid=%s&output=json&scope=2&ak=%s' % (uid, ak)
            print(url2)
            time.sleep(3)
            data = requests.get(url2).json()
            print(data)
            # Only a status-0 response carrying a `result` object is usable.
            if data.get('status') != 0 or 'result' not in data:
                continue

            # `result` is a single object here, not a list.
            item = data['result']
            # detail_info may be absent; treat that as "nothing to update".
            detail = item.get('detail_info') or {}
            for column in detail_columns:
                if column not in detail:
                    continue
                # Values go through placeholders so quotes in them cannot
                # break or inject into the statement.
                update = f"update poidatas set {column} = %s where uid = %s"
                params = (detail[column], item['uid'])
                print(update, params)
                cursor.execute(update, params)
    判断POI是否已经存入数据库

    # Check whether a POI with this uid is already stored.
    def isExist(cursor, uid):
        """Return True when `poidatas` already contains a row with this uid.

        Args:
            cursor: DB-API cursor whose `execute` returns the number of
                matching rows (truthy when at least one row matched).
            uid: Unique POI identifier to look up.

        Returns:
            bool: True if a matching row exists, otherwise False.
        """
        # The uid is passed as a parameter so the driver escapes it; only one
        # row is needed to answer the question, hence `select 1 ... limit 1`.
        sql = "select 1 from poidatas where uid = %s limit 1"
        return bool(cursor.execute(sql, (uid,)))
    从excel表中读取POI类别

    def readExcel(path):
        """Read the first-column value of every row of every sheet.

        Args:
            path: Path of the workbook, passed to `xlrd.open_workbook`.

        Returns:
            list: Cell values of column 0, in sheet order and then row order.
        """
        workbook = xlrd.open_workbook(path)
        data_list = []
        # Iterate the sheets directly instead of re-fetching the sheet list
        # by index on every pass.
        for table in workbook.sheets():
            data_list.extend(table.cell(row, 0).value for row in range(table.nrows))
        return data_list
    拼接访问URL

    def getUrlByName(name, ak, j):
        """Build the place-search URL for one keyword and one result page.

        Args:
            name: Search keyword; it is converted to text and percent-encoded
                so characters such as `&`, `#` or spaces cannot alter the
                query string.
            ak: API key.
            j: Page number, placed in `page_num`.

        Returns:
            str: The request URL for a rectangle (bounds) search with
            `page_size=20` and `scope=2`.
        """
        from urllib.parse import quote

        keyword = quote(str(name), safe='')
        # Rectangle search: returns relatively few POIs.
        url = 'http://api.map.baidu.com/place/v2/search/?query=%s&page_size=20&page_num=%s&output=json&bounds=40.817,111.697,40.821,111.709&scope=2&ak=%s' % (keyword, j, ak)
        # Alternative: administrative-region search, which returns more POIs.
        # url = 'http://api.map.baidu.com/place/v2/search/?query=%s&output=json&region=呼和浩特&scope=2&ak=%s' % (keyword, ak)
        return url
    Main函数

    def Main():
        """Run the crawl: read keywords, store search hits, then add details.

        Reads the keyword list from the spreadsheet, opens the database
        connection, runs `insertPOIData` followed by `updateDetailInfo`, and
        commits after each stage. The cursor and the connection are always
        closed, including when a stage raises.
        """
        ak = "~~~~~"
        name_list = readExcel(r'D:\poi类别.xls')
        db = pymysql.connect(host="localhost", user="root", password="root", database="poi")
        try:
            with db.cursor() as cursor:
                insertPOIData(name_list, ak, cursor)
                # Commit the search results first so a failure in the slower
                # detail pass does not discard them.
                db.commit()
                updateDetailInfo(ak, cursor)
                db.commit()
        finally:
            db.close()

  • 相关阅读:
    【无标题】EXCEL实现刷题
    【Unity细节】如何调节标签图标的大小(select icon)—标签图标太大遮住了物体
    KNN聚类算法
    CS231n课程笔记:Leture3 Loss Functions and Optimization
    【学习笔记】HQL
    基于微信购物商城小程序设计与实现 开题报告
    【数据分享】2005-2022年全国民航机场客货吞吐量和起降架次数据
    如何保护压缩包里的内容不被看到?
    【django-vue】封装logger 封装全局异常 封装response 数据库配置 用户表继承AbstractUser配置
    Java进阶篇--并发容器之BlockingQueue
  • 原文地址:https://blog.csdn.net/weixin_43214644/article/details/126488190