• python爬虫-某政府网站加速乐(简单版)实例小记


    1. # -*- coding:utf-8 -*-
    2. # @Time : 2023/10/23 17:06
    3. # @Author: 水兵没月
    4. # @File : 哈哈哈哈.py
    5. # @Software: PyCharm
    6. ####################
    7. import random
    8. import requests
    9. # 代理
    def get_proxy(proxy_type=None):
        """Fetch one proxy configuration from the proxy-pool service.

        Args:
            proxy_type: Value sent as the ``proxy_type`` form field. When
                omitted (``None``), one of 1-5 is picked at random on every
                call.

        Returns:
            Element 0 of the ``msg`` entry in the service's JSON reply.
            NOTE(review): callers pass it straight to ``requests`` as
            ``proxies=``, so it is presumably a scheme-to-URL mapping --
            confirm against the service.
        """
        if proxy_type is None:
            # Choose here, not in the signature: a default expression is
            # evaluated only once, when the function is defined, which would
            # freeze a single "random" type for the whole run.
            proxy_type = random.choice([1, 2, 3, 4, 5])
        # Placeholder endpoint -- substitute the real proxy-service URL.
        url = "http://XXXXXXXXXXXXXXXX"
        payload = {
            "proxy_type": proxy_type,
            "spider_type": 2,
        }
        response = requests.request("POST", url, data=payload)
        proxies = response.json()['msg'][0]
        return proxies
    # Notes: fetch the entry page to obtain the anti-bot cookies, then replay
    # those cookies on the data endpoint.
    import base64

    # The address is published Base64-encoded; decode it to a real http://
    # URL first, because requests rejects a URL that has no scheme.
    url = base64.b64decode(
        'aHR0cDovL3N0aGp0LmppYW5nc3UuZ292LmNuL2NvbC9jb2w4MzU2OC9pbmRleC5odG1sP3VpZD0zNTEwODUmcGFnZU51bT0xMjE='
    ).decode('utf-8')
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Host": "sthjt.jiangsu.gov.cn",
        "Pragma": "no-cache",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
    }
    # Step 1: request the entry page. requests.get() opens a throwaway session
    # and closes it again, so no session object is left unclosed.
    res = requests.get(url, headers=headers, proxies=get_proxy(1))
    res.encoding = 'UTF-8'
    # Flatten the cookies set by the first response into one Cookie header.
    cookie = ''.join(
        '{0}={1};'.format(name, value) for name, value in res.cookies.items()
    )
    headers['Cookie'] = cookie
    print(headers)
    # Step 2: request the data endpoint with the captured cookies. The decoded
    # text contains a literal '+' fragment (quote, plus, quote) left over from
    # a string concatenation that was encoded along with the URL; strip it so
    # the query string is well-formed.
    url = base64.b64decode(
        'aHR0cDovL3N0aGp0LmppYW5nc3UuZ292LmNuL21vZHVsZS93ZWIvanBhZ2UvZGF0YXByb3h5LmpzcD9zdGFydHJlY29yZD0xJmVuZHJlY29yZD0xMjAmcGVycGFnZT00MCcrJyZjb2w9MSZhcHBpZD0xJndlYmlkPTE0JnBhdGg9JTJGJmNvbHVtbmlkPTgzNTY4JnNvdXJjZUNvbnRlbnRUeXBlPTEmdW5pdGlkPTM1MTA4NSZ3ZWJuYW1lPSVFNiVCMSU5RiVFOCU4QiU4RiVFNyU5QyU4MSVFNyU5NCU5RiVFNiU4MCU4MSVFNyU4RSVBRiVFNSVBMiU4MyVFNSU4RSU4NSZwZXJtaXNzaW9udHlwZT0w'
    ).decode('utf-8').replace("'+'", "")
    res = requests.get(url, headers=headers, proxies=get_proxy())
    res = res.text
    print([res])
    print('========================')

    某网站的cookie反爬为创宇盾加速乐。测试了翻页和刷新页面时cookie的变化情况,均无变化,因此这个网站的加速乐可以这样解决:先请求提供的第一步网址,拿到cookie,再直接将该cookie用在目标网址的请求中即可。亲测可用,但即使加上代理也很容易被限制请求,建议使用time.sleep(3),防止请求过于频繁。

  • 相关阅读:
    接口自动化测试yaml+requests+allure技术,你学会了吗?
    softmax,softmax loss和交叉熵的关系
    linux内核网络收包过程(二)
    miui14即将闪亮登场 小米首批机型搭配名单 旧版系列机型可能无望更新
    Postman —— postman实现参数化
    PMP 2022-11-01
    集成学习实战:基于集成学习方法完成鸢尾花卉品种预测详细教程
    人工智能论文GPT-3(2):2020.5 Language Models are Few-Shot Learners;微调;少样本Few-Shot (FS)
    gdb-dashboard的简单使用
    租赁系统开发|沈阳租赁系统|免押房屋租赁功能展示
  • 原文地址:https://blog.csdn.net/weixin_43124425/article/details/133995652