- # -*- coding:utf-8 -*-
- # @Time : 2023/10/23 17:06
- # @Author: 水兵没月
- # @File : 哈哈哈哈.py
- # @Software: PyCharm
- ####################
-
import base64
import random
import time

import requests
-
# Proxy pool client.
def get_proxy(proxy_type=None):
    """Fetch one proxy configuration from the proxy-pool service.

    Args:
        proxy_type: Value sent as the ``proxy_type`` form field. When omitted
            (``None``), one of 1-5 is picked at random on *every* call; a
            ``random.choice(...)`` default in the signature would be
            evaluated only once, when the function is defined.

    Returns:
        The first element of the ``msg`` list in the service's JSON reply.
        Callers in this file pass it directly as ``proxies=`` to requests,
        so it is presumably a scheme-to-proxy-URL mapping -- confirm against
        the service.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        KeyError, IndexError: If the reply has no ``msg`` key or ``msg`` is
            empty.
    """
    if proxy_type is None:
        proxy_type = random.choice([1, 2, 3, 4, 5])

    # Placeholder endpoint; replace with the real proxy-pool address.
    url = "http://XXXXXXXXXXXXXXXX"
    payload = {
        "proxy_type": proxy_type,
        # Fixed value taken from the original request; its meaning is
        # defined by the proxy service.
        "spider_type": 2,
    }
    # Without a timeout requests may block forever on a dead service.
    response = requests.post(url, data=payload, timeout=10)
    response.raise_for_status()
    return response.json()["msg"][0]
# Two-step fetch: request the listing page to obtain the cookies set by the
# site's anti-crawler check, then replay those cookies on the data endpoint.

# Both target URLs are stored base64-encoded and decoded right before use;
# passing the encoded text to requests fails with MissingSchema.
COOKIE_PAGE_URL_B64 = 'aHR0cDovL3N0aGp0LmppYW5nc3UuZ292LmNuL2NvbC9jb2w4MzU2OC9pbmRleC5odG1sP3VpZD0zNTEwODUmcGFnZU51bT0xMjE='
DATA_URL_B64 = 'aHR0cDovL3N0aGp0LmppYW5nc3UuZ292LmNuL21vZHVsZS93ZWIvanBhZ2UvZGF0YXByb3h5LmpzcD9zdGFydHJlY29yZD0xJmVuZHJlY29yZD0xMjAmcGVycGFnZT00MCcrJyZjb2w9MSZhcHBpZD0xJndlYmlkPTE0JnBhdGg9JTJGJmNvbHVtbmlkPTgzNTY4JnNvdXJjZUNvbnRlbnRUeXBlPTEmdW5pdGlkPTM1MTA4NSZ3ZWJuYW1lPSVFNiVCMSU5RiVFOCU4QiU4RiVFNyU5QyU4MSVFNyU5NCU5RiVFNiU4MCU4MSVFNyU4RSVBRiVFNSVBMiU4MyVFNSU4RSU4NSZwZXJtaXNzaW9udHlwZT0w'

# Seconds to wait for each HTTP response before giving up.
REQUEST_TIMEOUT = 10
# Pause between the two requests to reduce the chance of being rate limited.
REQUEST_DELAY_SECONDS = 3

headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Host": "sthjt.jiangsu.gov.cn",
    "Pragma": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
}


def _decode_url(encoded):
    """Return the plain-text URL stored in a base64-encoded literal."""
    decoded = base64.b64decode(encoded).decode("utf-8")
    # The data URL was encoded together with a source-level string
    # concatenation, so its decoded text contains the three characters '+'
    # right after "perpage=40"; remove that artifact.
    return decoded.replace("'+'", "")


def _cookie_header(cookie_items):
    """Format (name, value) pairs as a ``name=value;`` Cookie header value."""
    return ''.join('{0}={1};'.format(name, value) for name, value in cookie_items)


def main():
    """Fetch the anti-crawler cookies, then request the data endpoint.

    Prints the headers used for the second request, the response body
    wrapped in a list (so its repr is shown), and a separator line.

    Raises:
        requests.RequestException: If either HTTP request fails or times out.
    """
    # Step 1: request the listing page only to collect the cookies it sets.
    landing = requests.get(
        _decode_url(COOKIE_PAGE_URL_B64),
        headers=headers,
        proxies=get_proxy(1),
        timeout=REQUEST_TIMEOUT,
    )

    # Work on a copy so the shared base headers are left unmodified.
    request_headers = dict(headers, Cookie=_cookie_header(landing.cookies.items()))
    print(request_headers)

    time.sleep(REQUEST_DELAY_SECONDS)

    # Step 2: replay the cookies on the data endpoint.
    data_response = requests.get(
        _decode_url(DATA_URL_B64),
        headers=request_headers,
        proxies=get_proxy(),
        timeout=REQUEST_TIMEOUT,
    )
    print([data_response.text])
    print('========================')


if __name__ == "__main__":
    main()
某网站的 cookie 反爬采用的是创宇盾加速乐。测试了翻页和刷新页面时 cookie 的情况,均无变化,因此可以按如下方式解决该网址的加速乐:先请求上面提供的第一步网址,拿到 cookie,再将该 cookie 直接用于目标网址的请求即可。亲测可用,但即使加上代理也很容易触发请求限制,建议在请求之间使用 time.sleep(3),防止请求过于频繁。