• python 采用selenium+cookies 获取登录后的网页


    百度网页需要登录并通过手机短信验证,直接用程序自动登录比较麻烦。

    这里我采用先人工登录百度账号,然后将百度账号的相关cookies保存下来

    然后采用selenium动态登录网页

    整体代码如下

    import time

    from selenium import webdriver

    # Start Chrome with the same two command-line switches as before.
    options = webdriver.ChromeOptions()
    for flag in ('--start-maximized',  # maximize the browser window
                 '--disable-infobars'):
        options.add_argument(flag)
    browser = webdriver.Chrome(options=options)

    # First visit: open the site so the cookies below are added while
    # the browser is on the Baidu page.
    browser.get('http://www.baidu.com')

    # Two hand-built cookies copied from a manually logged-in session.
    cookie_1 = {
        "name": "BAIDUID",
        "value": "83D79E79B353728AA1824DACF6D670DC",
    }
    cookie_2 = {
        "name": "BDUSS",
        "value": "pSUFZPT1ctbXlJeDJVZlZ1VWItWk9qYkVtNE0tZlNqWnZpRUNveHVuVUVSeTVsRVFBQUFBJCQAAAAAAAAAAAEAAABE1ecvwffQx9PqstDDzgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAS6BmUEugZlU3",
    }

    time.sleep(3)
    for cookie in (cookie_1, cookie_2):
        browser.add_cookie(cookie)
    time.sleep(3)

    # Second visit: reload the page, this time carrying the injected cookies.
    browser.get('http://www.baidu.com')
    time.sleep(10)

    1、登录百度网页,查看源代码

     找到(原文图 2 所示的)两个关键字段 BAIDUID 和 BDUSS,并据此人工构造两个 cookie

    1. cookie_1 = {"name":"BAIDUID","value":"83D79E79B353728AA1824DACF6D670DC"}
    2. cookie_2 = {"name":"BDUSS","value":"pSUFZPT1ctbXlJeDJVZlZ1VWItWk9qYkVtNE0tZlNqWnZpRUNveHVuVUVSeTVsRVFBQUFBJCQAAAAAAAAAAAEAAABE1ecvwffQx9PqstDDzgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAS6BmUEugZlU3"}

    然后采用selenium 添加构造的两个cookie

    1. browser.add_cookie(cookie_1)
    2. browser.add_cookie(cookie_2)

    接下来大功告成 

    这里有个更快捷的办法,直接把Cookies全部复制

    再人工根据规则构造cookies

    规则类似于如下代码所示

    cookie_1 = {"name": "BAIDUID", "value": "83D79E79B353728AA1824DACF6D670DC"}

    以下为总代码 

    def _parse_cookie_header(raw):
        """Split a ``name=value; name=value`` cookie string into pairs.

        Each ``;``-separated segment is split on its FIRST ``=`` only, so a
        value that itself contains ``=`` (for example
        ``BAIDUID=...:SL=0:NR=10:FG=1``) is kept whole instead of being cut
        at the second ``=``. Segments that are empty or have no ``=`` (such
        as the one produced by a trailing ``;``) are skipped.

        :param raw: cookie string as copied from the browser.
        :return: list of ``(name, value)`` tuples in their original order,
            with surrounding whitespace stripped.
        """
        pairs = []
        for segment in raw.split(";"):
            name, sep, value = segment.strip().partition("=")
            name = name.strip()
            if not sep or not name:
                continue
            pairs.append((name, value.strip()))
        return pairs


    def dongtai_BAIDU():
        """Open Baidu in Chrome and reload it with a saved login's cookies.

        The cookie string below was copied from a browser session that had
        been logged in by hand. Every cookie in it is added to the
        Selenium-driven browser and the page is then requested again so the
        request carries those cookies.

        :return: None. The browser window is left open.
        """
        from selenium import webdriver
        import time

        options = webdriver.ChromeOptions()
        options.add_argument('--start-maximized')  # maximize the browser window
        options.add_argument('--disable-infobars')
        browser = webdriver.Chrome(options=options)

        # Load the site first so the cookies are added while the browser is
        # already on the Baidu page.
        browser.get('http://www.baidu.com')

        cookies = "BIDUPSID=83D79E79B353728A8EC4C62E933EEF8A; PSTM=1694932781; BD_UPN=12314753; BA_HECTOR=8hak0k8gah81808ka4aha52l1igd7pd1p; ZFY=FFDC03Zc:Bp2wVP15g5U4cKd12L:B4UP88tb5D6i6ZhME:C; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BD_CK_SAM=1; PSINO=7; delPer=0; shifen[1858839_91638]=1694935272; shifen[1858839_87962]=1694935272; BCLID=11202995316399066065; BCLID_BFESS=11202995316399066065; BDSFRCVID=cOKOJexroG0Aahbq3iXuesms7eKK0gOTDYLEOwXPsp3LGJLVcRc7EG0PtjJ5HU4bLrA9ogKKLmOTHpuF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; BDSFRCVID_BFESS=cOKOJexroG0Aahbq3iXuesms7eKK0gOTDYLEOwXPsp3LGJLVcRc7EG0PtjJ5HU4bLrA9ogKKLmOTHpuF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJAj_D-btK03H48k-4QEbbQH-UnLq-J9W2OZ04n-ah02EJjd-RL5Mqk0bqbLb5b-W20j0h7m3UTdsq76Wh35K5tTQP6rLtJNKbv4KKJxbnckMqnaj-5dKxo-hUJiBM7LBan7QP5IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtpChbRO4-TFaj6bLef5; H_BDCLCKID_SF_BFESS=tJAj_D-btK03H48k-4QEbbQH-UnLq-J9W2OZ04n-ah02EJjd-RL5Mqk0bqbLb5b-W20j0h7m3UTdsq76Wh35K5tTQP6rLtJNKbv4KKJxbnckMqnaj-5dKxo-hUJiBM7LBan7QP5IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtpChbRO4-TFaj6bLef5; COOKIE_SESSION=0_0_0_1_0_1_1_0_0_1_0_0_0_0_0_0_0_0_1694935272%7C1%230_0_1694935272%7C1; BDUSS=pSUFZPT1ctbXlJeDJVZlZ1VWItWk9qYkVtNE0tZlNqWnZpRUNveHVuVUVSeTVsRVFBQUFBJCQAAAAAAAAAAAEAAABE1ecvwffQx9PqstDDzgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAS6BmUEugZlU3; BDUSS_BFESS=pSUFZPT1ctbXlJeDJVZlZ1VWItWk9qYkVtNE0tZlNqWnZpRUNveHVuVUVSeTVsRVFBQUFBJCQAAAAAAAAAAAEAAABE1ecvwffQx9PqstDDzgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAS6BmUEugZlU3; BDRCVFR[S4-dAuiWMmn]=I67x6TjHwwYf0; H_PS_PSSID=39310_39363_39279_39349_39097_39198_39261_39359_39233_26350; BAIDUID=83D79E79B353728AA1824DACF6D670DC:SL=0:NR=10:FG=1; sug=3; sugstore=1; ORIGIN=0; bdime=0; H_PS_645EC=429eEe9gpR3wfujbqACMgrQQ0Qa0BzvEMw9PZbFseOM5%2FslGgIVC3wEIxeUdoBbKjw; BAIDUID_BFESS=83D79E79B353728AA1824DACF6D670DC:SL=0:NR=10:FG=1"

        # Add each cookie as its own dict in the {"name": ..., "value": ...}
        # form used by add_cookie.
        for name, value in _parse_cookie_header(cookies):
            browser.add_cookie({'name': name, 'value': value})

        # The original listing pauses for 3 seconds twice before reloading.
        time.sleep(3)
        time.sleep(3)

        # Request the page again, now carrying the injected cookies.
        browser.get('http://www.baidu.com')
        time.sleep(10)

  • 相关阅读:
    Rustdesk 自建服务器教程
    BootStrap-前端框架
    npm 包管理
    企业级智能客服知识库重磅更新,发布`v0.1.5`
    【线性代数】分块矩阵总结
    Python爬虫:Session、Cookie、JWT
    我把 CPU 三级缓存的秘密,藏在这 8 张图里
    【Java开发工具】下载安装eclipse并汉化配置教程(所以操作系统通用)
    关于QUERY_ALL_PACKAGES权限导致Google下架apk
    智云通CRM:大客户销售流程,新手也能快速入门
  • 原文地址:https://blog.csdn.net/linxizi0622/article/details/132946921