python 对长页面进行截屏拼接成长图

关注码龄粉丝数原力等级 -- 被采纳被点赞采纳率 kuyoro 2024-06-14 16:05 采纳率: 90.5% 浏览 8 首页/ 编程语言 / python 对长页面进行截屏拼接成长图 pythonselenium图像处理有问必答试图获取一个html长页面，并且每次滚动500px，截取屏幕，然后再截取一个高度500px的图片，最后拼成一个长图，但实际效果有问题，请帮忙查找问题，谢谢！ import undetected_chromedriver as uc from selenium.webdriver.common.by import By from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as EC import time from PIL import Image import time import math fold_path = f"D://Desktop//news//" screenshot_name = "news" def short_sc(el,driver): start_higth = el.location["y"] js = "scrollTo(0,%s)" % (start_higth) driver.execute_script(js) # 执行js time.sleep(0.5) fp = fold_path + f'{screenshot_name}.png' driver.save_screenshot(fp) img = Image.open(fp=fp) img2 = img.crop((el.location["x"], 0, el.location["x"] + el.size["width"], el.size["height"])) # 剪切图片 img2.save(fp) def long_sc(el,driver): count = math.ceil(el.size["height"] / sc_hight) # 截多少次，math，小数强行取整 short_screen_list = [] for i in range(0, count): # 最后一次取不到 driver.execute_script("scrollTo(0,%s)" % (i * sc_hight)) time.sleep(0.5) fp = fold_path + f'{screenshot_name}_{i}.png' driver.save_screenshot(fp) img = Image.open(fp = fp) # crop（左上角(x, y)，右下角(x, y)）,原点是左上角点 box = (0, 0, el.size["width"], sc_hight) img2 = img.crop(box) img2.save(fp) short_screen_list.append(fp) time.sleep(0.5) last_px = (count - 1) * sc_hight surplus_px = el.size["height"] - last_px # 最后一次图片高度 driver.execute_script( "scrollTo(0,%s)" % last_px) # 滚动到最后一个位置 fp = fold_path + f'last.png' driver.save_screenshot(fp) img = Image.open(fp=fp) # img2 = img.crop((0, 0, el.size["width"], surplus_px)) # img2.save(fp) short_screen_list.append(fp) new_img = Image.new("RGB", (el.size["width"], el.size["height"])) k = 0 for img in short_screen_list: tem_img = Image.open(img) new_img.paste(tem_img, (0, sc_hight * k)) new_img.save( fold_path + f'full.png' ) # 保存 print('over') if __name__ == "__main__": drive = uc.Chrome(headless=False, use_subprocess=False) 
drive.get("https://news.baidu.com/") drive.maximize_window()#最大化窗口 sc_hight = 500 el = drive.find_element(By.TAG_NAME, "body") if el.size["height"] > sc_hight: long_sc(el,drive) else: short_sc(el,drive) 展开全部收起写回答好问题 0 提建议追加酬金关注问题微信扫一扫点击复制链接分享邀请回答编辑收藏删除结题收藏举报追加酬金 (90%的用户在追加酬金后获得了解决方案) 当前问题酬金 ¥ 0 (可追加 ¥500） ¥ 15¥ 20¥ 50¥ 100¥ 200 支付方式扫码支付二维码出错点击刷新支付金额 15 元提供问题酬金的用户不参与问题酬金结算和分配支付即为同意《付费问题酬金结算规则》结题再想想删除再等等

3条回答默认最新

专家-赤兔[在线] 全栈领域优质创作者 2024-06-14 16:05

关注

引自免费微信小程序：皆我百晓生

从你提供的代码来看，主要的问题在于滚动到页面底部并截取剩余部分的处理方式。在循环结束后，你尝试再次滚动到特定位置并截取剩余部分，但这部分代码可能没有正确地处理页面滚动后的等待时间，导致截图未能准确捕捉到目标区域。

为了解决这个问题，可以优化long_sc函数中的滚动逻辑，确保每次滚动后都有足够的时间让页面完全加载，同时也应调整最后一张截图的获取方式，确保它捕捉的是实际可见的内容。以下是修改后的代码：

import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from PIL import Image
import time
import math

# Directory prefix for every image this script writes. File names are appended
# by plain string concatenation, so the trailing separator is required.
fold_path = "D://Desktop//news//"
# Base file name (without extension) of the per-screenshot PNG files.
screenshot_name = "news"
sc_hight = 500  # Set this to the desired height of each screenshot segment

def short_sc(el, driver):
    """Screenshot a page that fits in one capture and crop it to ``el``.

    The page is scrolled towards the top of ``el``, the current window is
    saved to ``<fold_path><screenshot_name>.png`` and that file is then
    overwritten with the region covered by the element.

    Args:
        el: Element exposing ``location`` and ``size`` dicts (page coordinates).
        driver: WebDriver used for scrolling and for taking the screenshot.
    """
    start_height = el.location["y"]
    js = "window.scrollTo(0, %s);" % (start_height)
    driver.execute_script(js)
    time.sleep(1)  # Wait for the page to settle after scrolling

    # The screenshot only contains the viewport, so the element's page
    # coordinates must be converted to viewport coordinates. The browser may
    # scroll less than requested (e.g. a short page cannot scroll at all),
    # hence the real offset is read back instead of assuming start_height.
    scrolled = round(driver.execute_script("return window.pageYOffset;") or 0)
    top = start_height - scrolled
    left = el.location["x"]

    fp = fold_path + f'{screenshot_name}.png'
    driver.save_screenshot(fp)
    # Close the source file before overwriting it with the cropped result.
    with Image.open(fp) as img:
        img2 = img.crop((left, top, left + el.size["width"], top + el.size["height"]))
    img2.save(fp)

def long_sc(el, driver):
    """Capture ``el`` in ``sc_hight``-tall slices and stitch them vertically.

    For every slice the page is scrolled to the slice's top, the window is
    saved to ``<fold_path><screenshot_name>_<i>.png``, and that file is
    overwritten with the cropped slice. All slices are finally pasted into
    ``<fold_path>full.png``.

    Args:
        el: Element exposing a ``size`` dict; its height decides the slice count.
        driver: WebDriver used for scrolling and for taking the screenshots.

    NOTE(review): the crop assumes the viewport is at least ``sc_hight`` CSS
    pixels tall and that screenshots are not scaled by a device pixel ratio
    other than 1 -- confirm for the target machine.
    """
    total_height = el.size["height"]
    width = el.size["width"]
    segments = math.ceil(total_height / sc_hight)
    short_screen_list = []

    for i in range(segments):
        scroll_to = i * sc_hight
        driver.execute_script(f"window.scrollTo(0, {scroll_to});")
        time.sleep(1)  # Wait for the page to settle after scrolling

        # A screenshot shows the viewport only, so the slice starts at the
        # distance between the requested and the real scroll position. That
        # distance is 0 except near the page bottom, where the browser clamps
        # the scroll and the wanted rows sit lower inside the viewport.
        scrolled = round(driver.execute_script("return window.pageYOffset;") or 0)
        top = scroll_to - scrolled
        # The last slice may be shorter than sc_hight.
        bottom = top + min(sc_hight, total_height - scroll_to)

        fp = fold_path + f'{screenshot_name}_{i}.png'
        driver.save_screenshot(fp)
        # Close the source file before overwriting it with the cropped slice.
        with Image.open(fp) as img:
            img2 = img.crop((0, top, width, bottom))
        img2.save(fp)
        short_screen_list.append(img2)

    new_img = Image.new("RGB", (width, total_height))
    current_height = 0
    for piece in short_screen_list:
        new_img.paste(piece, (0, current_height))
        current_height += piece.height

    new_img.save(fold_path + f'full.png')

if __name__ == "__main__":
    # Script entry point: open the page, measure <body> and pick the capture
    # strategy depending on whether it is taller than one slice.
    driver = uc.Chrome(headless=False, use_subprocess=False)
    try:
        driver.get("https://news.baidu.com/")
        driver.maximize_window()

        el = driver.find_element(By.TAG_NAME, "body")

        if el.size["height"] > sc_hight:
            long_sc(el, driver)
        else:
            short_sc(el, driver)
    finally:
        # Always shut the browser down, even if a capture step failed.
        driver.quit()

在这个版本中，long_sc函数被修改为直接将裁剪后的图像添加到列表中，而不是先保存再读取。这样可以减少磁盘I/O操作，提高效率。同时，滚动后的等待时间被增加到1秒，以确保页面元素完全加载。在拼接最终图像时，也进行了相应的调整以确保图像正确拼接。

此外，short_sc函数中的截图裁剪逻辑也被调整，以确保只截取body元素的高度范围内的内容，而不仅仅是整个屏幕的高度。这有助于确保即使在页面高度小于屏幕高度的情况下也能正确截图。

展开全部

编辑记录

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
阿里嘎多学长 2024-06-14 16:05
关注
以下内容由CHATGPT及阿里嘎多学长共同生成、有用望采纳：

【解题思路】：使用selenium滚动和截图

解决无用
评论打赏
分享
举报
编辑

预览
轻敲空格完成输入
显示为

卡片

标题

链接
评论

按下Enter换行，Ctrl+Enter发表内容

Kwan的解忧杂货铺@新空间代码工作室 2024-06-14 16:07

关注

下午好🌅🌅🌅
本答案参考ChatGPT-3.5

你的代码存在一些问题，主要是实现方式不对。我将提供一个新的解决方案：

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC  
import time
from PIL import Image
import time
import math

fold_path = f"D://Desktop//news//"
screenshot_name = "news"

def short_sc(el, driver):
    """Screenshot a page that fits in one capture and crop it to ``el``.

    The page is scrolled towards the top of ``el``, the current window is
    saved to ``<fold_path><screenshot_name>.png`` and that file is then
    overwritten with the region covered by the element.

    Args:
        el: Element exposing ``location`` and ``size`` dicts (page coordinates).
        driver: WebDriver used for scrolling and for taking the screenshot.
    """
    # The scroll target is the element's own page offset. (Cookies, as read
    # previously via get_cookies(), carry no scroll information and are
    # returned as a list, so indexing them with "value" raised TypeError.)
    start_higt = el.location["y"]

    js = "scrollTo(0,%s)" % (start_higt)
    driver.execute_script(js)  # run the scroll in the page
    time.sleep(0.5)

    # The screenshot covers the viewport only; convert the element's page
    # coordinate into a viewport coordinate using the real scroll offset.
    scrolled = round(driver.execute_script("return window.pageYOffset;") or 0)
    top = start_higt - scrolled
    left = el.location["x"]

    fp = fold_path + f'{screenshot_name}.png'
    driver.save_screenshot(fp)
    # Close the source file before overwriting it with the cropped result.
    with Image.open(fp=fp) as img:
        img2 = img.crop((left, top, left + el.size["width"], top + el.size["height"]))
    img2.save(fp)

def long_sc(el, driver):
    """Capture ``el`` in ``sc_hight``-tall slices and stitch them vertically.

    For every slice the page is scrolled to the slice's top, the window is
    saved to ``<fold_path><screenshot_name>_<i>.png``, and that file is
    overwritten with the cropped slice. All slices are finally pasted into
    ``<fold_path>full.png`` and ``over`` is printed.

    Args:
        el: Element exposing a ``size`` dict; its height decides the slice count.
        driver: WebDriver used for scrolling and for taking the screenshots.

    NOTE(review): the crop assumes the viewport is at least ``sc_hight`` CSS
    pixels tall and that screenshots are not scaled by a device pixel ratio
    other than 1 -- confirm for the target machine.
    """
    total_height = el.size["height"]
    width = el.size["width"]
    # Number of slices, rounded up so a partial last slice is included.
    count = math.ceil(total_height / sc_hight)

    slices = []
    for i in range(count):
        target = i * sc_hight
        driver.execute_script("scrollTo(0,%s)" % (target))
        time.sleep(0.5)

        # The browser clamps scrolling at the page bottom, so read back the
        # real offset and locate the wanted rows inside the viewport image.
        scrolled = round(driver.execute_script("return window.pageYOffset;") or 0)
        top = target - scrolled
        # The last slice may be shorter than sc_hight.
        bottom = top + min(sc_hight, total_height - target)

        fp = fold_path + f'{screenshot_name}_{i}.png'
        driver.save_screenshot(fp)
        # crop((left, upper, right, lower)); the origin is the top-left corner.
        # Close the source file before overwriting it with the cropped slice.
        with Image.open(fp=fp) as img:
            img2 = img.crop((0, top, width, bottom))
        img2.save(fp)
        slices.append(img2)

    new_img = Image.new("RGB", (width, total_height))

    # Paste each slice directly below the previous one.
    offset = 0
    for piece in slices:
        new_img.paste(piece, (0, offset))
        offset += piece.height

    new_img.save(fold_path + f'full.png')  # write the stitched image

    print('over')


if __name__ == "__main__":
    # Script entry point: open the page, measure <body> and pick the capture
    # strategy depending on whether it is taller than one slice.
    # This snippet imports selenium's ``webdriver`` and never imports ``uc``,
    # so the stock Chrome driver is used (it starts non-headless by default).
    drive = webdriver.Chrome()
    try:
        drive.get("https://news.baidu.com/")
        drive.maximize_window()  # maximize the browser window

        # Slice height in pixels; read as a module global by the capture helpers.
        sc_hight = 500

        el = drive.find_element(By.TAG_NAME, "body")

        if el.size["height"] > sc_hight:
            long_sc(el, drive)
        else:
            short_sc(el, drive)
    finally:
        # Always shut the browser down, even if a capture step failed.
        drive.quit()

这个版本应该可以正常运行。如果仍然存在问题，请提供更多的错误信息和代码示例以便于进一步排查。

展开全部

编辑

预览

报告相同问题？

关注问题

相关阅读:
【RuoYi-Vue-Plus】学习笔记 39 - Redisson（十一）分布式锁 lock4j 集成分析（源码 + Lua 脚本）
nginx 404 not found错误查找
stable diffusion webui安装部署教程
设计模式简介之几大原则
伦敦金开户需要多少资金，有开户条件吗？
记录Oracle rac 19C（19.15）补丁升级（OPatch 33803476）操作过程
My Ninety-ninth Page - 两个字符串的删除操作 - By Nicolas
Gradle系列【4】Project对象
前端图片压缩解决办法
【C语言】指针详细解读2

原文地址：https://ask.csdn.net/questions/8118684

python 对长页面进行截屏拼接成长图

3条回答 默认 最新

3条回答默认最新