• 读取s3图片并保存至excel


    1. 构建Client类,实现图片读取

    1.1 导包&config

    client.py

    1. import os
    2. import base64
    3. import numpy as np
    4. import pandas as pd
    5. import cv2
    6. import boto3
    7. # boto3安装: pip3 install opencv-python boto3
    # Keyword arguments forwarded verbatim to ``boto3.client('s3', ...)``.
    # NOTE(review): the credentials are hard-coded in source; consider loading
    # them from the environment or a secrets store instead of committing them.
    config = {
        "region_name": "us-east-1",
        "endpoint_url": "https://s3.rapidcompute.com",
        # "image_bucket": "prod-barwaqt-image",
        "aws_access_key_id": "rcus_bd-prod",
        "aws_secret_access_key": "OgRKm6h...2HdbKA6s",
    }

    1.2 类实现

    class Client:
        """Small wrapper around a boto3 S3 client that fetches images.

        Both read methods return a ``(status, payload)`` tuple instead of
        raising: ``status`` is ``'OK'`` or ``'ERROR'``; on error the payload is
        the string ``'READ_IMAGE_ERROR'``.
        """

        def __init__(self):
            # ``config`` is the module-level dict of boto3 client keyword arguments.
            self.config = config
            self.client = boto3.client('s3', **self.config)

        def read_image(self, bucket_name, image_key):
            """Fetch an object from S3 and decode it as a BGR image.

            Args:
                bucket_name: Name of the S3 bucket.
                image_key: Key of the image object inside the bucket.

            Returns:
                ``('OK', image)`` where ``image`` is the array produced by
                ``cv2.imdecode``, or ``('ERROR', 'READ_IMAGE_ERROR')`` when the
                download fails or the bytes cannot be decoded as an image.
            """
            try:
                response = self.client.get_object(Bucket=bucket_name, Key=image_key)
                raw = np.frombuffer(response.get('Body').read(), np.uint8)
                image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
            except Exception:
                # Boundary of the status-tuple API: any failure (network, missing
                # key, empty body) is reported to the caller as an error status.
                return 'ERROR', 'READ_IMAGE_ERROR'
            if image is None:
                # imdecode signals undecodable data by returning None rather than
                # raising; do not report that as success.
                return 'ERROR', 'READ_IMAGE_ERROR'
            return 'OK', image

        def read_image_b64(self, bucket_name, image_key):
            """Fetch an image from S3 and return it as a base64 JPEG string.

            Args:
                bucket_name: Name of the S3 bucket.
                image_key: Key of the image object inside the bucket.

            Returns:
                ``('OK', b64_text)`` on success, otherwise the error tuple
                produced by :meth:`read_image`.
            """
            status, image = self.read_image(bucket_name, image_key)
            if status != 'OK':
                return status, image
            encoded_ok, buffer = cv2.imencode('.jpg', image)
            if not encoded_ok:
                return 'ERROR', 'READ_IMAGE_ERROR'
            return status, base64.b64encode(buffer).decode()


    2. 图片下载至本地文件夹mydir

    2.1 图片下载

    def save_img(img_path, file_name='test.jpg'):
        """Download one image from the ``prod-barwaqt-image`` bucket into ``mydir``.

        Relies on a module-level ``client`` (a ``Client`` instance) being defined
        before the call.

        Args:
            img_path: S3 key of the image, e.g. ``'prod/18/be56/<hash>.jpg'``.
            file_name: Kept for backward compatibility only; the saved file is
                always named after the last path component of ``img_path``.

        Returns:
            None. Nothing is written when the image cannot be read.
        """
        status, img = client.read_image('prod-barwaqt-image', img_path)
        if status != 'OK':
            return
        # cv2.imwrite does not create missing directories, so make sure the
        # target folder exists first.
        os.makedirs('mydir', exist_ok=True)
        file_name = os.path.join('mydir', img_path.split('/')[-1])
        cv2.imwrite(file_name, img)
        # cv2.imwrite(file_name, img, [cv2.IMWRITE_PNG_COMPRESSION, 8])  # light compression

    2.2 测试

    2.2.1 单图下载

    # Download a single image into ./mydir.
    client = Client()
    img_path = 'prod/18/be56/18be564c36b05d730257dbbe87ede614.jpg'
    save_img(img_path)

    2.2.2 批量下载

    # Download every image listed in the ``s3_path`` column of img_path.csv.
    client = Client()
    df = pd.read_csv('img_path.csv')
    df['s3_path'].apply(save_img)

    img_path.csv 如下:

    user_account_id,s3_path
    210805010001565250,prod/12/e122/12e122b5328e1b5007b3de5c76e0bf02.jpg
    210812010008799851,prod/26/92b7/2692b7c55bb71581586a6392926c0a24.jpg

    2.2.3 多线程下载

    from pandarallel import pandarallel

    pandarallel.initialize(nb_workers=10, use_memory_fs=False, progress_bar=True)
    # ``client`` must be created at module level (outside save_img).
    client = Client()
    # ``df`` is the DataFrame loaded from img_path.csv; its key column is
    # ``s3_path``.
    df['s3_path'].parallel_apply(save_img)

    2.2.4 本地图片批量压缩

    def img_batch_zip(input_dir, output_dir, jpeg_quality=95):
        """Resize every image in ``input_dir`` and save it as JPEG in ``output_dir``.

        Each file is read with OpenCV, passed through ``img_pad`` and written as
        ``<original stem>.jpg``. Files that cannot be read or processed are
        skipped with a printed message so that one bad file does not stop the
        batch.

        Args:
            input_dir: Directory containing the source images.
            output_dir: Directory receiving the converted images; created if it
                does not exist.
            jpeg_quality: JPEG quality passed to ``cv2.imwrite`` (0-100). The
                default of 95 is OpenCV's own default, so output is unchanged
                unless a lower value is requested.
        """
        os.makedirs(output_dir, exist_ok=True)
        for item in os.listdir(input_dir):
            source_path = os.path.join(input_dir, item)
            # Convert jpeg/png inputs to a .jpg file with the same stem.
            stem = os.path.splitext(item)[0]
            target_file_name = os.path.join(output_dir, stem + ".jpg")
            try:
                img = cv2.imread(source_path)
                if img is None:
                    # imread returns None for directories and non-image files.
                    print('skip unreadable image: ', source_path)
                    continue
                output_image = img_pad(img)
                # The output is JPEG, so the JPEG quality flag (not the PNG
                # compression flag) is the one that takes effect.
                cv2.imwrite(target_file_name, output_image,
                            [cv2.IMWRITE_JPEG_QUALITY, jpeg_quality])
            except Exception as e:
                # Best-effort batch: report the failure and continue.
                print('failed to process image: ', source_path, e)

    图片压缩

    def img_pad(pil_file, fixed_size=1600):
        """Resize an image so that its longer side equals ``fixed_size``.

        The aspect ratio is preserved and the shorter side is rounded down to an
        even number of pixels. Despite the name, no padding is applied: the
        result is generally not square.

        Args:
            pil_file: Image array whose first two dimensions are (height, width),
                as returned by ``cv2.imread``.
            fixed_size: Target length in pixels of the longer side.

        Returns:
            The resized image as a NumPy array.
        """
        # Height comes first in the shape; swapping h and w distorts the image.
        # Only the first two entries are read so grayscale input also works.
        h, w = pil_file.shape[:2]
        factor = max(h, w) / float(fixed_size)
        short_side = int(min(h, w) / factor)
        short_side -= short_side % 2  # round down to an even size
        short_side = max(short_side, 2)  # cv2.resize rejects a zero-sized target
        # cv2.resize takes the target size as (width, height).
        if h >= w:
            target_size = (short_side, fixed_size)
        else:
            target_size = (fixed_size, short_side)
        return np.array(cv2.resize(pil_file, target_size))

    3. 保存图片至excel

    读取test_data.csv中的数据,将地址字段进行相应图片下载,追加至行末。

    输入:test_data.csv

     输出:res.xlsx

    3.1 导包

    1. # -*- coding: utf-8 -*-
    2. import os
    3. import pandas as pd
    4. import cv2
    5. import xlsxwriter
    6. import tqdm
    # Create the output workbook and add a single worksheet to it.
    BOOK = xlsxwriter.Workbook('res.xlsx')
    SHEET = BOOK.add_worksheet('sheet1')
    # Default row height; also used to derive the image scale and column width.
    CEIL_HEIGHT = 256
    SHEET.set_default_row(CEIL_HEIGHT)
    SHEET.set_column(0, 18, CEIL_HEIGHT / 18)

    3.2 插入图片内容

    1. 在一个单元格插入一张图片

    def inset_a_img(img_name, target_col, row=None):
        """Insert one image from ``./mydir/`` into a cell of ``SHEET``.

        Args:
            img_name: File name of the image inside ``./mydir/``.
            target_col: Zero-based worksheet column that receives the image.
            row: Zero-based worksheet row. When omitted, the row is taken from
                the module-level loop variable ``line`` as ``line.Index + 1``
                (row 0 holds the header).

        Returns:
            None. A missing or unreadable image is skipped with a printed
            message instead of aborting the export.
        """
        # Read the image from the local folder to obtain its height.
        image_path = os.path.join("./mydir/", img_name)
        img = cv2.imread(image_path)
        if img is None:
            # imread returns None when the file is missing or not an image.
            print('skip missing or unreadable image: ', image_path)
            return
        if row is None:
            row = line.Index + 1
        scale = CEIL_HEIGHT * 1.3 / img.shape[0]
        # x_offset shifts the image horizontally inside the cell.
        SHEET.insert_image(row, target_col, image_path,
                           {'x_offset': 100, 'y_offset': 2,
                            'x_scale': scale, 'y_scale': scale,
                            'positioning': 1})

    2. 处理一行数据

    def insert_image(line):
        """Write one DataFrame row to ``SHEET`` and attach its images.

        Args:
            line: A row from ``DataFrame.itertuples()`` with the fields
                ``Index``, ``user_account_id``, ``s3_path_1`` and ``s3_path_2``.

        The text values go to worksheet columns 0.. of row ``line.Index + 1``
        (row 0 holds the header). The image for ``s3_path_1`` is inserted at
        column 3 and the one for ``s3_path_2`` at column 5; a NaN path gets no
        image.
        """
        print('正在操作第几行: ', line.Index)
        print("该行有多少列: ", len(line))
        row = line.Index + 1
        # Element 0 of the tuple is the index, so data cells start at element 1.
        for col, value in enumerate(line[1:]):
            SHEET.write(row, col, '' if pd.isna(value) else value)
        # Insert each image once per row, independent of the cell loop.
        for s3_path, target_col in ((line.s3_path_1, 3), (line.s3_path_2, 5)):
            if pd.isna(s3_path):
                continue
            inset_a_img(s3_path.split('/')[-1], target_col)

    3.3 测试

    df = pd.read_csv('test_data.csv', dtype=str)
    col_list = ['user_account_id', 's3_path_1', 's3_path_2']
    df.columns = col_list
    # Write the header row (row 0).
    for col, name in enumerate(col_list):
        SHEET.write(0, col, name)
    # The loop variable must stay named ``line`` at module level: inset_a_img
    # reads it to find the current row. Including the index, each tuple has
    # len(col_list) + 1 fields, e.g.
    # Pandas(Index=0, user_account_id='21...346', s3_path_1='e4.jpg',
    #        s3_path_2='fc.jpg')
    for line in tqdm.tqdm(df.itertuples(), total=len(df)):  # tqdm shows a progress bar
        insert_image(line)
    BOOK.close()

       

  • 相关阅读:
    unity使用UniStorm 5.1.0.unitypackage增加天气
    产品软文怎么写?掌握这几个技巧你也能写
    java计算机毕业设计springboot+vue考研资料分享系统
    数仓领域相关技术选型总结
    带你轻松解密白盒测试
    vscode 设置打开终端的默认工作目录/路径
    在 Windows 用 Chrome System Settings 设置代理
    ESP32网络开发实例-Web服务器以仪表形式显示传感器计数
    Linux简单命令学习
    JAVA三元表达式详解
  • 原文地址:https://blog.csdn.net/MusicDancing/article/details/128190047