- a 1
- a 2
- b 3
- c 4
- d 5
- c 6
- b 7
- a 8
- x 9
- SELECT key, SUM(val)
- FROM kv
- GROUP BY key
- import re
-
- import pandas as pd
- from matplotlib import pyplot as plt
-
- lines = []
- with open(r"pandas_group_sum.txt") as f:
- lines = f.read().splitlines()
- print(len(lines))
-
-
- p = re.compile(r'(?P
\S*)\s*(?P\S*)' ) -
- kvs = []
- for line in lines:
- m = p.match(line)
- if m:
- # print(line)
- kvs.append((m.group("key"), int(m.group("val"))))
-
-
- df = pd.DataFrame(kvs, columns=["key", "val"])
- # print(df.dtypes)
-
- # 注:先创建空的`DataFrame`,再追加数据,该方式已过时。
- # df = pd.DataFrame(columns=["key", "val"])
- # for kv in kvs:
- # df = df.append({"key":kv[0], "val": int(float(kv[1]))}, ignore_index=True)
-
-
- # 设置 `pandas` 打印展示效果
- # 显示最大列数,None 为全部
- pd.set_option('display.max_columns', None)
- # 显示最大行数,None 为全部
- pd.set_option('display.max_rows', None)
- # 显示每列字段最大宽度
- pd.set_option('max_colwidth', None)
- # 显示行宽度
- pd.set_option('display.width', None)
-
- # 分组求和
- df = df.groupby(["key"], as_index=True).sum().sort_values("val", ascending=False)
-
- # 结果打印
- print(df)
-
- # 结果输出到 excel
- # df.to_excel("结果.xlsx")
-
- # 结果绘图
- # 设置中文
- plt.rcParams['font.sans-serif'] = ['SimHei']
- # 坐标轴负号显示不正常及不能正常显示中文
- plt.rcParams['axes.unicode_minus'] = False
- # 默认为折线图
- df.plot(kind='bar', title="统计图", figsize=(12, 8), legend=True, fontsize=18)
-
- plt.show()
① `pom.xml` 引入相关依赖:
- <dependency>
- <groupId>sh.joinerygroupId>
- <artifactId>joinery-dataframeartifactId>
- <version>1.10version>
- dependency>
-
- <dependency>
- <groupId>com.xeiam.xchartgroupId>
- <artifactId>xchartartifactId>
- <version>2.3.1version>
- dependency>
-
- <dependency>
- <groupId>org.apache.poigroupId>
- <artifactId>poiartifactId>
- <version>4.1.2version>
- dependency>
② 代码实现
- package org.kwok.joinery;
-
- import java.nio.charset.Charset;
- import java.nio.file.Files;
- import java.nio.file.Paths;
- import java.util.Arrays;
- import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
-
- import joinery.DataFrame;
-
- /**
- * 使用 sh.joinery:joinery-dataframe 库进行分组求和
- * 类似 Python pandas 库
- * @author Kwok
- * 2022-11-06
- */
- public class Test_Joinery {
-
- public static void main(String[] args) throws Exception {
-
- System.out.println(Paths.get(Test_Joinery.class.getResource("Test_Joinery.txt").toURI()));
-
- List
lines = Files.readAllLines(Paths.get(Test_Joinery.class.getResource("Test_Joinery.txt").toURI()), Charset.defaultCharset()); - DataFrame
-
-
- Pattern p =Pattern.compile("(?
\\S*)\\s*(?\\S*)" ); -
- lines.stream().forEach(x -> {
- Matcher m = p.matcher(x);
- if(m.matches()) {
- // System.out.println(m.group("key"));
- // System.out.println(m.group("val"));
- df.append(Arrays.asList(m.group("key"), Double.valueOf(m.group("val"))));
- }
-
- });
-
- // 窗体展示
- df.groupBy("key").sum().sortBy("-value").show();
-
- // 打印 1000 行,默认 10 行
- System.out.println(df.groupBy("key").sum().sortBy("-value").toString(1000));
-
- }
- }