接上篇文章《使用ES对一段中文进行分词》(zyydd 的博客,CSDN 博客)。当我将某一段文字进行分词之后,会得到一个 List<String>。本篇文章讲的是:对该 List 进行统计,得到每个词出现的次数,并组织成一个 Map<String, Long>。
-
- import org.apache.commons.collections.CollectionUtils;
-
- import java.util.*;
- import java.util.stream.Collectors;
-
- /**
- * 词云工具类
- */
- public class FrequencyUtils {
-
- public static void main(String[] args) {
- List
a = JsonMapper.fromJson("[\"点击\",\"上方\",\"蓝字\",\"关注\",\"我们\",\"全体\",\"教职员工\",\"教职员\",\"教职\",\"职员\",\"员工\",\"家长\",\"朋友们\",\"朋友\",\"们\",\"你们\",\"好\",\"快乐\",\"而\",\"充实\",\"的\",\"暑期\",\"生活\",\"即将\",\"结束\",\"新学期\",\"新学\",\"学期\",\"的\",\"各项工作\",\"各项\",\"工作\",\"即将\",\"开启\",\"鉴于\",\"目前国内\",\"目前\",\"国内\",\"省内\",\"严峻\",\"复杂\",\"的\",\"疫情\",\"情形\",\"形势\",\"为\",\"进一步\",\"进一\",\"一步\",\"一\",\"步\",\"做好\",\"幼儿园\",\"幼儿\",\"园\",\"疫情\",\"防\",\"控\",\"工作\",\"为\",\"秋季\",\"开学\",\"创造\",\"良好条件\",\"良好\",\"条件\",\"确保\",\"返\",\"园\",\"后\",\"正常\",\"的\",\"教育\",\"教学秩序\",\"教学\",\"秩序\",\"现\",\"温馨\",\"提示\",\"如下\",\"一\",\"做好\",\"返\",\"安\",\"准备\",\"广大\",\"教职员工\",\"教职员\",\"教职\",\"职员\",\"员工\",\"及\",\"幼儿\",\"根据\",\"开学\",\"学时\",\"时间\",\"以及\",\"疫情\",\"情形\",\"形势\",\"变化\",\"预留\",\"留足\",\"足够\",\"时间\",\"至少\",\"少提\",\"提前\",\"7\",\"天\",\"返\",\"安\",\"或\",\"返回\",\"居住地\",\"居住\",\"住地\",\"即\",\"全体\",\"教师\",\"于\",\"2022\",\"年\",\"8\",\"月\",\"20\",\"日\",\"零时\",\"零\",\"时\",\"前\",\"返\",\"安\",\"全体\",\"幼儿\",\"于\",\"2022\",\"年\",\"8\",\"月\",\"24\",\"日\",\"零时\",\"零\",\"时\",\"前\",\"返\",\"安\",\"并\",\"严格\",\"落实\",\"实属\",\"属地\",\"单位\",\"报备\",\"社区\",\"报备\",\"健康\",\"管理\",\"要求\",\"二\",\"做好\",\"健康\",\"监测\",\"建议\",\"从\",\"外地\",\"返\",\"安\",\"的\",\"教职工\",\"教职\",\"职工\",\"幼儿\",\"及\",\"家长\",\"自觉\",\"进行\",\"3\",\"天\",\"2\",\"次\",\"核酸\",\"检测\",\"至少\",\"少间\",\"间隔\",\"24\",\"小时\",\"时\",\"并\",\"做好\",\"7\",\"天\",\"自我\",\"健康\",\"监测\",\"前\",\"3\",\"天\",\"原则上\",\"原则\",\"上\",\"两点\",\"两\",\"点\",\"一线\",\"一\",\"线\",\"少\",\"聚集\",\"少\",\"聚会\",\"时刻\",\"关注\",\"自己\",\"和家人\",\"家人\",\"的\",\"身体状况\",\"身体\",\"状况\",\"如\",\"出现\",\"发热\",\"干咳\",\"乏力\",\"嗅\",\"味\",\"觉\",\"减退\",\"鼻塞\",\"流涕\",\"咽\",\"痛\",\"结膜炎\",\"结膜\",\"膜炎\",\"肌\",\"痛\",\"和\",\"腹泻\",\"等\",\"症状\",\"及时\",\"到\",\"附近\",\"的\",\"发热\",\"热门\",\"门诊\",\"进行\",\"排查\",\"和\",\"诊疗\",\"就医\",\"过程\",\"尽量\",\"避免\",\"乘坐\",\"公共交通\",\"公共\",\"交通工具\",\"交通\",\"工具\",\"三\",\"做好\",\"重点\",\"防\",\"控\",\"近\",\"7\",\"日内\",\"日\",\"内有\",\"中\",\"高风险\",\"高风\",\"风险\",\"险区\",\"旅居\",\"或与\",\"相关\",\"关人\",\"人员\",\"有\",\"密切接触\",\"密切\",\"接触\",\"的
\",\"教师\",\"幼儿\",\"返\",\"安\",\"前\",\"48\",\"小时\",\"向\",\"目的地\",\"目的\",\"地\",\"社区\",\"报备\",\"在\",\"抵\",\"安\",\"后\",\"12\",\"小时内\",\"小时\",\"时\",\"内向\",\"目的地\",\"目的\",\"地\",\"社区\",\"和\",\"幼儿园\",\"幼儿\",\"园\",\"报告\",\"并\",\"配合\",\"合做\",\"做好\",\"信息\",\"登记\",\"核酸\",\"检测\",\"集中\",\"中隔\",\"隔离\",\"或\",\"居家\",\"健康\",\"监测\",\"等\",\"管\",\"控\",\"措施\",\"四\",\"做好\",\"健康\",\"登记\",\"如实\",\"填写\",\"汉滨区\",\"铁路\",\"幼儿园\",\"幼儿\",\"园\",\"疫情\",\"防\",\"控\",\"返\",\"园\",\"承诺书\",\"承诺\",\"书\",\"及\",\"返\",\"园\",\"前\",\"健康\",\"监测\",\"登记表\",\"登记\",\"表\",\"并在\",\"开学\",\"当天\",\"天上\",\"上交\",\"纸质\",\"版\",\"给\",\"班级\",\"教师\",\"电子表格\",\"电子表\",\"电子\",\"子表\",\"表格\",\"已\",\"发至\",\"班级\",\"群\",\"新学期\",\"新学\",\"学期\",\"开学\",\"在即\",\"让我们\",\"我们\",\"一起\",\"一\",\"起\",\"做好\",\"返\",\"园\",\"前\",\"各项\",\"防\",\"控\",\"工作\",\"确保全\",\"确保\",\"保全\",\"全体\",\"教职工\",\"教职\",\"职工\",\"及\",\"幼儿\",\"安全\",\"返\",\"园\",\"祝\",\"大家\",\"身体健康\",\"身体\",\"健康\",\"暑假\",\"愉快\",\"汉滨区\",\"铁路\",\"幼儿园\",\"幼儿\",\"园\",\"2022\",\"年\",\"8\",\"月\",\"19\",\"日\",\"扫\",\"码\",\"关注\",\"分享\",\"给\",\"第一个\",\"第一\",\"一个\",\"一\",\"个\",\"想到\",\"的人\"]\n", List.class); - Map
master = frequencyOfListQ(a, 3);//这个3表示,只有频率出现3次及以上的时候,才会被统计进来 - System.out.println("List
出现次数统计:" + JsonMapper.toJson(master)); - Map
s = new HashMap<>(); - s.put("教职", 32L);
- s.put("教职new_new_new", 102L);
- mapMerge(master, s);//这里是s向master中合并,最后master中越来越多
- System.out.println("Map合并后结果:" + master);
- List
tt = mapSort(master, 5);//这个5是说你最后想要多少个 - System.out.println("Map排序后结果:" + JsonMapper.toJson(tt));
- }
-
- /**
- * @param falcons
- * @param min 只有频率出现min次及以上的时候,才会被统计进来
- * @return
- */
- public static Map
frequencyOfListQ(List falcons, Integer min) { - if (CollectionUtils.isEmpty(falcons)) {
- return new HashMap<>();
- }
- Map
result1 = falcons.stream().collect(Collectors.groupingBy(k -> k, Collectors.counting())); -
- for (Iterator
iterator = result1.keySet().iterator(); iterator.hasNext(); ) { - String key = iterator.next();
- if (key.length() < 2) {
- iterator.remove();
- continue;
- }
- if (result1.get(key) < min) {
- iterator.remove();
- }
- }
- return result1;
- }
-
- /**
- * s向master中合并,最后master中越来越多
- *
- * @param master
- * @param s
- */
- public static void mapMerge(Map
master, Map s) { - if (master == null || master.isEmpty()) {
- throw new RuntimeException("map合并,master为空");
- }
- if (s == null || s.isEmpty()) {
- throw new RuntimeException("map合并,s为空");
- }
- s.forEach((key, value) -> master.merge(key, value, Long::sum));
- }
-
- /**
- * map排序,最多要maxSize个
- *
- * @param map
- * @param maxSize
- * @return
- */
- public static List
mapSort(Map map, int maxSize) { - if (map == null || map.isEmpty()) {
- return new ArrayList<>();
- }
- List
> list = new ArrayList>(map.entrySet()); - Collections.sort(list, new Comparator
>() { - @Override
- public int compare(Map.Entry
o1, Map.Entry o2) { - return o2.getValue().compareTo(o1.getValue());
- }
- });
- List
r = new ArrayList<>(); - for (int i = 0; i < list.size(); i++) {
- if (i >= maxSize) {
- break;
- }
- r.add(new TermResult(list.get(i).getKey(), list.get(i).getValue()));
- }
- return r;
- }
-
- static class TermResult {
-
- private String key;
- private Long count;
-
- TermResult(String key, Long count) {
- this.key = key;
- this.count = count;
- }
-
- public String getKey() {
- return key;
- }
-
- public void setKey(String key) {
- this.key = key;
- }
-
- public Long getCount() {
- return count;
- }
-
- public void setCount(Long count) {
- this.count = count;
- }
- }
-
- }
