Hadoop的数据计算部分原理

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;

public class WorkConut {

public static void main(String\[\] args) throws IOException {
    HashMap map=new HashMap();
    Configuration conf\=new Configuration();

    //连接对象
    FileSystem fileSystem = FileSystem.get(conf);

    //读数据
    FSDataInputStream open = fileSystem.open(new Path("E:\\wc.txt"));

    //处理数据
    BufferedReader reader = new BufferedReader(new InputStreamReader(open));

    //读取每一行数据
    String line=null;
    while ((line=reader.readLine())!=null){
     String\[\] splies\=line.split(" ");

     //逻辑

        for (String word:splies) {
            //当Map集合中有这个key时，就使用这个key值；
            //如果没有就使用默认值defaultValue。
        Integer count= map.getOrDefault(word,0);

        count++;

        map.put(word,count);

        }
    }
    //写数据
    FSDataOutputStream create = fileSystem.create(new Path("E:\\resoult.txt"));

    BufferedWriter writer \= new BufferedWriter(new OutputStreamWriter(create));



    //循环遍历map
    Set>  entries  = map.entrySet();

    //排序
   ArrayList> list= new ArrayList<>(entries);

   list.sort(new Comparator>() {
       @Override
       public int compare(Map.Entry o1, Map.Entry o2) {
           return o2.getValue()-o1.getValue();
       }
   });



    for (Map.Entry entry: list) {
       writer.write(entry.getKey()+"\="+entry.getValue()+"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55

");
writer.flush();
}

    //关流
    reader.close();
    writer.close();

}
1
2
3
4
5

}

前提条件：输入文件 E:\wc.txt 必须存在，且文件中有数据。

Hadoop windows下环境

输出文件 E:\resoult.txt 运行前不存在，由程序运行时创建。

转载于:https://www.cnblogs.com/wangshuang123/p/10914007.html

相关阅读:
百度是否收录查询易语言代码
python如何将代码制作成可以pip的库，将自己的python代码打包成库，让别人pip安装调用？
奥特曼与钢铁侠【InsCode Stable Diffusion美图活动一期】
一套有趣的期权套利题目
Linux入门
【23种设计模式】接口隔离原则
Flink快速入门
pnpm的浅了解
devops学习(三) K8环境部署jenkins
ultraEdit正则匹配多行（xml用）

原文地址：https://blog.csdn.net/m0_67401545/article/details/126663872