• Hadoop learning: MapReduce introductory example 2: totaling student scores


    Compared with the WordCount example, the new material here is writing and using a custom Student entity class that Hadoop can serialize and compare.

    Input data: a CSV file (stuscore.csv) with one score record per line.
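    The file itself is not reproduced here; a few illustrative rows in the assumed layout id,name,score (the same layout the mapper below splits on a comma; the names and numbers are made up) would look like:

        1,zhangsan,90
        1,zhangsan,95
        2,lisi,87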

    1. The Student entity class

    import org.apache.hadoop.io.WritableComparable;

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;

    public class Student implements WritableComparable<Student> {

        private long stuid;
        private String stuName;
        private int score;

        public Student() {
        }

        public Student(long stuid, String stuName, int score) {
            this.stuid = stuid;
            this.stuName = stuName;
            this.score = score;
        }

        @Override
        public String toString() {
            return "Student{" +
                    "stuid=" + stuid +
                    ", stuName='" + stuName + '\'' +
                    ", score=" + score +
                    '}';
        }

        public long getStuid() {
            return stuid;
        }

        public void setStuid(long stuid) {
            this.stuid = stuid;
        }

        public String getStuName() {
            return stuName;
        }

        public void setStuName(String stuName) {
            this.stuName = stuName;
        }

        public int getScore() {
            return score;
        }

        public void setScore(int score) {
            this.score = score;
        }

        // Tip: Ctrl + Shift + F auto-formats the file (keyboard must be in English input mode)
        // Order students by score; Integer.compare returns -1/0/1 as the comparator contract requires
        @Override
        public int compareTo(Student o) {
            return Integer.compare(this.score, o.score);
        }

        // Serialization: write the fields in a fixed order
        @Override
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeLong(stuid);
            dataOutput.writeUTF(stuName);
            dataOutput.writeInt(score);
        }

        // Deserialization: read the fields back in exactly the order they were written
        @Override
        public void readFields(DataInput dataInput) throws IOException {
            this.stuid = dataInput.readLong();
            this.stuName = dataInput.readUTF();
            this.score = dataInput.readInt();
        }
    }
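    Not from the original post: a minimal local sketch for sanity-checking the write/readFields pair by round-tripping a Student through a byte stream (the class name and sample values are made up for illustration).

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class StudentRoundTrip {
        public static void main(String[] args) throws IOException {
            Student original = new Student(1L, "zhangsan", 90);

            // Serialize with the same write() that Hadoop calls during the shuffle
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            original.write(new DataOutputStream(bytes));

            // Deserialize with readFields(); the fields only come back correctly
            // because they are read in the same order write() emitted them
            Student copy = new Student();
            copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

            System.out.println(copy); // Student{stuid=1, stuName='zhangsan', score=90}
        }
    }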

    2. The map phase: the StudentMapper class

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    /**
     * Output key: the student id; output value: the Student object parsed from the line.
     */
    public class StudentMapper extends Mapper<LongWritable, Text, LongWritable, Student> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Each input line has the form: id,name,score
            String[] split = value.toString().split(",");
            LongWritable stuidKey = new LongWritable(Long.parseLong(split[0]));
            Student stuValue = new Student(Long.parseLong(split[0]), split[1], Integer.parseInt(split[2]));
            context.write(stuidKey, stuValue);
        }
    }
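    Because the map output key is the student id, the shuffle groups every score record belonging to the same student under one key, which is exactly what the reducer in the next step relies on when it sums the scores.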

    3. The reduce phase: the StudentReducer class

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    public class StudentReducer extends Reducer<LongWritable, Student, Student, NullWritable> {
        @Override
        protected void reduce(LongWritable key, Iterable<Student> values, Context context)
                throws IOException, InterruptedException {
            Student stuOut = new Student();
            int sumScore = 0;
            String stuName = "";
            // Sum the scores of every record that shares this student id
            for (Student stu : values) {
                sumScore += stu.getScore();
                stuName = stu.getStuName();
            }
            stuOut.setScore(sumScore);
            stuOut.setStuid(key.get());
            stuOut.setStuName(stuName);
            System.out.println(stuOut); // debug output, goes to the task log
            context.write(stuOut, NullWritable.get());
        }
    }
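    Since the reduce output key is the Student object (written out via its toString()) and the value is NullWritable, each line of the result file is a single student record with the summed score, e.g. Student{stuid=1, stuName='zhangsan', score=185} for the illustrative rows shown earlier (values hypothetical).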

    4. The driver: the StudentDriver class

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;

    public class StudentDriver {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);
            job.setJarByClass(StudentDriver.class);

            // Map phase: mapper class and map output key/value types
            job.setMapperClass(StudentMapper.class);
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(Student.class);

            // Reduce phase: reducer class and final output key/value types
            job.setReducerClass(StudentReducer.class);
            job.setOutputKeyClass(Student.class);
            job.setOutputValueClass(NullWritable.class);

            // Input path, e.g. "hdfs://kb131:9000/kb23/hadoopstu/stuscore.csv"
            Path inpath = new Path(args[0]); // input path passed in from the command line
            FileInputFormat.setInputPaths(job, inpath);

            // Output path, e.g. "hdfs://kb131:9000/kb23/hadoopstu/out2"
            Path path = new Path(args[1]);
            FileSystem fs = FileSystem.get(path.toUri(), conf);
            // Remove an existing output directory first, otherwise the job would fail
            if (fs.exists(path))
                fs.delete(path, true);
            FileOutputFormat.setOutputPath(job, path);

            job.waitForCompletion(true);
        }
    }
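    Once the classes are packaged into a jar, the job can be submitted with something like hadoop jar student-score.jar StudentDriver hdfs://kb131:9000/kb23/hadoopstu/stuscore.csv hdfs://kb131:9000/kb23/hadoopstu/out2 (the jar name is hypothetical); the two arguments become args[0] and args[1] in the driver above.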

  • Original article: https://blog.csdn.net/jojo_oulaoula/article/details/132568035